From c3aa04c0e90a6c89c7667ad435e170896e7074ac Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Thu, 22 Sep 2022 09:48:57 +0000
Subject: [PATCH 01/15] remove needless using tensor

---
 .../distributed/ps/service/brpc_utils.cc | 2 +- paddle/fluid/eager/eager_tensor.h | 6 +- paddle/fluid/framework/attribute_checker.h | 2 +- .../fluid/framework/copy_same_tensor_test.cc | 6 +- paddle/fluid/framework/custom_operator.cc | 16 +- .../fluid/framework/data_device_transform.cc | 4 +- .../fluid/framework/data_device_transform.h | 4 +- .../framework/data_device_transform_test.cu | 6 +- paddle/fluid/framework/data_feed.proto | 2 +- .../fluid/framework/data_layout_transform.cc | 15 +- .../fluid/framework/data_layout_transform.h | 22 +- .../framework/data_layout_transform_test.cc | 8 +- paddle/fluid/framework/data_transform.cc | 14 +- paddle/fluid/framework/data_transform.h | 6 +- paddle/fluid/framework/data_type_test.cc | 6 +- paddle/fluid/framework/data_type_transform.cc | 28 +- paddle/fluid/framework/data_type_transform.h | 12 +- .../framework/data_type_transform_test.cc | 16 +- .../framework/data_type_transform_test.cu | 16 +- .../framework/details/all_reduce_op_handle.cc | 2 +- .../framework/details/broadcast_op_handle.cc | 4 +- .../details/broadcast_op_handle_test.h | 4 +- .../framework/details/build_strategy_test.cc | 3 +- .../details/fetch_async_op_handle.cc | 4 +- .../framework/details/gather_op_handle.cc | 4 +- .../details/gather_op_handle_test.cc | 2 +- .../framework/details/nan_inf_utils_detail.cc | 35 ++- .../framework/details/nan_inf_utils_detail.cu | 2 +- .../framework/details/nan_inf_utils_detail.h | 6 +- .../framework/details/reduce_and_gather.h | 2 +- .../details/reduce_op_handle_test.cc | 4 +- .../details/scale_loss_grad_op_handle.cc | 4 +- .../details/scope_buffered_monitor.cc | 7 +- .../details/share_tensor_buffer_functor.h | 4 +- .../framework/details/variable_visitor.cc | 4 +- .../framework/details/variable_visitor.h | 2 +- paddle/fluid/framework/device_worker.cc | 32 +-- paddle/fluid/framework/device_worker.h | 7 +- paddle/fluid/framework/dlpack_tensor.cc | 2 +- paddle/fluid/framework/dlpack_tensor.h | 2 +- paddle/fluid/framework/dlpack_tensor_test.cc | 4 +- .../fluid/framework/downpour_lite_worker.cc | 10 +- paddle/fluid/framework/downpour_worker.cc | 10 +- paddle/fluid/framework/eigen.h | 27 +- paddle/fluid/framework/eigen_test.cc | 14 +- paddle/fluid/framework/fleet/ascend_wrapper.h | 11 +- paddle/fluid/framework/framework.proto | 2 +- paddle/fluid/framework/infershape_utils.cc | 2 +- paddle/fluid/framework/infershape_utils.h | 18 +- .../framework/ir/attention_lstm_fuse_pass.cc | 24 +- .../fluid/framework/ir/conv_bn_fuse_pass.cc | 18 +- paddle/fluid/framework/ir/fc_fuse_pass.cc | 2 +- .../framework/ir/fusion_group/operation.cc | 3 +- .../reference_count_pass.cc | 2 +- .../compute_propagate_scales_mkldnn_pass.cc | 22 +- .../compute_propagate_scales_mkldnn_pass.h | 11 +- ...ute_propagate_scales_mkldnn_pass_tester.cc | 8 +- .../conv_affine_channel_mkldnn_fuse_pass.cc | 16 +- .../mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc | 6 +- .../ir/mkldnn/quant_dequant_mkldnn_pass.cc | 6 +- .../ir/mkldnn/quant_dequant_mkldnn_pass.h | 4 +- .../ir/multihead_matmul_fuse_pass.cc | 12 +- paddle/fluid/framework/ir/pass_test_util.h | 2 +- paddle/fluid/framework/lod_tensor.h | 4 +- .../framework/new_executor/data_transfer.cc | 12 +- .../framework/new_executor/interpretercore.cc | 16 +- .../new_executor/new_executor_defs.cc | 2 +- paddle/fluid/framework/operator.cc | 129 +++++-----
paddle/fluid/framework/operator.h | 34 +-- paddle/fluid/framework/operator_test.cc | 19 +- .../paddle2cinn/cinn_graph_symbolization.cc | 4 +- .../cinn_graph_symbolization_test.cc | 2 +- .../framework/paddle2cinn/cinn_lib_test.cc | 14 +- paddle/fluid/framework/program_desc.h | 12 +- paddle/fluid/framework/save_load_util.cc | 64 ++--- paddle/fluid/framework/save_load_util.h | 7 +- paddle/fluid/framework/save_load_util_test.cc | 8 +- .../framework/selected_rows_utils_test.cc | 6 +- paddle/fluid/framework/tensor.h | 1 - paddle/fluid/framework/tensor_test.cc | 94 +++---- paddle/fluid/framework/tensor_util.cc | 104 ++++---- paddle/fluid/framework/tensor_util.h | 67 ++--- paddle/fluid/framework/tensor_util_test.cc | 108 ++++---- paddle/fluid/framework/tensor_util_test.cu | 48 ++-- paddle/fluid/framework/tuple.h | 2 +- paddle/fluid/framework/var_type_traits.h | 2 +- paddle/fluid/framework/variable.h | 18 +- paddle/fluid/framework/variable_test.cc | 2 +- paddle/fluid/framework/version.cc | 4 +- paddle/fluid/framework/version.h | 2 +- paddle/fluid/imperative/all_reduce.cc | 4 +- paddle/fluid/imperative/basic_engine.cc | 2 +- paddle/fluid/imperative/bkcl_context.cc | 6 +- paddle/fluid/imperative/cncl_context.cc | 6 +- paddle/fluid/imperative/gloo_context.cc | 4 +- paddle/fluid/imperative/gloo_context.h | 2 +- .../fluid/imperative/gradient_accumulator.cc | 4 +- paddle/fluid/imperative/hccl_context.cc | 6 +- paddle/fluid/imperative/nccl_context.cc | 2 +- paddle/fluid/imperative/prepared_operator.cc | 4 +- paddle/fluid/imperative/prepared_operator.h | 6 +- paddle/fluid/imperative/reducer.cc | 37 ++- paddle/fluid/imperative/reducer.cu | 2 +- paddle/fluid/imperative/reducer.h | 8 +- .../tests/test_gradient_accmulator.cc | 4 +- paddle/fluid/imperative/tests/test_group.cc | 6 +- .../fluid/imperative/tests/test_prepare_op.cc | 2 +- paddle/fluid/imperative/variable_wrapper.h | 6 +- .../passes/convert_to_mixed_precision.cc | 4 +- .../ir_params_sync_among_devices_pass.cc | 12 +- .../api/details/reset_tensor_array.h | 2 +- .../inference/api/details/zero_copy_tensor.cc | 4 +- .../tensorrt/convert/fill_constant_op.cc | 2 +- .../inference/tensorrt/convert/gelu_op.cc | 4 +- .../inference/tensorrt/convert/matmul_op.cc | 3 +- .../inference/tensorrt/convert/op_converter.h | 4 +- .../inference/tensorrt/convert/scale_op.cc | 2 +- .../tensorrt/convert/strided_slice_op.cc | 2 +- paddle/fluid/inference/tensorrt/engine.cc | 24 +- paddle/fluid/inference/tensorrt/engine.h | 10 +- .../plugin/emb_eltwise_layernorm_plugin.h | 11 +- .../plugin/fused_token_prune_op_plugin.cu | 2 +- .../tensorrt/plugin/group_norm_op_plugin.cu | 4 +- .../tensorrt/plugin/group_norm_op_plugin.h | 25 +- .../tensorrt/plugin/instance_norm_op_plugin.h | 4 +- .../tensorrt/plugin/layer_norm_op_plugin.h | 25 +- .../tensorrt/plugin/qkv_to_context_plugin.cu | 8 +- .../tensorrt/plugin/qkv_to_context_plugin.h | 11 +- .../inference/tensorrt/test_dynamic_engine.cc | 8 +- .../fluid/inference/tensorrt/test_engine.cc | 4 +- .../inference/tensorrt/trt_int8_calibrator.cc | 2 +- .../inference/tensorrt/trt_int8_calibrator.h | 2 +- paddle/fluid/operators/abs_op.cc | 2 +- paddle/fluid/operators/abs_op_mlu.cc | 12 +- paddle/fluid/operators/abs_op_npu.cc | 12 +- paddle/fluid/operators/activation_cudnn.cu.cc | 4 +- .../fluid/operators/activation_cudnn_op.cu.cc | 20 +- paddle/fluid/operators/activation_op.cc | 8 +- paddle/fluid/operators/activation_op.h | 73 +++--- paddle/fluid/operators/activation_op.kps | 16 +- paddle/fluid/operators/activation_op_mlu.cc | 82 +++--- 
paddle/fluid/operators/activation_op_npu.cc | 2 +- paddle/fluid/operators/addmm_op.cc | 1 - paddle/fluid/operators/affine_channel_op.cc | 20 +- paddle/fluid/operators/affine_channel_op.cu | 22 +- .../fluid/operators/affine_channel_op_xpu.cc | 20 +- paddle/fluid/operators/affine_grid_op.cc | 2 +- .../amp/alloc_float_status_op_npu.cc | 4 +- .../amp/check_finite_and_unscale_op_mlu.cc | 10 +- .../amp/check_finite_and_unscale_op_npu.cc | 14 +- .../check_finite_and_unscale_op_npu_test.cc | 2 +- .../amp/clear_float_status_op_npu.cc | 6 +- .../operators/amp/get_float_status_op_npu.cc | 6 +- .../operators/amp/update_loss_scaling_op.cc | 2 +- .../amp/update_loss_scaling_op_npu.cc | 51 ++-- paddle/fluid/operators/arg_max_op_mlu.cc | 10 +- paddle/fluid/operators/arg_max_op_npu.cc | 6 +- paddle/fluid/operators/arg_min_op_npu.cc | 6 +- paddle/fluid/operators/argsort_op_mlu.cc | 8 +- paddle/fluid/operators/argsort_op_npu.cc | 32 +-- paddle/fluid/operators/array_operator.h | 2 +- .../fluid/operators/array_to_lod_tensor_op.cc | 4 +- paddle/fluid/operators/ascend_trigger_op.h | 4 +- paddle/fluid/operators/assign_op.cc | 2 +- paddle/fluid/operators/assign_op_test.cc | 4 +- paddle/fluid/operators/assign_pos_op.cu | 2 +- paddle/fluid/operators/assign_value_op.h | 16 +- paddle/fluid/operators/attention_lstm_op.cc | 25 +- paddle/fluid/operators/attention_lstm_op.h | 2 +- paddle/fluid/operators/batch_fc_op.cu | 17 +- paddle/fluid/operators/batch_norm_op.cc | 19 +- paddle/fluid/operators/batch_norm_op.cu | 2 +- paddle/fluid/operators/batch_norm_op.h | 6 +- paddle/fluid/operators/batch_norm_op_mlu.cc | 46 ++-- paddle/fluid/operators/batch_norm_op_npu.cc | 50 ++-- paddle/fluid/operators/bce_loss_op.cc | 2 - paddle/fluid/operators/bce_loss_op_mlu.cc | 16 +- paddle/fluid/operators/bce_loss_op_npu.cc | 16 +- paddle/fluid/operators/beam_search_op.h | 2 +- paddle/fluid/operators/bilateral_slice_op.cc | 3 +- paddle/fluid/operators/bilateral_slice_op.cu | 27 +- paddle/fluid/operators/bincount_op.cc | 1 - paddle/fluid/operators/bmm_op.h | 10 +- paddle/fluid/operators/bpr_loss_op.h | 27 +- .../fluid/operators/broadcast_tensors_op.cc | 1 - paddle/fluid/operators/cast_op.h | 12 +- paddle/fluid/operators/cast_op_mlu.cc | 6 +- paddle/fluid/operators/cast_op_npu.cc | 6 +- paddle/fluid/operators/center_loss_op.cu | 18 +- paddle/fluid/operators/center_loss_op.h | 22 +- paddle/fluid/operators/chunk_eval_op.h | 16 +- .../operators/cinn/cinn_launch_context.h | 2 +- paddle/fluid/operators/clip_by_norm_op.h | 2 +- paddle/fluid/operators/clip_by_norm_op_npu.cc | 6 +- paddle/fluid/operators/clip_by_norm_op_xpu.cc | 8 +- paddle/fluid/operators/clip_op_mlu.cc | 20 +- paddle/fluid/operators/clip_op_npu.cc | 24 +- paddle/fluid/operators/coalesce_tensor_op.cc | 2 +- .../fluid/operators/collective/allreduce_op.h | 4 +- .../operators/collective/barrier_op.cu.cc | 4 +- .../operators/collective/broadcast_op.cu.cc | 4 +- .../operators/collective/broadcast_op_xpu.cc | 4 +- .../operators/collective/c_allgather_op.cu.cc | 4 +- .../operators/collective/c_allgather_op.h | 4 +- .../collective/c_allgather_op_mlu.cc | 4 +- .../collective/c_allgather_op_npu.cc | 4 +- .../collective/c_allgather_op_xpu.cc | 4 +- .../operators/collective/c_allreduce_op.h | 28 +- .../operators/collective/c_broadcast_op.cu.cc | 4 +- .../operators/collective/c_broadcast_op.h | 4 +- .../collective/c_broadcast_op_mlu.cc | 4 +- .../collective/c_broadcast_op_npu.cc | 4 +- .../operators/collective/c_concat_op.cu.cc | 10 +- .../collective/c_embedding_op_npu.cc | 16 +- 
.../fluid/operators/collective/c_reduce_op.h | 12 +- .../collective/c_reducescatter_op.cu.cc | 4 +- .../collective/c_reducescatter_op_npu.cc | 4 +- .../operators/collective/c_scatter_op.cu.cc | 10 +- .../fluid/operators/collective/c_scatter_op.h | 4 +- .../c_softmax_with_cross_entropy_op.cu | 31 +-- .../fluid/operators/collective/c_split_op.cu | 4 +- .../collective/global_gather_op.cu.cc | 8 +- .../collective/global_scatter_op.cu.cc | 8 +- .../collective/partial_allgather_op.cu.cc | 4 +- .../collective/partial_allgather_op_npu.cc | 4 +- .../operators/collective/recv_v2_op.cu.cc | 12 +- .../operators/collective/send_v2_op.cu.cc | 15 +- paddle/fluid/operators/concat_op.cc | 8 +- paddle/fluid/operators/concat_op_mlu.cc | 7 +- paddle/fluid/operators/concat_op_npu.cc | 5 +- .../fluid/operators/controlflow/fetch_op.cc | 2 +- .../operators/controlflow/fetch_v2_op.cc | 4 +- .../operators/controlflow/logical_op_mlu.cc | 8 +- .../operators/controlflow/logical_op_npu.cc | 18 +- .../operators/controlflow/logical_op_xpu.h | 14 +- paddle/fluid/operators/conv_base_helper.h | 10 +- paddle/fluid/operators/conv_cudnn_helper.h | 4 +- paddle/fluid/operators/conv_miopen_helper.h | 4 +- paddle/fluid/operators/conv_op.cc | 8 +- paddle/fluid/operators/conv_op.h | 6 +- paddle/fluid/operators/conv_op_mlu.cc | 40 +-- paddle/fluid/operators/conv_op_npu.cc | 68 ++--- paddle/fluid/operators/conv_shift_op.cc | 17 +- paddle/fluid/operators/conv_shift_op.cu | 21 +- paddle/fluid/operators/conv_transpose_op.cc | 2 +- paddle/fluid/operators/conv_transpose_op.h | 2 +- .../fluid/operators/conv_transpose_op_mlu.cc | 22 +- .../fluid/operators/conv_transpose_op_npu.cc | 28 +- paddle/fluid/operators/copy_cross_scope_op.cc | 4 +- paddle/fluid/operators/correlation_op.cc | 14 +- paddle/fluid/operators/correlation_op.cu | 28 +- paddle/fluid/operators/cos_sim_op.cc | 2 - paddle/fluid/operators/cos_sim_op.h | 27 +- paddle/fluid/operators/crf_decoding_op.h | 25 +- paddle/fluid/operators/crop_op.cc | 2 - paddle/fluid/operators/crop_op.h | 24 +- paddle/fluid/operators/crop_op_npu.cc | 10 +- paddle/fluid/operators/crop_tensor_op.cc | 6 +- paddle/fluid/operators/cross_entropy_op.h | 32 +-- paddle/fluid/operators/ctc_align_op.h | 2 +- .../fluid/operators/cuda_graph_with_in_out.h | 28 +- paddle/fluid/operators/cudnn_lstm_cache.h | 2 +- paddle/fluid/operators/cudnn_lstm_op.cu.cc | 73 +++--- paddle/fluid/operators/cudnn_rnn_cache.h | 4 +- paddle/fluid/operators/cumsum_op_mlu.cc | 8 +- paddle/fluid/operators/cumsum_op_npu.cc | 10 +- paddle/fluid/operators/cvm_op.cc | 2 +- paddle/fluid/operators/cvm_op.cu | 4 +- paddle/fluid/operators/cvm_op.h | 4 +- paddle/fluid/operators/data_norm_op.cc | 48 ++-- paddle/fluid/operators/data_norm_op.cu | 52 ++-- paddle/fluid/operators/decode_jpeg_op.cc | 2 +- .../fluid/operators/deformable_conv_op_mlu.cc | 38 +-- .../operators/deformable_psroi_pooling_op.cu | 30 ++- .../operators/deformable_psroi_pooling_op.h | 27 +- .../fluid/operators/dequantize_abs_max_op.cc | 6 +- .../fluid/operators/dequantize_abs_max_op.cu | 6 +- .../fluid/operators/dequantize_abs_max_op.h | 12 +- paddle/fluid/operators/dequantize_log_op.cc | 6 +- paddle/fluid/operators/dequantize_log_op.cu | 6 +- paddle/fluid/operators/dequantize_log_op.h | 12 +- paddle/fluid/operators/dequantize_op.h | 1 - .../detection/anchor_generator_op.cu | 14 +- .../operators/detection/anchor_generator_op.h | 8 +- .../fluid/operators/detection/bbox_util.cu.h | 2 +- paddle/fluid/operators/detection/bbox_util.h | 38 +-- .../operators/detection/bipartite_match_op.cc | 
11 +- .../fluid/operators/detection/box_clip_op.cu | 4 +- .../fluid/operators/detection/box_clip_op.h | 2 +- .../operators/detection/box_coder_op_npu.cc | 78 +++--- .../detection/box_decoder_and_assign_op.cu | 6 +- .../detection/box_decoder_and_assign_op.h | 6 +- .../detection/collect_fpn_proposals_op.cc | 2 +- .../detection/collect_fpn_proposals_op.cu | 8 +- .../detection/collect_fpn_proposals_op.h | 4 +- .../detection/density_prior_box_op.cu | 12 +- .../detection/density_prior_box_op.h | 10 +- .../detection/density_prior_box_op_npu.cc | 52 ++-- .../detection/generate_mask_labels_op.cc | 22 +- .../detection/generate_proposal_labels_op.cc | 52 ++-- .../detection/generate_proposals_op.cc | 21 +- .../detection/generate_proposals_op.cu | 21 +- .../detection/generate_proposals_v2_op.cc | 2 +- .../operators/detection/iou_similarity_op.h | 2 +- .../detection/iou_similarity_op_mlu.cc | 34 ++- .../detection/iou_similarity_op_npu.cc | 34 ++- .../detection/iou_similarity_op_xpu.cc | 2 +- .../detection/locality_aware_nms_op.cc | 16 +- .../operators/detection/matrix_nms_op.cc | 2 +- .../detection/mine_hard_examples_op.cc | 10 +- .../operators/detection/multiclass_nms_op.cc | 27 +- paddle/fluid/operators/detection/nms_op.cc | 2 - .../detection/polygon_box_transform_op.cc | 6 +- .../detection/polygon_box_transform_op.cu | 6 +- .../fluid/operators/detection/prior_box_op.cc | 2 +- .../fluid/operators/detection/prior_box_op.h | 10 +- .../operators/detection/prior_box_op_npu.cc | 10 +- .../retinanet_detection_output_op.cc | 12 +- .../detection/roi_perspective_transform_op.cc | 21 +- .../detection/roi_perspective_transform_op.cu | 21 +- .../detection/rpn_target_assign_op.cc | 45 ++-- .../detection/sigmoid_focal_loss_op.cc | 2 - .../detection/sigmoid_focal_loss_op.cu | 21 +- .../detection/sigmoid_focal_loss_op.h | 21 +- .../operators/detection/target_assign_op.h | 6 +- .../fluid/operators/detection/yolo_box_op.cc | 2 - .../operators/detection/yolov3_loss_op.cc | 2 - paddle/fluid/operators/detection_map_op.cc | 2 +- paddle/fluid/operators/detection_map_op.h | 10 +- paddle/fluid/operators/dgc_clip_by_norm_op.cc | 2 +- paddle/fluid/operators/dgc_clip_by_norm_op.h | 8 +- paddle/fluid/operators/dgc_op.cc | 2 +- paddle/fluid/operators/dgc_op.h | 24 +- paddle/fluid/operators/diag_op.h | 4 +- paddle/fluid/operators/dropout_impl.cu.h | 42 +-- paddle/fluid/operators/dropout_impl_util.h | 4 +- paddle/fluid/operators/dropout_op.cc | 4 +- paddle/fluid/operators/dropout_op_mlu.cc | 16 +- paddle/fluid/operators/dropout_op_npu.cc | 16 +- paddle/fluid/operators/eig_op.h | 6 +- paddle/fluid/operators/eigh_op.cc | 2 - paddle/fluid/operators/eigvalsh_op.cc | 2 - .../elementwise/elementwise_add_op_mlu.cc | 12 +- .../elementwise/elementwise_add_op_npu.cc | 12 +- .../elementwise/elementwise_div_op.h | 4 +- .../elementwise/elementwise_div_op_mlu.cc | 14 +- .../elementwise/elementwise_div_op_npu.cc | 20 +- .../elementwise_floordiv_op_npu.cc | 8 +- .../elementwise/elementwise_max_op_npu.cc | 18 +- .../elementwise/elementwise_min_op_mlu.cc | 2 +- .../elementwise/elementwise_min_op_npu.cc | 18 +- .../operators/elementwise/elementwise_mlu.h | 26 +- .../elementwise/elementwise_mod_op_npu.cc | 8 +- .../elementwise/elementwise_mul_op.h | 4 +- .../elementwise/elementwise_mul_op_mlu.cc | 12 +- .../elementwise/elementwise_mul_op_npu.cc | 22 +- .../operators/elementwise/elementwise_npu.h | 14 +- .../operators/elementwise/elementwise_op.h | 20 +- .../elementwise/elementwise_op_broadcast.cu.h | 4 +- .../elementwise/elementwise_op_function.h | 118 
++++----- .../elementwise/elementwise_op_impl.cu.h | 4 +- .../elementwise/elementwise_pow_op_mlu.cc | 12 +- .../elementwise/elementwise_pow_op_npu.cc | 18 +- .../elementwise/elementwise_sub_op_mlu.cc | 14 +- .../elementwise/elementwise_sub_op_npu.cc | 20 +- .../operators/elementwise/elementwise_xpu.h | 10 +- .../mkldnn/elementwise_mkldnn_op.h | 25 +- paddle/fluid/operators/empty_op.cc | 2 +- paddle/fluid/operators/expand_as_op.cc | 2 - paddle/fluid/operators/expand_as_op.h | 24 +- paddle/fluid/operators/expand_as_v2_op.cc | 2 - paddle/fluid/operators/expand_as_v2_op.h | 2 +- paddle/fluid/operators/expand_as_v2_op_mlu.cc | 8 +- paddle/fluid/operators/expand_as_v2_op_npu.cc | 6 +- paddle/fluid/operators/expand_op.cc | 6 +- paddle/fluid/operators/expand_op.h | 28 +- paddle/fluid/operators/expand_op_npu.cc | 2 +- paddle/fluid/operators/expand_v2_op.cc | 6 +- paddle/fluid/operators/expand_v2_op.h | 12 +- paddle/fluid/operators/expand_v2_op_mlu.cc | 4 +- paddle/fluid/operators/expand_v2_op_npu.cc | 10 +- paddle/fluid/operators/eye_op_npu.cc | 4 +- paddle/fluid/operators/fake_dequantize_op.cc | 24 +- .../fluid/operators/fake_dequantize_op.cu.h | 12 +- paddle/fluid/operators/fake_dequantize_op.h | 24 +- paddle/fluid/operators/fake_quantize_op.cc | 50 ++-- paddle/fluid/operators/fake_quantize_op.cu.h | 50 ++-- paddle/fluid/operators/fake_quantize_op.h | 144 +++++------ paddle/fluid/operators/fc_op.h | 6 +- paddle/fluid/operators/feed_forward_test.cu | 4 +- paddle/fluid/operators/fill_any_like_op.cc | 2 +- .../fluid/operators/fill_any_like_op_mlu.cc | 2 +- .../fluid/operators/fill_any_like_op_npu.cc | 2 +- .../fill_constant_batch_size_like_op_mlu.cc | 2 +- .../fill_constant_batch_size_like_op_npu.cc | 4 +- paddle/fluid/operators/fill_constant_op.cc | 2 +- .../fluid/operators/fill_constant_op_mlu.cc | 4 +- .../fluid/operators/fill_constant_op_npu.cc | 2 +- paddle/fluid/operators/fill_diagonal_op.cc | 4 +- .../operators/fill_diagonal_tensor_op.cc | 4 +- paddle/fluid/operators/fill_zeros_like_op.h | 2 +- .../fluid/operators/fill_zeros_like_op_npu.cc | 4 +- paddle/fluid/operators/filter_by_instag_op.cu | 4 +- paddle/fluid/operators/filter_by_instag_op.h | 4 +- paddle/fluid/operators/flatten_op.cc | 2 +- paddle/fluid/operators/flatten_op_npu.cc | 6 +- paddle/fluid/operators/flip_op.cc | 1 - paddle/fluid/operators/fsp_op.h | 19 +- .../fluid/operators/fused/attn_bias_add.cu.h | 2 +- paddle/fluid/operators/fused/attn_gemm.h | 28 +- paddle/fluid/operators/fused/attn_gemm_int8.h | 62 ++--- .../fluid/operators/fused/conv_fusion_op.cu | 16 +- .../operators/fused/cudnn_bn_add_relu_test.cc | 171 ++++++------- .../fused/cudnn_bn_stats_finalize.cu.h | 2 +- .../operators/fused/cudnn_norm_conv.cu.h | 2 +- .../operators/fused/cudnn_norm_conv_test.cc | 92 +++---- .../fused/cudnn_scale_bias_add_relu.cu.h | 2 +- paddle/fluid/operators/fused/fmha_ref.h | 74 +++--- .../operators/fused/fused_attention_op.cc | 6 +- .../operators/fused/fused_attention_op.cu | 185 ++++++++------ ...sed_bias_dropout_residual_layer_norm_op.cc | 6 +- ...sed_bias_dropout_residual_layer_norm_op.cu | 51 ++-- .../operators/fused/fused_bn_activation_op.cc | 40 +-- .../operators/fused/fused_bn_activation_op.cu | 43 ++-- .../operators/fused/fused_bn_activation_op.h | 4 +- .../fused/fused_bn_add_activation_op.cc | 6 +- .../fused/fused_bn_add_activation_op.cu | 47 ++-- .../fused/fused_bn_add_activation_op.h | 2 +- .../fused/fused_dropout_act_bias_test.cu | 4 +- .../operators/fused/fused_dropout_helper.h | 9 +- .../fused/fused_elemwise_activation_op.cc | 4 
+- .../fused/fused_elemwise_activation_op.h | 120 +++++---- .../fused_embedding_eltwise_layernorm_op.cc | 2 +- .../fused_embedding_eltwise_layernorm_op.cu | 14 +- .../fused/fused_embedding_fc_lstm_op.cc | 26 +- .../fused/fused_embedding_fc_lstm_op.h | 2 +- .../fused/fused_embedding_seq_pool_op.h | 2 +- .../fused_fc_elementwise_layernorm_op.cu | 18 +- .../operators/fused/fused_feedforward_op.cc | 4 +- .../operators/fused/fused_feedforward_op.cu | 241 +++++++++--------- .../operators/fused/fused_gate_attention.h | 167 ++++++------ .../fused/fused_gate_attention_op.cc | 2 +- .../fused/fused_gate_attention_op.cu | 116 +++++---- .../operators/fused/fused_gemm_epilogue_op.cc | 2 +- .../operators/fused/fused_gemm_epilogue_op.cu | 30 ++- .../fused/fused_gemm_epilogue_op_xpu.cc | 28 +- ...ed_layernorm_residual_dropout_bias_test.cu | 4 +- .../fused/fused_multi_transformer_int8_op.cc | 2 +- .../fused/fused_multi_transformer_int8_op.cu | 47 ++-- .../fused/fused_multi_transformer_op.cc | 4 +- .../fused/fused_multi_transformer_op.cu | 38 +-- .../fused/fused_multi_transformer_op.h | 4 +- .../fused/fused_residual_dropout_bias_test.cu | 4 +- .../operators/fused/fused_seqpool_cvm_op.cu | 2 +- .../operators/fused/fused_softmax_mask.cu.h | 2 - .../fused/fusion_conv_inception_op.cu | 12 +- .../operators/fused/fusion_group_op_test.cc | 18 +- paddle/fluid/operators/fused/fusion_gru_op.cc | 10 +- paddle/fluid/operators/fused/fusion_gru_op.h | 2 +- .../fluid/operators/fused/fusion_lstm_op.cc | 16 +- paddle/fluid/operators/fused/fusion_lstm_op.h | 2 +- .../fused/fusion_repeated_fc_relu_op.cc | 10 +- .../fused/fusion_repeated_fc_relu_op.h | 2 +- .../fused/fusion_seqconv_eltadd_relu_op.cc | 6 +- .../fused/fusion_seqconv_eltadd_relu_op.h | 2 +- .../fused/fusion_seqexpand_concat_fc_op.cc | 6 +- .../fused/fusion_seqexpand_concat_fc_op.h | 2 +- .../fused/fusion_seqpool_concat_op.h | 2 +- .../fused/fusion_seqpool_cvm_concat_op.h | 2 +- .../fused/fusion_squared_mat_sub_op.cc | 12 +- .../fused/fusion_squared_mat_sub_op.h | 2 +- .../fusion_transpose_flatten_concat_op.cc | 2 - .../fusion_transpose_flatten_concat_op.cu.cc | 4 +- .../fused/mkldnn/fusion_gru_mkldnn_op.cc | 24 +- .../fused/mkldnn/fusion_lstm_mkldnn_op.cc | 32 ++- .../fused/mkldnn/fusion_rnn_mkldnn.h | 8 +- .../fused/mkldnn/multi_gru_mkldnn_op.cc | 16 +- paddle/fluid/operators/fused/multi_gru_op.h | 1 - .../operators/fused/multihead_matmul_op.cu | 12 +- .../operators/fused/resnet_basic_block_op.cc | 42 +-- .../fused/resnet_basic_block_op_xpu.cc | 232 ++++++++++------- .../fluid/operators/fused/resnet_unit_op.cc | 22 +- .../fluid/operators/fused/resnet_unit_op.cu | 106 ++++---- .../operators/fused/resnet_unit_op_xpu.cc | 98 +++---- .../operators/fused/skip_layernorm_op.cu | 12 +- .../fluid/operators/fused/yolo_box_head_op.cu | 6 +- .../fluid/operators/fused/yolo_box_post_op.cu | 14 +- .../fluid/operators/fused_softmax_mask_op.cc | 2 - .../fused_softmax_mask_upper_triangle_op.cc | 2 - .../fused_softmax_mask_upper_triangle_op.cu | 13 +- .../fluid/operators/fused_token_prune_op.cc | 2 - .../fluid/operators/fused_token_prune_op.cu | 33 +-- paddle/fluid/operators/gather_nd_op.cc | 2 +- paddle/fluid/operators/gather_nd_op_mlu.cc | 20 +- paddle/fluid/operators/gather_nd_op_npu.cc | 20 +- paddle/fluid/operators/gather_op.cc | 4 +- paddle/fluid/operators/gather_op_mlu.cc | 12 +- paddle/fluid/operators/gather_op_npu.cc | 16 +- .../fluid/operators/gather_scatter_kernel.cc | 20 +- .../fluid/operators/gather_scatter_kernel.cu | 17 +- .../fluid/operators/gather_scatter_kernel.h 
| 26 +- paddle/fluid/operators/gather_test.cc | 6 +- paddle/fluid/operators/gaussian_random_op.cc | 6 +- paddle/fluid/operators/gaussian_random_op.cu | 2 +- .../fluid/operators/gaussian_random_op_mlu.cc | 4 +- .../fluid/operators/gaussian_random_op_npu.cc | 4 +- paddle/fluid/operators/gelu_op_npu.cc | 12 +- .../fluid/operators/graph_khop_sampler_op.cu | 20 +- .../fluid/operators/graph_khop_sampler_op.h | 20 +- .../operators/grid_sampler_cudnn_op.cu.cc | 20 +- paddle/fluid/operators/grid_sampler_op.cc | 2 +- paddle/fluid/operators/grid_sampler_op_mlu.cc | 8 +- paddle/fluid/operators/group_norm_op.cc | 4 +- paddle/fluid/operators/group_norm_op.cu | 33 +-- paddle/fluid/operators/group_norm_op.h | 31 +-- paddle/fluid/operators/group_norm_op_npu.cc | 69 +++-- paddle/fluid/operators/gru_op.cc | 8 +- paddle/fluid/operators/gru_op.cu.cc | 6 +- paddle/fluid/operators/gru_op.h | 18 +- paddle/fluid/operators/gru_unit_op.cc | 2 - paddle/fluid/operators/gru_unit_op.h | 41 +-- paddle/fluid/operators/hinge_loss_op.h | 14 +- paddle/fluid/operators/histogram_op.cc | 1 - paddle/fluid/operators/huber_loss_op_npu.cc | 46 ++-- paddle/fluid/operators/im2sequence_op.h | 14 +- paddle/fluid/operators/increment_op_npu.cc | 4 +- paddle/fluid/operators/index_sample_op_npu.cc | 14 +- paddle/fluid/operators/index_select_op.cc | 2 - paddle/fluid/operators/index_select_op.h | 2 +- paddle/fluid/operators/index_select_op_npu.cc | 15 +- paddle/fluid/operators/inplace_abn_op.cc | 94 +++---- paddle/fluid/operators/inplace_abn_op.cu | 48 ++-- paddle/fluid/operators/inplace_abn_op.h | 2 +- paddle/fluid/operators/instance_norm_op.cc | 20 +- paddle/fluid/operators/instance_norm_op.h | 2 +- .../fluid/operators/instance_norm_op_npu.cc | 14 +- paddle/fluid/operators/interpolate_op.cc | 5 +- paddle/fluid/operators/interpolate_op.cu | 73 +++--- paddle/fluid/operators/interpolate_op.h | 125 ++++----- paddle/fluid/operators/interpolate_op_npu.cc | 20 +- paddle/fluid/operators/interpolate_v2_op.cc | 5 +- .../fluid/operators/interpolate_v2_op_mlu.cc | 29 ++- .../fluid/operators/interpolate_v2_op_npu.cc | 111 ++++---- paddle/fluid/operators/ipu/ipu_runtime_op.cc | 4 +- paddle/fluid/operators/isfinite_op.h | 60 ++--- paddle/fluid/operators/jit/benchmark.cc | 2 +- paddle/fluid/operators/kldiv_loss_op.cc | 2 - paddle/fluid/operators/kldiv_loss_op_npu.cc | 16 +- paddle/fluid/operators/kron_op.cc | 4 +- paddle/fluid/operators/l1_norm_op.cc | 2 - paddle/fluid/operators/l1_norm_op.h | 14 +- paddle/fluid/operators/label_smooth_op_mlu.cc | 6 +- paddle/fluid/operators/label_smooth_op_npu.cc | 22 +- paddle/fluid/operators/layer_norm_kernel.cu.h | 6 +- paddle/fluid/operators/layer_norm_op.cc | 5 +- paddle/fluid/operators/layer_norm_op_mlu.cc | 31 +-- paddle/fluid/operators/layer_norm_op_npu.cc | 60 ++--- paddle/fluid/operators/layout_utils.h | 18 +- .../fluid/operators/limit_by_capacity_op.cu | 10 +- paddle/fluid/operators/linear_chain_crf_op.h | 125 +++++---- paddle/fluid/operators/linspace_op.cc | 2 +- paddle/fluid/operators/lod_reset_op.cc | 2 +- paddle/fluid/operators/lod_reset_op.h | 6 +- .../fluid/operators/lod_tensor_to_array_op.cc | 12 +- paddle/fluid/operators/log_loss_op_npu.cc | 39 +-- paddle/fluid/operators/log_loss_op_xpu.cc | 17 +- paddle/fluid/operators/log_softmax_op_npu.cc | 10 +- .../fluid/operators/lookup_table_dequant_op.h | 2 +- paddle/fluid/operators/lookup_table_op.h | 2 +- paddle/fluid/operators/lookup_table_v2_op.cu | 24 +- paddle/fluid/operators/lookup_table_v2_op.h | 6 +- .../fluid/operators/lookup_table_v2_op_mlu.cc | 2 +- 
.../fluid/operators/lookup_table_v2_op_npu.cc | 2 +- paddle/fluid/operators/lrn_op.cc | 25 +- paddle/fluid/operators/lrn_op.cu | 16 +- paddle/fluid/operators/lrn_op.h | 37 +-- paddle/fluid/operators/lstm_op.h | 31 +-- paddle/fluid/operators/lstm_unit_op.cu | 27 +- paddle/fluid/operators/lstm_unit_op.h | 27 +- paddle/fluid/operators/lstmp_op.h | 37 +-- paddle/fluid/operators/margin_rank_loss_op.h | 16 +- paddle/fluid/operators/marker_op.cu | 4 +- .../fluid/operators/masked_select_op_mlu.cc | 12 +- .../fluid/operators/masked_select_op_npu.cc | 12 +- .../fluid/operators/match_matrix_tensor_op.cc | 8 +- .../fluid/operators/match_matrix_tensor_op.h | 2 +- paddle/fluid/operators/math/beam_search.cc | 2 +- paddle/fluid/operators/math/beam_search.cu | 2 +- paddle/fluid/operators/math/beam_search.h | 2 +- .../fluid/operators/math/beam_search_npu.cc | 14 +- .../fluid/operators/math/beam_search_xpu.cc | 2 +- .../fluid/operators/math/concat_and_split.cc | 40 +-- .../fluid/operators/math/concat_and_split.cu | 10 +- .../fluid/operators/math/concat_and_split.h | 10 +- paddle/fluid/operators/math/concat_test.cc | 56 ++-- paddle/fluid/operators/math/context_project.h | 10 +- paddle/fluid/operators/math/cross_entropy.cc | 20 +- paddle/fluid/operators/math/cross_entropy.cu | 6 +- paddle/fluid/operators/math/cross_entropy.h | 6 +- paddle/fluid/operators/math/im2col.cc | 16 +- paddle/fluid/operators/math/im2col.cu | 16 +- paddle/fluid/operators/math/im2col.h | 8 +- paddle/fluid/operators/math/im2col_cfo_cpu.h | 12 +- paddle/fluid/operators/math/im2col_test.cc | 26 +- .../fluid/operators/math/matrix_bit_code.cc | 106 ++++---- paddle/fluid/operators/math/matrix_bit_code.h | 46 ++-- paddle/fluid/operators/math/maxouting.cc | 12 +- paddle/fluid/operators/math/maxouting.cu | 12 +- paddle/fluid/operators/math/maxouting.h | 12 +- paddle/fluid/operators/math/sample_prob.cu | 8 +- paddle/fluid/operators/math/sample_prob.h | 14 +- .../operators/math/selected_rows_functor.cc | 8 +- .../operators/math/selected_rows_functor.cu | 8 +- .../operators/math/selected_rows_functor.h | 8 +- .../math/selected_rows_functor_test.cc | 12 +- .../math/selected_rows_functor_test.cu.cc | 19 +- .../fluid/operators/math/sequence_padding.cc | 4 +- .../fluid/operators/math/sequence_pooling.cc | 12 +- .../fluid/operators/math/sequence_pooling.cu | 4 +- .../fluid/operators/math/sequence_pooling.h | 4 +- .../operators/math/sequence_pooling_test.cc | 4 +- paddle/fluid/operators/math/softmax.cu | 12 +- paddle/fluid/operators/math/softmax.h | 20 +- paddle/fluid/operators/math/softmax_impl.h | 50 ++-- paddle/fluid/operators/math/tree2col.cc | 14 +- paddle/fluid/operators/math/tree2col.cu | 14 +- paddle/fluid/operators/math/tree2col.h | 14 +- paddle/fluid/operators/math/unpooling.cc | 32 +-- paddle/fluid/operators/math/unpooling.cu | 32 +-- paddle/fluid/operators/math/unpooling.h | 32 +-- paddle/fluid/operators/math/vol2col.cc | 8 +- paddle/fluid/operators/math/vol2col.cu | 8 +- paddle/fluid/operators/math/vol2col.h | 8 +- paddle/fluid/operators/math/vol2col_test.cc | 16 +- paddle/fluid/operators/matmul_op.cc | 63 +++-- paddle/fluid/operators/matmul_op_mlu.cc | 40 +-- paddle/fluid/operators/matmul_op_npu.cc | 46 ++-- paddle/fluid/operators/matmul_op_xpu.cc | 18 +- paddle/fluid/operators/matmul_v2_op.cc | 4 +- paddle/fluid/operators/matmul_v2_op.h | 10 +- paddle/fluid/operators/matmul_v2_op_mlu.cc | 46 ++-- paddle/fluid/operators/matmul_v2_op_npu.cc | 40 +-- paddle/fluid/operators/mean_iou_op.cu | 16 +- paddle/fluid/operators/mean_iou_op.h | 18 +- 
paddle/fluid/operators/mean_op_mlu.cc | 12 +- paddle/fluid/operators/mean_op_npu.cc | 6 +- paddle/fluid/operators/memcpy_d2h_op.cc | 2 +- paddle/fluid/operators/memcpy_h2d_op.cc | 2 +- paddle/fluid/operators/memcpy_op.cc | 2 +- paddle/fluid/operators/meshgrid_op.cc | 4 +- paddle/fluid/operators/meshgrid_op_mlu.cc | 4 +- paddle/fluid/operators/meshgrid_op_npu.cc | 6 +- .../operators/metrics/accuracy_op_mlu.cc | 10 +- .../operators/metrics/accuracy_op_npu.cc | 12 +- .../operators/metrics/accuracy_op_xpu.cc | 14 +- .../operators/metrics/precision_recall_op.h | 16 +- paddle/fluid/operators/minus_op.h | 6 +- paddle/fluid/operators/miopen_lstm_cache.h | 2 +- paddle/fluid/operators/miopen_rnn_cache.h | 4 +- .../operators/mkldnn/activation_mkldnn_op.cc | 18 +- .../operators/mkldnn/batch_norm_mkldnn_op.cc | 49 ++-- .../operators/mkldnn/concat_mkldnn_op.cc | 37 +-- .../fluid/operators/mkldnn/conv_mkldnn_op.cc | 98 +++---- .../mkldnn/conv_transpose_mkldnn_op.cc | 27 +- .../operators/mkldnn/dequantize_mkldnn_op.cc | 8 +- .../operators/mkldnn/expand_v2_mkldnn_op.cc | 10 +- paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc | 57 +++-- .../mkldnn/fill_constant_mkldnn_op.cc | 8 +- .../operators/mkldnn/interpolate_mkldnn_op.cc | 16 +- .../operators/mkldnn/layer_norm_mkldnn_op.cc | 22 +- .../fluid/operators/mkldnn/lrn_mkldnn_op.cc | 28 +- .../fluid/operators/mkldnn/matmul_mkldnn_op.h | 8 +- .../operators/mkldnn/matmul_v2_mkldnn_op.cc | 51 ++-- .../fluid/operators/mkldnn/mul_mkldnn_op.cc | 16 +- .../fluid/operators/mkldnn/pad3d_mkldnn_op.cc | 8 +- .../fluid/operators/mkldnn/prelu_mkldnn_op.cc | 27 +- .../operators/mkldnn/quantize_mkldnn_op.cc | 6 +- .../operators/mkldnn/requantize_mkldnn_op.cc | 8 +- .../operators/mkldnn/reshape_mkldnn_op.cc | 8 +- .../fluid/operators/mkldnn/shape_mkldnn_op.cc | 4 +- .../mkldnn/shuffle_channel_mkldnn_op.cc | 7 +- .../fluid/operators/mkldnn/slice_mkldnn_op.cc | 32 ++- .../operators/mkldnn/softmax_mkldnn_op.cc | 23 +- .../operators/mkldnn/softplus_mkldnn_op.h | 8 +- .../fluid/operators/mkldnn/split_mkldnn_op.cc | 11 +- .../fluid/operators/mkldnn/stack_mkldnn_op.cc | 13 +- .../fluid/operators/mkldnn/sum_mkldnn_op.cc | 4 +- .../operators/mkldnn/transpose_mkldnn_op.cc | 13 +- paddle/fluid/operators/mlu/mlu_baseop.cc | 8 +- paddle/fluid/operators/mlu/mlu_baseop.h | 20 +- .../fluid/operators/modified_huber_loss_op.cu | 10 +- .../fluid/operators/modified_huber_loss_op.h | 18 +- paddle/fluid/operators/mul_op.cc | 1 - paddle/fluid/operators/mul_op_npu.cc | 16 +- paddle/fluid/operators/multi_dot_op.cc | 2 +- paddle/fluid/operators/multinomial_op_npu.cc | 6 +- paddle/fluid/operators/multiplex_op.cc | 2 +- paddle/fluid/operators/nccl/nccl_op.cu.cc | 2 +- paddle/fluid/operators/nce_op.cc | 2 - paddle/fluid/operators/nce_op.h | 68 +++-- paddle/fluid/operators/norm_op_npu.cc | 16 +- paddle/fluid/operators/norm_utils.cu.h | 2 +- paddle/fluid/operators/number_count_op.cu | 2 +- paddle/fluid/operators/one_hot_op.cc | 2 +- paddle/fluid/operators/one_hot_op.cu | 4 +- paddle/fluid/operators/one_hot_op.h | 4 +- paddle/fluid/operators/one_hot_op_npu.cc | 4 +- paddle/fluid/operators/one_hot_op_xpu.cc | 4 +- paddle/fluid/operators/one_hot_v2_op.cc | 2 +- paddle/fluid/operators/one_hot_v2_op_mlu.cc | 5 +- paddle/fluid/operators/one_hot_v2_op_npu.cc | 4 +- .../fluid/operators/optimizers/adadelta_op.cc | 2 +- .../fluid/operators/optimizers/adagrad_op.cc | 2 +- paddle/fluid/operators/optimizers/adam_op.h | 4 +- .../operators/optimizers/adam_op_functor.h | 4 +- .../fluid/operators/optimizers/adam_op_mlu.cc | 
56 ++-- .../fluid/operators/optimizers/adam_op_npu.cc | 22 +- .../fluid/operators/optimizers/adamax_op.cc | 2 +- .../optimizers/decayed_adagrad_op.cc | 2 +- .../operators/optimizers/decayed_adagrad_op.h | 12 +- .../operators/optimizers/dgc_momentum_op.cc | 2 +- .../operators/optimizers/dgc_momentum_op.h | 42 +-- .../distributed_fused_lamb_init_op.cu | 72 +++--- .../optimizers/distributed_fused_lamb_op.cc | 2 +- .../optimizers/distributed_fused_lamb_op.cu | 39 ++- paddle/fluid/operators/optimizers/dpsgd_op.cc | 2 +- paddle/fluid/operators/optimizers/dpsgd_op.h | 8 +- paddle/fluid/operators/optimizers/ftrl_op.cc | 2 +- paddle/fluid/operators/optimizers/ftrl_op.h | 18 +- paddle/fluid/operators/optimizers/lamb_op.cc | 2 +- .../operators/optimizers/lars_momentum_op.cu | 2 +- .../operators/optimizers/lars_momentum_op.h | 2 +- .../operators/optimizers/merged_adam_op.cc | 4 +- .../optimizers/merged_momentum_op_mlu.cc | 12 +- .../optimizers/merged_momentum_op_npu.cc | 12 +- .../optimizers/mkldnn/sgd_mkldnn_op.cc | 12 +- .../fluid/operators/optimizers/momentum_op.cc | 2 +- .../operators/optimizers/momentum_op_mlu.cc | 12 +- .../operators/optimizers/momentum_op_npu.cc | 12 +- .../pow2_decay_with_linear_warmup_op.h | 8 +- .../pow2_decay_with_linear_warmup_op_xpu.cc | 8 +- .../optimizers/proximal_adagrad_op.cc | 2 +- .../optimizers/proximal_adagrad_op.h | 18 +- .../operators/optimizers/proximal_gd_op.cc | 2 +- .../operators/optimizers/proximal_gd_op.h | 13 +- .../operators/optimizers/rmsprop_op_npu.cc | 6 +- paddle/fluid/operators/optimizers/sgd_op.cc | 2 +- paddle/fluid/operators/optimizers/sgd_op.cu | 18 +- paddle/fluid/operators/optimizers/sgd_op.h | 34 +-- .../optimizers/sparse_momentum_op.cc | 2 +- .../operators/optimizers/sparse_momentum_op.h | 32 ++- paddle/fluid/operators/p_norm_op_npu.cc | 14 +- paddle/fluid/operators/pad2d_op.cc | 17 +- paddle/fluid/operators/pad2d_op.cu | 15 +- paddle/fluid/operators/pad3d_op.cc | 6 +- paddle/fluid/operators/pad3d_op_npu.cc | 13 +- .../fluid/operators/pad_constant_like_op.cc | 2 - paddle/fluid/operators/pad_constant_like_op.h | 14 +- paddle/fluid/operators/pad_op.cc | 2 - paddle/fluid/operators/pad_op_npu.cc | 11 +- paddle/fluid/operators/partial_concat_op.cc | 4 +- paddle/fluid/operators/partial_concat_op.cu | 8 +- paddle/fluid/operators/partial_concat_op.h | 8 +- paddle/fluid/operators/partial_sum_op.cc | 4 +- paddle/fluid/operators/partial_sum_op.cu | 9 +- paddle/fluid/operators/partial_sum_op.h | 8 +- paddle/fluid/operators/pool_op.cc | 6 +- paddle/fluid/operators/pool_op.h | 6 +- paddle/fluid/operators/pool_op_mlu.cc | 32 +-- paddle/fluid/operators/pool_op_npu.cc | 14 +- .../operators/positive_negative_pair_op.h | 25 +- paddle/fluid/operators/prelu_op.cc | 2 +- paddle/fluid/operators/prroi_pool_op.cc | 2 +- paddle/fluid/operators/prroi_pool_op.cu | 28 +- paddle/fluid/operators/prroi_pool_op.h | 22 +- paddle/fluid/operators/pscore/fake_init_op.cc | 2 +- .../operators/pull_box_extended_sparse_op.h | 10 +- paddle/fluid/operators/pull_box_sparse_op.h | 6 +- paddle/fluid/operators/pull_gpups_sparse_op.h | 6 +- paddle/fluid/operators/put_along_axis_op.cc | 4 +- paddle/fluid/operators/pyramid_hash_op.cc | 10 +- paddle/fluid/operators/quantize_linear_op.cc | 10 +- paddle/fluid/operators/quantize_linear_op.cu | 6 +- paddle/fluid/operators/quantize_linear_op.h | 30 +-- paddle/fluid/operators/quantize_op.h | 1 - paddle/fluid/operators/random_routing_op.cu | 2 +- paddle/fluid/operators/randperm_op.h | 4 +- paddle/fluid/operators/randperm_op_mlu.cc | 4 +- 
paddle/fluid/operators/range_op.cc | 2 +- paddle/fluid/operators/range_op.h | 8 +- paddle/fluid/operators/range_op_mlu.cc | 10 +- paddle/fluid/operators/range_op_npu.cc | 10 +- paddle/fluid/operators/rank_attention_op.cc | 2 +- paddle/fluid/operators/rank_attention_op.cu | 34 +-- paddle/fluid/operators/rank_loss_op.h | 20 +- paddle/fluid/operators/recurrent_op.cc | 10 +- .../operators/reduce_ops/logsumexp_op_xpu.cc | 4 +- .../operators/reduce_ops/reduce_any_op_npu.cc | 6 +- .../reduce_ops/reduce_any_op_npu_test.cc | 2 +- .../operators/reduce_ops/reduce_max_op_mlu.cc | 6 +- .../operators/reduce_ops/reduce_max_op_npu.cc | 18 +- .../reduce_ops/reduce_mean_op_mlu.cc | 8 +- .../reduce_ops/reduce_mean_op_npu.cc | 12 +- .../operators/reduce_ops/reduce_min_op_mlu.cc | 6 +- .../operators/reduce_ops/reduce_min_op_npu.cc | 8 +- .../fluid/operators/reduce_ops/reduce_op.cu.h | 4 +- paddle/fluid/operators/reduce_ops/reduce_op.h | 70 ++--- .../operators/reduce_ops/reduce_op_function.h | 14 +- .../operators/reduce_ops/reduce_op_mlu.h | 4 +- .../operators/reduce_ops/reduce_op_xpu.h | 4 +- .../reduce_ops/reduce_prod_op_npu.cc | 8 +- .../operators/reduce_ops/reduce_sum_op.h | 13 +- .../operators/reduce_ops/reduce_sum_op_mlu.cc | 8 +- .../operators/reduce_ops/reduce_sum_op_npu.cc | 15 +- .../operators/reduce_ops/reduce_sum_op_xpu.cc | 7 +- .../fluid/operators/repeat_interleave_op.cc | 2 - paddle/fluid/operators/requantize_op.h | 1 - paddle/fluid/operators/reshape_op.cc | 18 +- paddle/fluid/operators/reshape_op_mlu.cc | 10 +- paddle/fluid/operators/reshape_op_npu.cc | 10 +- paddle/fluid/operators/rnn_op_mlu.cc | 68 ++--- paddle/fluid/operators/roi_align_op.cc | 2 +- paddle/fluid/operators/roi_align_op_mlu.cc | 14 +- paddle/fluid/operators/roi_align_op_npu.cc | 23 +- paddle/fluid/operators/roi_pool_op.cc | 2 +- paddle/fluid/operators/roll_op.cc | 2 - paddle/fluid/operators/row_conv_op.cc | 21 +- paddle/fluid/operators/row_conv_op.cu | 11 +- paddle/fluid/operators/rrelu_op.cc | 2 - paddle/fluid/operators/run_program_op.cc | 4 +- paddle/fluid/operators/sample_logits_op.cu | 38 +-- paddle/fluid/operators/sample_logits_op.h | 56 ++-- paddle/fluid/operators/sampling_id_op.cc | 2 +- paddle/fluid/operators/sampling_id_op.h | 6 +- paddle/fluid/operators/save_combine_op.cc | 4 +- paddle/fluid/operators/scale_op_mlu.cc | 10 +- paddle/fluid/operators/scale_op_npu.cc | 10 +- paddle/fluid/operators/scatter_nd_add_op.cc | 2 +- paddle/fluid/operators/scatter_op_mlu.cc | 8 +- paddle/fluid/operators/scatter_op_npu.cc | 12 +- paddle/fluid/operators/scatter_test.cc | 6 +- paddle/fluid/operators/search_compute.h | 2 +- paddle/fluid/operators/seed_op.cc | 2 +- paddle/fluid/operators/seed_op.cu | 2 +- paddle/fluid/operators/seed_op.h | 4 +- paddle/fluid/operators/seed_op_npu.cc | 2 +- .../sequence_ops/sequence_concat_op.h | 16 +- .../operators/sequence_ops/sequence_conv_op.h | 19 +- .../sequence_ops/sequence_conv_op_xpu.cc | 9 +- .../sequence_ops/sequence_mask_op.cc | 2 +- .../operators/sequence_ops/sequence_mask_op.h | 10 +- .../sequence_ops/sequence_mask_op_npu.cc | 10 +- .../operators/sequence_ops/sequence_pool_op.h | 10 +- .../sequence_ops/sequence_scatter_op.cc | 2 +- .../sequence_ops/sequence_scatter_op.h | 10 +- .../sequence_ops/sequence_slice_op.h | 18 +- .../sequence_softmax_cudnn_op.cu.cc | 2 +- .../sequence_ops/sequence_softmax_op.h | 2 +- .../sequence_topk_avg_pooling_op.h | 6 +- .../sequence_ops/sequence_unpad_op.h | 2 +- paddle/fluid/operators/set_value_op.cc | 4 +- paddle/fluid/operators/set_value_op.h | 2 +- 
paddle/fluid/operators/set_value_op_mlu.cc | 16 +- paddle/fluid/operators/set_value_op_npu.cc | 16 +- paddle/fluid/operators/shape_op.cc | 2 +- paddle/fluid/operators/shape_op_mlu.cc | 4 +- paddle/fluid/operators/shape_op_npu.cc | 6 +- paddle/fluid/operators/shard_index_op_npu.cc | 2 +- paddle/fluid/operators/share_buffer_op.cc | 2 +- paddle/fluid/operators/share_buffer_op.h | 4 +- paddle/fluid/operators/shuffle_batch_op.cc | 2 +- paddle/fluid/operators/shuffle_batch_op.cu | 18 +- paddle/fluid/operators/shuffle_batch_op.h | 2 +- paddle/fluid/operators/shuffle_channel_op.cu | 10 +- paddle/fluid/operators/shuffle_channel_op.h | 8 +- .../sigmoid_cross_entropy_with_logits_op.cc | 1 - ...igmoid_cross_entropy_with_logits_op_mlu.cc | 16 +- ...igmoid_cross_entropy_with_logits_op_npu.cc | 16 +- paddle/fluid/operators/similarity_focus_op.h | 6 +- paddle/fluid/operators/size_op.cc | 2 +- paddle/fluid/operators/size_op_mlu.cc | 4 +- paddle/fluid/operators/size_op_npu.cc | 4 +- paddle/fluid/operators/slice_op.cc | 11 +- paddle/fluid/operators/slice_op.h | 29 ++- paddle/fluid/operators/slice_op_mlu.cc | 33 ++- paddle/fluid/operators/slice_op_npu.cc | 33 ++- paddle/fluid/operators/smooth_l1_loss_op.h | 26 +- .../fluid/operators/smooth_l1_loss_op_npu.cc | 26 +- paddle/fluid/operators/softmax_op_mlu.cc | 2 +- paddle/fluid/operators/softmax_op_npu.cc | 2 +- .../softmax_with_cross_entropy_op_mlu.cc | 22 +- .../softmax_with_cross_entropy_op_npu.cc | 20 +- paddle/fluid/operators/space_to_depth_op.cc | 2 +- paddle/fluid/operators/sparse_attention_op.cu | 93 +++---- paddle/fluid/operators/spectral_norm_op.cc | 2 - paddle/fluid/operators/split_op.cc | 6 +- paddle/fluid/operators/split_op_mlu.cc | 10 +- paddle/fluid/operators/split_op_npu.cc | 6 +- paddle/fluid/operators/spp_op.h | 22 +- .../fluid/operators/squared_l2_distance_op.h | 18 +- paddle/fluid/operators/squared_l2_norm_op.cc | 2 - .../fluid/operators/squared_l2_norm_op_mlu.cc | 22 +- .../fluid/operators/squared_l2_norm_op_npu.cc | 14 +- paddle/fluid/operators/stack_op_mlu.cc | 6 +- paddle/fluid/operators/stack_op_npu.cc | 14 +- paddle/fluid/operators/stft_op.h | 14 +- paddle/fluid/operators/strided_memcpy.h | 6 +- paddle/fluid/operators/strided_slice_op.cc | 4 +- .../fluid/operators/strided_slice_op_mlu.cc | 38 +-- .../fluid/operators/strided_slice_op_npu.cc | 38 +-- .../operators/string/faster_tokenizer_op.cc | 2 +- .../operators/string/faster_tokenizer_op.h | 4 +- paddle/fluid/operators/sum_op.cc | 1 - paddle/fluid/operators/sum_op_mlu.cc | 4 +- paddle/fluid/operators/sum_op_npu.cc | 6 +- paddle/fluid/operators/sum_op_xpu.cc | 2 +- paddle/fluid/operators/svd_helper.h | 130 +++++----- .../fluid/operators/sync_batch_norm_op_mlu.cc | 43 ++-- .../fluid/operators/sync_batch_norm_op_npu.cc | 39 +-- paddle/fluid/operators/take_along_axis_op.cc | 4 +- .../fluid/operators/take_along_axis_op_npu.cc | 16 +- paddle/fluid/operators/tdm_child_op.h | 2 +- paddle/fluid/operators/tdm_sampler_op.h | 2 +- .../teacher_student_sigmoid_loss_op.cc | 2 +- .../teacher_student_sigmoid_loss_op.h | 19 +- paddle/fluid/operators/temporal_shift_op.cc | 2 - paddle/fluid/operators/temporal_shift_op.cu | 12 +- paddle/fluid/operators/temporal_shift_op.h | 8 +- .../operators/tensor_array_to_tensor_op.cc | 1 - .../operators/tensorrt/tensorrt_engine_op.h | 2 +- .../test_leaky_relu_grad_grad_functor.h | 21 +- paddle/fluid/operators/tile_op.cc | 6 +- paddle/fluid/operators/tile_op_functor.h | 6 +- paddle/fluid/operators/tile_op_mlu.cc | 8 +- paddle/fluid/operators/tile_op_npu.cc | 8 +- 
paddle/fluid/operators/top_k_function_cuda.h | 8 +- paddle/fluid/operators/top_k_op.cu | 20 +- paddle/fluid/operators/top_k_op.h | 20 +- paddle/fluid/operators/top_k_op_mlu.cc | 4 +- paddle/fluid/operators/top_k_op_npu.cc | 4 +- paddle/fluid/operators/top_k_op_xpu.cc | 10 +- paddle/fluid/operators/top_k_v2_op_mlu.cc | 4 +- paddle/fluid/operators/top_k_v2_op_npu.cc | 10 +- paddle/fluid/operators/transfer_layout_op.cc | 2 +- paddle/fluid/operators/transfer_layout_op.h | 4 +- paddle/fluid/operators/transpose_op.cc | 6 +- paddle/fluid/operators/transpose_op.cu.h | 14 +- paddle/fluid/operators/transpose_op.h | 4 +- paddle/fluid/operators/tree_conv_op.h | 24 +- paddle/fluid/operators/tril_triu_op_mlu.cc | 4 +- paddle/fluid/operators/tril_triu_op_npu.cc | 4 +- .../truncated_gaussian_random_op_mlu.cc | 4 +- .../truncated_gaussian_random_op_npu.cc | 6 +- paddle/fluid/operators/unbind_op.cc | 1 - .../uniform_random_inplace_op_xpu.cc | 2 +- paddle/fluid/operators/uniform_random_op.cc | 6 +- paddle/fluid/operators/uniform_random_op.cu | 6 +- paddle/fluid/operators/uniform_random_op.h | 16 +- .../fluid/operators/uniform_random_op_mlu.cc | 6 +- .../fluid/operators/uniform_random_op_npu.cc | 6 +- paddle/fluid/operators/unique_op.h | 76 +++--- .../fluid/operators/unique_with_counts_op.h | 8 +- paddle/fluid/operators/unsqueeze_op.cc | 2 +- paddle/fluid/operators/unsqueeze_op.h | 4 +- paddle/fluid/operators/unstack_op_mlu.cc | 8 +- paddle/fluid/operators/unstack_op_npu.cc | 12 +- paddle/fluid/operators/utils.h | 18 +- paddle/fluid/operators/var_conv_2d_op.cc | 8 +- paddle/fluid/operators/var_conv_2d_op.h | 2 +- paddle/fluid/operators/where_index_op_mlu.cc | 8 +- paddle/fluid/operators/where_index_op_npu.cc | 6 +- paddle/fluid/operators/where_op_mlu.cc | 8 +- paddle/fluid/operators/where_op_npu.cc | 18 +- .../platform/device/gpu/cuda/cudnn_desc.h | 7 +- .../platform/device/gpu/cuda/cudnn_helper.h | 2 +- .../platform/device/gpu/cudnn_desc_test.cc | 2 +- .../platform/device/gpu/rocm/miopen_desc.h | 1 - .../platform/device/gpu/rocm/miopen_helper.h | 2 +- .../fluid/platform/device/ipu/ipu_backend.cc | 4 +- .../fluid/platform/device/ipu/ipu_backend.h | 4 +- paddle/fluid/platform/device/ipu/ipu_utils.h | 2 +- .../fluid/platform/device/npu/npu_op_runner.h | 2 +- paddle/fluid/platform/device_code_test.cc | 12 +- paddle/fluid/platform/mkldnn_helper.h | 6 +- paddle/fluid/platform/mkldnn_reuse.h | 54 ++-- paddle/fluid/pybind/eager.cc | 18 +- paddle/fluid/pybind/eager_functions.cc | 2 +- paddle/fluid/pybind/eager_method.cc | 12 +- paddle/fluid/pybind/eager_utils.cc | 8 +- paddle/fluid/pybind/eager_utils.h | 2 +- paddle/fluid/pybind/imperative.cc | 10 +- paddle/fluid/pybind/inference_api.cc | 2 +- paddle/fluid/pybind/pybind.cc | 2 +- paddle/fluid/pybind/tensor.cc | 140 +++++----- paddle/fluid/pybind/tensor_py.h | 80 +++--- paddle/infrt/api/infrt_api.cc | 20 +- paddle/infrt/api/infrt_api.h | 4 +- paddle/infrt/api/infrt_api_test.cc.in | 8 +- paddle/infrt/backends/tensorrt/trt_engine.cc | 6 +- paddle/infrt/backends/tensorrt/trt_engine.h | 9 +- paddle/infrt/backends/tensorrt/trt_utils.h | 4 +- .../infrt/pass/infrt_weights_unfold_pass.cc | 2 +- paddle/infrt/host_context/kernel_frame.cc | 2 +- .../host_context/mlir_to_runtime_translate.cc | 6 +- paddle/infrt/host_context/value.cc | 4 +- paddle/infrt/host_context/value.h | 12 +- .../infrt/kernel/phi/dense_tensor_kernels.cc | 56 ++-- .../infrt/kernel/phi/dense_tensor_kernels.h | 21 +- .../infershaped/infershape_launchers_test.cc | 12 +- .../infershaped_kernel_launcher.cc | 6 +- 
 .../phi/infershaped/infershaped_utils.h | 2 +- paddle/infrt/kernel/tensor_kernels.cc | 6 +- paddle/infrt/kernel/tensorrt/trt_helper.h | 2 +- paddle/infrt/kernel/tensorrt/trt_kernels.cc | 10 +- paddle/infrt/kernel/tensorrt/trt_kernels.h | 4 +- paddle/infrt/paddle/model_parser.cc | 4 +- paddle/infrt/paddle/model_parser.h | 2 +- paddle/infrt/tensor/phi/tensor_map.cc | 7 +- paddle/infrt/tensor/phi/tensor_map.h | 6 +- paddle/phi/api/include/tensor.h | 2 +- paddle/phi/api/lib/utils/tensor_utils.cc | 6 +- paddle/phi/api/lib/utils/tensor_utils.h | 4 +- .../phi/backends/custom/custom_device_test.cc | 14 +- paddle/phi/core/dense_tensor.h | 4 +- paddle/phi/core/dense_tensor.inl | 6 +- paddle/phi/core/dense_tensor_impl.cc | 2 +- paddle/phi/kernels/funcs/fc_functor.cc | 4 +- paddle/phi/kernels/funcs/math_function.cc | 36 +-- paddle/phi/kernels/funcs/math_function.cu | 28 +- paddle/phi/kernels/funcs/math_function.h | 42 ++- paddle/phi/kernels/funcs/math_function_impl.h | 46 ++-- paddle/phi/kernels/funcs/sequence2batch.cc | 4 +- paddle/phi/kernels/funcs/sequence2batch.cu | 4 +- paddle/phi/kernels/funcs/sequence2batch.h | 4 +- paddle/phi/kernels/gpu/batch_norm_kernel.cu | 2 +- paddle/phi/kernels/gpu/depthwise_conv.h | 61 ++--- .../impl/average_accumulates_kernel_impl.h | 14 +- .../phi/tests/kernels/test_math_function.cc | 38 +-- .../phi/tests/kernels/test_math_function.cu | 100 ++++---- .../custom_op/custom_raw_op_kernel_op.cc | 9 +- .../custom_op/custom_raw_op_kernel_op.cu | 5 +- .../tests/custom_op/custom_raw_op_kernel_op.h | 10 +-
 1042 files changed, 8120 insertions(+), 7816 deletions(-)

diff --git a/paddle/fluid/distributed/ps/service/brpc_utils.cc b/paddle/fluid/distributed/ps/service/brpc_utils.cc
index b98e85f9c23e5..915a1ffa15f6b 100644
--- a/paddle/fluid/distributed/ps/service/brpc_utils.cc
+++ b/paddle/fluid/distributed/ps/service/brpc_utils.cc
@@ -282,7 +282,7 @@ void DeserializeSelectedRows(
     const platform::DeviceContext& ctx) {
   const auto place = ctx.GetPlace();
   auto* slr = var->GetMutable();
-  framework::Tensor* tensor = slr->mutable_value();
+  phi::DenseTensor* tensor = slr->mutable_value();
   slr->set_height(msg.slr_height());
   std::vector tmp_rows(msg.dims()[0]);
   memcpy(tmp_rows.data(), msg.data().data(), msg.dims()[0] * sizeof(int64_t));
diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h
index 8026b8e368478..8bddb87d1fef0 100644
--- a/paddle/fluid/eager/eager_tensor.h
+++ b/paddle/fluid/eager/eager_tensor.h
@@ -248,7 +248,7 @@ class EagerVariable final {
     // Construct allocation only once.
if (var_.IsInitialized()) { if (var_.IsType() || - var_.IsType()) { + var_.IsType()) { return SetImplWithLegacyTensor(); } else if (var_.IsType()) { return SetImplWithLegacyTensor(); @@ -286,7 +286,7 @@ class EagerVariable final { template void ConstructVariableFromTensor(const paddle::experimental::Tensor& tensor) { auto* framework_tensor = var_.GetMutable(); - // Contruct framework::Tensor from egr::EagerVariable + // Contruct phi::DenseTensor from egr::EagerVariable auto tensor_dense = std::dynamic_pointer_cast(tensor.impl()); PADDLE_ENFORCE_EQ( (tensor_dense.get() && tensor_dense), @@ -303,7 +303,7 @@ class EagerVariable final { void ConstructVariableFromCompatTensor( const paddle::experimental::Tensor& tensor) { auto* framework_holder = var_.GetMutable(); - // Contruct framework::Tensor from egr::EagerVariable + // Contruct phi::DenseTensor from egr::EagerVariable auto* compat_tensor = static_cast(tensor.impl().get()); PADDLE_ENFORCE_NOT_NULL(compat_tensor, diff --git a/paddle/fluid/framework/attribute_checker.h b/paddle/fluid/framework/attribute_checker.h index 24f3f0be96b6c..6552d167e1d01 100644 --- a/paddle/fluid/framework/attribute_checker.h +++ b/paddle/fluid/framework/attribute_checker.h @@ -246,7 +246,7 @@ class TypedAttrChecker { true, platform::errors::InvalidArgument( "Found Attribute('%s') with type(Variable), but it " - "doesn't support Tensor type.", + "doesn't support phi::DenseTensor type.", attr_name_)); VLOG(1) << "Found Attribute " << attr_name_ << " with type(Variable)."; diff --git a/paddle/fluid/framework/copy_same_tensor_test.cc b/paddle/fluid/framework/copy_same_tensor_test.cc index d4f36be5e87e7..10e0b76f00459 100644 --- a/paddle/fluid/framework/copy_same_tensor_test.cc +++ b/paddle/fluid/framework/copy_same_tensor_test.cc @@ -46,7 +46,7 @@ static bool CopySameTensorTestMain(const DDim &dims, FLAGS_use_system_allocator = true; // force to use system allocator // Step 1: create a cpu tensor and initialize it with random value; - Tensor src_cpu_tensor; + phi::DenseTensor src_cpu_tensor; { src_cpu_tensor.Resize(dims); auto *src_ptr_cpu = src_cpu_tensor.mutable_data(platform::CPUPlace()); @@ -60,9 +60,9 @@ static bool CopySameTensorTestMain(const DDim &dims, } // Step 2: copy the source tensor to dst place - Tensor dst_cpu_tensor; + phi::DenseTensor dst_cpu_tensor; { - Tensor src_tensor; + phi::DenseTensor src_tensor; TensorCopySync(src_cpu_tensor, src_place, &src_tensor); // The source tensor and dst_tensor is the same diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc index e7ed9f2108128..8f778e4babe4e 100644 --- a/paddle/fluid/framework/custom_operator.cc +++ b/paddle/fluid/framework/custom_operator.cc @@ -133,8 +133,8 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, for (auto& in_name : inputs) { VLOG(3) << "Custom Operator: input name - " << in_name; if (detail::IsDuplicableVar(in_name)) { - // return const std::vector - auto vec_x = ctx.MultiInput(in_name); + // return const std::vector + auto vec_x = ctx.MultiInput(in_name); PADDLE_ENFORCE_NE(vec_x.empty(), true, platform::errors::NotFound( @@ -161,7 +161,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, } kernel_ctx.EmplaceBackInputs(std::move(custom_vec_in)); } else { - auto* x = ctx.Input(in_name); + auto* x = ctx.Input(in_name); PADDLE_ENFORCE_NOT_NULL( x, platform::errors::NotFound("Input tensor (%s) is nullptr.", in_name)); @@ -222,7 +222,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, 
VLOG(3) << "Custom Operator: push outputs into CustomOpKernelContext."; // cache the target tensor pointers - std::vector true_out_ptrs; + std::vector true_out_ptrs; for (size_t i = 0; i < outputs.size(); ++i) { auto out_name = outputs[i]; if (detail::IsDuplicableVar(out_name)) { @@ -231,7 +231,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, "If custom operator's outputs contains `paddle::Vec(" ")` type, " "it only can hold one output.")); - auto vec_out = ctx.MultiOutput(out_name); + auto vec_out = ctx.MultiOutput(out_name); PADDLE_ENFORCE_NE(vec_out.empty(), true, platform::errors::NotFound( @@ -253,7 +253,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, } kernel_ctx.EmplaceBackOutputs(std::move(custom_vec_out)); } else { - auto* out = ctx.Output(out_name); + auto* out = ctx.Output(out_name); PADDLE_ENFORCE_NOT_NULL(out, platform::errors::NotFound( "Output tensor (%s) is nullptr.", out_name)); @@ -431,7 +431,7 @@ class CustomOperator : public OperatorWithKernel { */ framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const OpKernelType& expected_kernel_type) const override { return OpKernelType(expected_kernel_type.data_type_, expected_kernel_type.place_, @@ -511,7 +511,7 @@ class CustomOpMaker : public OpProtoAndCheckerMaker { AddComment(R"DOC( Custom Operator. -According to the Tensor operation function implemented by the user +According to the phi::DenseTensor operation function implemented by the user independently of the framework, it is encapsulated into a framework operator to adapt to various execution scenarios such as dynamic graph, mode static graph mode, and inference mode. diff --git a/paddle/fluid/framework/data_device_transform.cc b/paddle/fluid/framework/data_device_transform.cc index e65ecff60edd7..c8c92e95ea3a5 100644 --- a/paddle/fluid/framework/data_device_transform.cc +++ b/paddle/fluid/framework/data_device_transform.cc @@ -16,9 +16,9 @@ limitations under the License. */ namespace paddle { namespace framework { -void TransDataDevice(const Tensor &in, +void TransDataDevice(const phi::DenseTensor &in, const platform::Place &dst_place, - Tensor *out) { + phi::DenseTensor *out) { VLOG(3) << "DeviceTransform in, src_place " << in.place() << " dst_place: " << dst_place; diff --git a/paddle/fluid/framework/data_device_transform.h b/paddle/fluid/framework/data_device_transform.h index cb6b5feab2fca..55130519c4a6a 100644 --- a/paddle/fluid/framework/data_device_transform.h +++ b/paddle/fluid/framework/data_device_transform.h @@ -21,9 +21,9 @@ limitations under the License. 
*/ namespace paddle { namespace framework { -void TransDataDevice(const Tensor& in, +void TransDataDevice(const phi::DenseTensor& in, const platform::Place& dst_place, - Tensor* out); + phi::DenseTensor* out); } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/data_device_transform_test.cu b/paddle/fluid/framework/data_device_transform_test.cu index cd76747c03599..3e017f9b39377 100644 --- a/paddle/fluid/framework/data_device_transform_test.cu +++ b/paddle/fluid/framework/data_device_transform_test.cu @@ -55,7 +55,7 @@ class TestOpWithKernel : public OperatorWithKernel { } else { VLOG(3) << "use default kernel"; return OpKernelType(proto::VarType::FP32, - ctx.Input("input")->place()); + ctx.Input("input")->place()); } } }; @@ -66,7 +66,7 @@ class TestKernel : public OpKernel { void Compute(const ExecutionContext& ctx) const { std::cout << ctx.DebugString() << std::endl; - const Tensor* input = ctx.Input("input"); + const phi::DenseTensor* input = ctx.Input("input"); std::cout << "input place:" << input->place() << std::endl; auto* output = ctx.Output("output"); @@ -158,7 +158,7 @@ TEST(Operator, CPUtoGPU) { paddle::platform::DeviceContextPool::Instance(); auto dev_ctx = pool.Get(cuda_place); - paddle::framework::Tensor output_tensor; + phi::DenseTensor output_tensor; paddle::framework::TensorCopy(output2->Get(), paddle::platform::CPUPlace(), *dev_ctx, diff --git a/paddle/fluid/framework/data_feed.proto b/paddle/fluid/framework/data_feed.proto index a7ab70948795f..18d4cc7d4dc5c 100644 --- a/paddle/fluid/framework/data_feed.proto +++ b/paddle/fluid/framework/data_feed.proto @@ -19,7 +19,7 @@ message Slot { required string type = 2; optional bool is_dense = 3 [ default = false ]; optional bool is_used = 4 [ default = false ]; - repeated int32 shape = 5; // we can define N-D Tensor + repeated int32 shape = 5; // we can define N-D phi::DenseTensor } message MultiSlotDesc { diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index 3c6a89f2939a7..254a7abd66db5 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -55,8 +55,8 @@ void CastDataLayout::apply() { void TransDataLayout(const OpKernelType& kernel_type_for_var, const OpKernelType& expected_kernel_type, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { PADDLE_ENFORCE( platform::places_are_same_class(kernel_type_for_var.place_, expected_kernel_type.place_), @@ -97,7 +97,8 @@ using dnnl::memory; using dnnl::primitive; using dnnl::reorder; -void* GetDataFromTensor(const Tensor& tensor, dnnl::memory::data_type type) { +void* GetDataFromTensor(const phi::DenseTensor& tensor, + dnnl::memory::data_type type) { switch (type) { case dnnl::memory::data_type::f32: return platform::to_void_cast(tensor.data()); @@ -117,8 +118,8 @@ void* GetDataFromTensor(const Tensor& tensor, dnnl::memory::data_type type) { void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, const OpKernelType& expected_kernel_type, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { auto in_layout = kernel_type_for_var.data_layout_; auto out_layout = expected_kernel_type.data_layout_; auto place = expected_kernel_type.place_; @@ -139,8 +140,8 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout, - const Tensor& in, - Tensor* out, 
+ const phi::DenseTensor& in, + phi::DenseTensor* out, platform::Place place, bool always_copy) { // Set default as NCHW in case not specified diff --git a/paddle/fluid/framework/data_layout_transform.h b/paddle/fluid/framework/data_layout_transform.h index 5eb1f3ecb49a7..b30884fa6f0da 100644 --- a/paddle/fluid/framework/data_layout_transform.h +++ b/paddle/fluid/framework/data_layout_transform.h @@ -38,12 +38,12 @@ namespace framework { struct CastDataLayout { CastDataLayout(const platform::DeviceContext* ctx, const std::vector& axis, - const framework::Tensor& in, - framework::Tensor* out) + const phi::DenseTensor& in, + phi::DenseTensor* out) : in_(in), out_(out), ctx_(ctx), axis_(axis) {} - const framework::Tensor in_; - framework::Tensor* out_; + const phi::DenseTensor in_; + phi::DenseTensor* out_; const platform::DeviceContext* ctx_; const std::vector axis_; @@ -101,17 +101,17 @@ inline MKLDNNDataType ToMKLDNNDataType(proto::VarType::Type type) { void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout, - const Tensor& in, - Tensor* out, + const phi::DenseTensor& in, + phi::DenseTensor* out, platform::Place place, bool always_copy = false); void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, const OpKernelType& expected_kernel_type, - const Tensor& in, - Tensor* out); + const phi::DenseTensor& in, + phi::DenseTensor* out); -void* GetDataFromTensor(const Tensor& tensor, MKLDNNDataType type); +void* GetDataFromTensor(const phi::DenseTensor& tensor, MKLDNNDataType type); #endif @@ -119,8 +119,8 @@ std::vector GetAxis(const DataLayout& from, const DataLayout& to); void TransDataLayout(const OpKernelType& kernel_type_for_var, const OpKernelType& expected_kernel_type, - const Tensor& in, - Tensor* out); + const phi::DenseTensor& in, + phi::DenseTensor* out); } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/data_layout_transform_test.cc b/paddle/fluid/framework/data_layout_transform_test.cc index f1ac6b5216ecd..0c329a3e8c222 100644 --- a/paddle/fluid/framework/data_layout_transform_test.cc +++ b/paddle/fluid/framework/data_layout_transform_test.cc @@ -18,8 +18,8 @@ TEST(DataTransform, DataLayoutFunction) { auto place = paddle::platform::CPUPlace(); - paddle::framework::Tensor in = paddle::framework::Tensor(); - paddle::framework::Tensor out = paddle::framework::Tensor(); + phi::DenseTensor in = phi::DenseTensor(); + phi::DenseTensor out = phi::DenseTensor(); in.mutable_data(phi::make_ddim({2, 3, 1, 2}), place); in.set_layout(paddle::framework::DataLayout::kNHWC); @@ -48,7 +48,7 @@ TEST(DataTransform, DataLayoutFunction) { #ifdef PADDLE_WITH_MKLDNN TEST(DataTransformBf16, GetDataFromTensorDNNL) { auto place = paddle::platform::CPUPlace(); - paddle::framework::Tensor in = paddle::framework::Tensor(); + phi::DenseTensor in = phi::DenseTensor(); in.mutable_data(phi::make_ddim({2, 3, 1, 2}), place); @@ -61,7 +61,7 @@ TEST(DataTransformBf16, GetDataFromTensorDNNL) { TEST(DataTransformInt32, GetDataFromTensorDNNL) { auto place = paddle::platform::CPUPlace(); - paddle::framework::Tensor in = paddle::framework::Tensor(); + phi::DenseTensor in = phi::DenseTensor(); in.mutable_data(phi::make_ddim({2, 3, 1, 2}), place); void* in_data = diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc index 044bf1fca39e2..db8c3c8c86cb5 100644 --- a/paddle/fluid/framework/data_transform.cc +++ b/paddle/fluid/framework/data_transform.cc @@ -31,19 +31,19 @@ class Variable; namespace paddle { 
namespace framework { -static void PassTensorData(Tensor *from, Tensor *to) { +static void PassTensorData(phi::DenseTensor *from, phi::DenseTensor *to) { to->ShareDataWith(*from); - *from = Tensor(); + *from = phi::DenseTensor(); } void TransformData(const OpKernelType &expected_kernel_type, const OpKernelType &kernel_type_for_var, - const Tensor &input_tensor, - Tensor *output_tensor) { + const phi::DenseTensor &input_tensor, + phi::DenseTensor *output_tensor) { bool transformed = false; - Tensor in; + phi::DenseTensor in; in.ShareDataWith(input_tensor); - Tensor out; + phi::DenseTensor out; const DataLayout lin = kernel_type_for_var.data_layout_; const DataLayout lout = expected_kernel_type.data_layout_; // do layout transform @@ -120,7 +120,7 @@ void TransformData(const OpKernelType &expected_kernel_type, } void SetTensorToVariable(const Variable &in_var, - const Tensor &tensor, + const phi::DenseTensor &tensor, Variable *out_var) { if (in_var.IsType()) { auto &in_lod_tensor = in_var.Get(); diff --git a/paddle/fluid/framework/data_transform.h b/paddle/fluid/framework/data_transform.h index 7fe20beec7dd7..2fcea7803ed31 100644 --- a/paddle/fluid/framework/data_transform.h +++ b/paddle/fluid/framework/data_transform.h @@ -35,14 +35,14 @@ class Variable; void TransformData(const OpKernelType &expected_kernel_type, const OpKernelType &kernel_type_for_var, - const Tensor &input_tensor, - Tensor *out); + const phi::DenseTensor &input_tensor, + phi::DenseTensor *out); /** * Set OutVar from InVar, except the tensor is shared with `tensor` */ void SetTensorToVariable(const Variable &in_var, - const Tensor &tensor, + const phi::DenseTensor &tensor, Variable *out_var); } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/data_type_test.cc b/paddle/fluid/framework/data_type_test.cc index 01802c11d5219..9f36bd4636890 100644 --- a/paddle/fluid/framework/data_type_test.cc +++ b/paddle/fluid/framework/data_type_test.cc @@ -20,13 +20,12 @@ #include "paddle/fluid/framework/tensor.h" TEST(DataType, float16) { - using paddle::framework::Tensor; using paddle::platform::CPUPlace; using paddle::platform::float16; namespace f = paddle::framework; f::proto::VarType::Type dtype = f::proto::VarType::FP16; - Tensor tensor; + phi::DenseTensor tensor; CPUPlace cpu; tensor.mutable_data(cpu, f::TransToPhiDataType(dtype)); @@ -43,13 +42,12 @@ TEST(DataType, float16) { } TEST(DataType, bfloat16) { - using paddle::framework::Tensor; using paddle::platform::bfloat16; using paddle::platform::CPUPlace; namespace f = paddle::framework; f::proto::VarType::Type dtype = f::proto::VarType::BF16; - Tensor tensor; + phi::DenseTensor tensor; CPUPlace cpu; tensor.mutable_data(cpu, f::TransToPhiDataType(dtype)); diff --git a/paddle/fluid/framework/data_type_transform.cc b/paddle/fluid/framework/data_type_transform.cc index 59d20306c665a..e7abe21daeb5e 100644 --- a/paddle/fluid/framework/data_type_transform.cc +++ b/paddle/fluid/framework/data_type_transform.cc @@ -35,8 +35,8 @@ struct CastDataTypeFunctor { #if defined(PADDLE_WITH_XPU) template -static void XPUCastData(const framework::Tensor& in, - framework::Tensor* out, +static void XPUCastData(const phi::DenseTensor& in, + phi::DenseTensor* out, const platform::XPUDeviceContext* dev_ctx) { using XPUInTDType = typename XPUTypeTrait::Type; using XPUOutTDType = typename XPUTypeTrait::Type; @@ -51,8 +51,8 @@ static void XPUCastData(const framework::Tensor& in, template static void XPUTransDataType( - const framework::Tensor& in, - framework::Tensor* out, + 
const phi::DenseTensor& in, + phi::DenseTensor* out, const paddle::framework::proto::VarType::Type& dst_type, const platform::DeviceContext* ctx) { auto* context = static_cast(ctx); @@ -79,12 +79,12 @@ static void XPUTransDataType( template struct CastDataType { - CastDataType(const framework::Tensor& in, - framework::Tensor* out, + CastDataType(const phi::DenseTensor& in, + phi::DenseTensor* out, const platform::DeviceContext* ctx) : in_(in), out_(out), ctx_(ctx) {} - const framework::Tensor in_; - framework::Tensor* out_; + const phi::DenseTensor in_; + phi::DenseTensor* out_; const platform::DeviceContext* ctx_; template @@ -121,8 +121,8 @@ struct CastDataType { void TransDataType(const OpKernelType& kernel_type_for_var, const OpKernelType& expected_kernel_type, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { PADDLE_ENFORCE_EQ( framework::TransToProtoVarType(in.dtype()), kernel_type_for_var.data_type_, @@ -135,9 +135,9 @@ void TransDataType(const OpKernelType& kernel_type_for_var, TransDataType(in, dst_type, out); } -void TransDataType(const Tensor& in, +void TransDataType(const phi::DenseTensor& in, const paddle::framework::proto::VarType::Type& type, - Tensor* out) { + phi::DenseTensor* out) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); out->Resize(in.dims()); @@ -213,8 +213,8 @@ void TransDataType(const Tensor& in, void TransComplexToReal(const proto::VarType::Type& dst_type, const proto::VarType::Type& src_type, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { auto& pool = platform::DeviceContextPool::Instance(); auto* ctx = pool.Get(in.place()); out->Resize(in.dims()); diff --git a/paddle/fluid/framework/data_type_transform.h b/paddle/fluid/framework/data_type_transform.h index b6449861369a2..619e15b6045e8 100644 --- a/paddle/fluid/framework/data_type_transform.h +++ b/paddle/fluid/framework/data_type_transform.h @@ -30,11 +30,11 @@ using KernelTypePair = std::pair; void TransDataType(const OpKernelType& kernel_type_for_var, const OpKernelType& expected_kernel_type, - const Tensor& in, - Tensor* out); -void TransDataType(const Tensor& in, + const phi::DenseTensor& in, + phi::DenseTensor* out); +void TransDataType(const phi::DenseTensor& in, const paddle::framework::proto::VarType::Type& type, - Tensor* out); + phi::DenseTensor* out); /** * Transform complex gradient to real data type. 
@@ -49,8 +49,8 @@ void TransDataType(const Tensor& in, */ void TransComplexToReal(const proto::VarType::Type& dst_type, const proto::VarType::Type& src_type, - const Tensor& in, - Tensor* out); + const phi::DenseTensor& in, + phi::DenseTensor* out); } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/data_type_transform_test.cc b/paddle/fluid/framework/data_type_transform_test.cc index 64d91611ab40a..cfdcb18a841b8 100644 --- a/paddle/fluid/framework/data_type_transform_test.cc +++ b/paddle/fluid/framework/data_type_transform_test.cc @@ -63,8 +63,8 @@ TEST(DataTypeTransform, CPUTransform) { // data type transform from float32 { - paddle::framework::Tensor in; - paddle::framework::Tensor out; + phi::DenseTensor in; + phi::DenseTensor out; float* ptr = in.mutable_data(phi::make_ddim({2, 3}), place); int data_number = 2 * 3; @@ -88,8 +88,8 @@ TEST(DataTypeTransform, CPUTransform) { // data type transform from/to float16 { - paddle::framework::Tensor in; - paddle::framework::Tensor out; + phi::DenseTensor in; + phi::DenseTensor out; paddle::platform::float16* ptr = in.mutable_data( phi::make_ddim({2, 3}), place); @@ -201,8 +201,8 @@ TEST(DataTypeTransform, CPUTransform) { // data type transform from/to bfloat16 { - paddle::framework::Tensor in; - paddle::framework::Tensor out; + phi::DenseTensor in; + phi::DenseTensor out; paddle::platform::bfloat16* ptr = in.mutable_data(phi::make_ddim({2, 3}), @@ -315,8 +315,8 @@ TEST(DataTypeTransform, CPUTransform) { // data type transform from/to int32 { - paddle::framework::Tensor in; - paddle::framework::Tensor out; + phi::DenseTensor in; + phi::DenseTensor out; int32_t* ptr = in.mutable_data(phi::make_ddim({2, 3}), place); int data_number = 2 * 3; diff --git a/paddle/fluid/framework/data_type_transform_test.cu b/paddle/fluid/framework/data_type_transform_test.cu index 8490afd69d9ea..0a808cfdbf738 100644 --- a/paddle/fluid/framework/data_type_transform_test.cu +++ b/paddle/fluid/framework/data_type_transform_test.cu @@ -62,10 +62,10 @@ TEST(DataTypeTransform, GPUTransform) { // data type transform from float32 { - paddle::framework::Tensor in; - paddle::framework::Tensor in_gpu; - paddle::framework::Tensor out_gpu; - paddle::framework::Tensor out; + phi::DenseTensor in; + phi::DenseTensor in_gpu; + phi::DenseTensor out_gpu; + phi::DenseTensor out; float* in_ptr = in.mutable_data(phi::make_ddim({2, 3}), cpu_place); float arr[6] = {0, 1, 2, 3, 4, 5}; @@ -97,10 +97,10 @@ TEST(DataTypeTransform, GPUTransform) { // data type transform from/to float16 { - paddle::framework::Tensor in; - paddle::framework::Tensor in_gpu; - paddle::framework::Tensor out_gpu; - paddle::framework::Tensor out; + phi::DenseTensor in; + phi::DenseTensor in_gpu; + phi::DenseTensor out_gpu; + phi::DenseTensor out; paddle::platform::float16* ptr = in.mutable_data( phi::make_ddim({2, 3}), cpu_place); diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc index 01f707eb9baaf..293ef3492691c 100644 --- a/paddle/fluid/framework/details/all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc @@ -248,7 +248,7 @@ void AllReduceOpHandle::AllReduceFunc( ->FindVar(out_var_names[0]) ->GetMutable(); - // Reduce All Tensor to trg in CPU + // Reduce All phi::DenseTensor to trg in CPU ReduceBufferData func(lod_tensor_data, trg.data(), numel); VisitDataType(framework::TransToProtoVarType(trg.dtype()), func); diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc 
b/paddle/fluid/framework/details/broadcast_op_handle.cc index 18b9dc3ffac9f..d28f81f3556cc 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/broadcast_op_handle.cc @@ -62,7 +62,7 @@ void BroadcastOpHandle::BroadcastOneVar( in_var, platform::errors::NotFound("Variable %s is not found in scopes.", in_var_handle.name())); - Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var); + phi::DenseTensor &in_tensor = VariableVisitor::GetMutableTensor(in_var); if (UNLIKELY(!in_tensor.IsInitialized())) { VLOG(3) << "in var " << in_var_handle.name() << "not inited, return!"; return; @@ -236,7 +236,7 @@ void BroadcastOpHandle::InitOutputValue( auto *in_var = var_scopes.at(in_var_handle.scope_idx())->FindVar(in_var_handle.name()); - Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var); + phi::DenseTensor &in_tensor = VariableVisitor::GetMutableTensor(in_var); // NOTE: The tensors' Place of input and output must be all on GPU or all on // CPU. diff --git a/paddle/fluid/framework/details/broadcast_op_handle_test.h b/paddle/fluid/framework/details/broadcast_op_handle_test.h index 154bf2b354e1a..f0825196e4478 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle_test.h +++ b/paddle/fluid/framework/details/broadcast_op_handle_test.h @@ -286,7 +286,7 @@ struct TestBroadcastOpHandle { } p::CPUPlace cpu_place; - f::Tensor result_tensor; + phi::DenseTensor result_tensor; f::TensorCopySync(rt, cpu_place, &result_tensor); float* ct = result_tensor.data(); @@ -312,7 +312,7 @@ struct TestBroadcastOpHandle { "the expected, expect %s, but got %s.", lod, tensor.lod())); - f::Tensor result_tensor; + phi::DenseTensor result_tensor; f::TensorCopySync(tensor, cpu_place, &result_tensor); float* ct = result_tensor.mutable_data(cpu_place); for (int64_t k = 0; k < phi::product(kDims); ++k) { diff --git a/paddle/fluid/framework/details/build_strategy_test.cc b/paddle/fluid/framework/details/build_strategy_test.cc index 11c0746acc7b7..4b184ba552898 100644 --- a/paddle/fluid/framework/details/build_strategy_test.cc +++ b/paddle/fluid/framework/details/build_strategy_test.cc @@ -52,7 +52,8 @@ class SumOpWithKernel : public OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override {} OpKernelType GetExpectedKernelType( const ExecutionContext &ctx) const override { - return OpKernelType(proto::VarType::FP32, ctx.Input("X")->place()); + return OpKernelType(proto::VarType::FP32, + ctx.Input("X")->place()); } }; diff --git a/paddle/fluid/framework/details/fetch_async_op_handle.cc b/paddle/fluid/framework/details/fetch_async_op_handle.cc index adf49c81c049a..ec49510e0d41b 100644 --- a/paddle/fluid/framework/details/fetch_async_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_async_op_handle.cc @@ -130,8 +130,8 @@ static void CheckTensorAttrs(const LoDTensor *tensor, offset)); } -static void TransData(const framework::Tensor *src_item, - framework::Tensor *dst_item, +static void TransData(const phi::DenseTensor *src_item, + phi::DenseTensor *dst_item, const platform::DeviceContext &ctx) { if (src_item->IsInitialized() && src_item->numel() > 0) { if (platform::is_gpu_place(src_item->place())) { diff --git a/paddle/fluid/framework/details/gather_op_handle.cc b/paddle/fluid/framework/details/gather_op_handle.cc index 91959d5146be6..50b34b57ec5c0 100644 --- a/paddle/fluid/framework/details/gather_op_handle.cc +++ b/paddle/fluid/framework/details/gather_op_handle.cc @@ -77,7 +77,7 @@ void GatherOpHandle::RunImpl() { auto 
&pre_in_value = pre_in_var->Get(); std::vector out_rows; - std::vector in_tensors; + std::vector in_tensors; // Gather the inputs for (auto *in_handle : in_var_handles) { @@ -121,7 +121,7 @@ void GatherOpHandle::RunImpl() { out_dim[0] = static_cast(rows); out_value->mutable_value()->Resize(out_dim).mutable_data( t_out_p, pre_in_value.value().dtype()); - Tensor *out_tensor = out_value->mutable_value(); + phi::DenseTensor *out_tensor = out_value->mutable_value(); // copy auto dev_ctx = dev_ctxes_.at(out_var_handle->place()); diff --git a/paddle/fluid/framework/details/gather_op_handle_test.cc b/paddle/fluid/framework/details/gather_op_handle_test.cc index 45d8939f788a0..3437eb5570dc7 100644 --- a/paddle/fluid/framework/details/gather_op_handle_test.cc +++ b/paddle/fluid/framework/details/gather_op_handle_test.cc @@ -204,7 +204,7 @@ struct TestGatherOpHandle { out_select_rows.rows()[k])); } - f::Tensor result_tensor; + phi::DenseTensor result_tensor; f::TensorCopy(rt, cpu_place, *(ctxs_[output_scope_idx]), &result_tensor); float* ct = result_tensor.data(); diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cc b/paddle/fluid/framework/details/nan_inf_utils_detail.cc index bce7b64e6d735..e749d1568ff4b 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cc +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cc @@ -332,7 +332,7 @@ void TensorCheckerVisitor::apply( template <> void tensor_check(const std::string& op_type, const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const platform::Place& place) { TensorCheckerVisitor vistor( op_type, var_name, tensor, place); @@ -348,7 +348,7 @@ void CheckVarHasNanOrInf(const std::string& op_type, platform::errors::NotFound( "Cannot find var: `%s` in op `%s`.", var_name, op_type)); - const Tensor* tensor{nullptr}; + const phi::DenseTensor* tensor{nullptr}; if (var->IsType()) { tensor = &var->Get(); } else if (var->IsType()) { @@ -371,7 +371,8 @@ void CheckVarHasNanOrInf(const std::string& op_type, tensor_check(op_type, var_name, *tensor, place); #else PADDLE_THROW(platform::errors::PreconditionNotMet( - "Tensor[%s] use gpu place. PaddlePaddle must compile with GPU.", + "phi::DenseTensor[%s] use gpu place. PaddlePaddle must compile with " + "GPU.", var_name)); #endif return; @@ -400,10 +401,13 @@ void CheckVarHasNanOrInf(const std::string& op_type, flag, true, platform::errors::Fatal( - "Operator %s output Tensor %s contains Inf.", op_type, var_name)); + "Operator %s output phi::DenseTensor %s contains Inf.", + op_type, + var_name)); #else PADDLE_THROW(platform::errors::PreconditionNotMet( - "Tensor[%s] use xpu place. PaddlePaddle must compile with XPU.", + "phi::DenseTensor[%s] use xpu place. PaddlePaddle must compile with " + "XPU.", var_name)); #endif return; @@ -431,10 +435,13 @@ void CheckVarHasNanOrInf(const std::string& op_type, flag, true, platform::errors::Fatal( - "Operator %s output Tensor %s contains Inf.", op_type, var_name)); + "Operator %s output phi::DenseTensor %s contains Inf.", + op_type, + var_name)); #else PADDLE_THROW(platform::errors::PreconditionNotMet( - "Tensor[%s] use npu place. PaddlePaddle must compile with NPU.", + "phi::DenseTensor[%s] use npu place. 
PaddlePaddle must compile with " + "NPU.", var_name)); #endif return; @@ -473,8 +480,8 @@ using NpuOpRunner = paddle::operators::NpuOpRunner; constexpr int FLOAT_STATUS_SIZE = 8; -static framework::Tensor& npu_float_status() { - static framework::Tensor float_status; +static phi::DenseTensor& npu_float_status() { + static phi::DenseTensor float_status; return float_status; } @@ -494,7 +501,7 @@ void NPUAllocAndClearFloatStatus(const framework::OperatorBase& op, flag.mutable_data({FLOAT_STATUS_SIZE}, place); NpuOpRunner("NPUAllocFloatStatus", {}, {flag}).Run(stream); - framework::Tensor tmp; + phi::DenseTensor tmp; tmp.mutable_data({FLOAT_STATUS_SIZE}, place); NpuOpRunner("NPUClearFloatStatus", {tmp}, {flag}).Run(stream); } @@ -503,7 +510,7 @@ void PrintNpuVarInfo(const std::string& op_type, const std::string& var_name, const framework::Variable* var, const platform::Place& place) { - const Tensor* tensor{nullptr}; + const phi::DenseTensor* tensor{nullptr}; if (var->IsType()) { tensor = &var->Get(); } else if (var->IsType()) { @@ -528,7 +535,7 @@ void PrintNpuVarInfo(const std::string& op_type, VLOG(10) << "begin check " << op_type << " var_name:" << var_name << ", place:" << tensor->place() << ", numel:" << tensor->numel(); - framework::Tensor cpu_tensor; + phi::DenseTensor cpu_tensor; cpu_tensor.Resize(tensor->dims()); cpu_tensor.mutable_data(platform::CPUPlace(), tensor->dtype()); framework::TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor); @@ -575,13 +582,13 @@ static void NPUCheckOpHasNanOrInf(const framework::OperatorBase& op, auto stream = dev_ctx->stream(); auto& flag = npu_float_status(); - Tensor tmp; + phi::DenseTensor tmp; tmp.mutable_data({FLOAT_STATUS_SIZE}, place); // NPUGetFloatStatus updates data on input in-place. // tmp is only placeholder. 
NpuOpRunner("NPUGetFloatStatus", {flag}, {tmp}).Run(stream); - framework::Tensor cpu_tensor; + phi::DenseTensor cpu_tensor; auto cpu_place = platform::CPUPlace(); float* cpu_data = static_cast( cpu_tensor.mutable_data({FLOAT_STATUS_SIZE}, cpu_place)); diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cu b/paddle/fluid/framework/details/nan_inf_utils_detail.cu index 4aa24f8cb6ab8..57552a16cc5f4 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cu +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cu @@ -230,7 +230,7 @@ void TensorCheckerVisitor::apply( template <> void tensor_check(const std::string& op_type, const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const platform::Place& place) { std::call_once(init_multi_gpu_op_var_map_flag, InitMultiGPUOpVarMap); diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.h b/paddle/fluid/framework/details/nan_inf_utils_detail.h index 99186c43e129e..2a25bc7b68f36 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.h +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.h @@ -28,7 +28,7 @@ template struct TensorCheckerVisitor { TensorCheckerVisitor(const std::string& op_type, const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const platform::Place& place) : op_type_(op_type), var_name_(var_name), @@ -51,14 +51,14 @@ struct TensorCheckerVisitor { std::string op_type_; std::string var_name_; - const framework::Tensor& tensor_; + const phi::DenseTensor& tensor_; const platform::Place& place_; }; template void tensor_check(const std::string& op_type, const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const platform::Place& place); } // namespace details diff --git a/paddle/fluid/framework/details/reduce_and_gather.h b/paddle/fluid/framework/details/reduce_and_gather.h index de53a5de99b96..a1715062d4cb5 100644 --- a/paddle/fluid/framework/details/reduce_and_gather.h +++ b/paddle/fluid/framework/details/reduce_and_gather.h @@ -165,7 +165,7 @@ struct GatherLocalSelectedRowsFunctor { private: const std::map &dev_ctxes_; std::vector in_places_; - std::vector in_tensors_; + std::vector in_tensors_; platform::Place out_place_; phi::SelectedRows *dst_selected_rows_; diff --git a/paddle/fluid/framework/details/reduce_op_handle_test.cc b/paddle/fluid/framework/details/reduce_op_handle_test.cc index 7f38629f4e606..d35f9360637e6 100644 --- a/paddle/fluid/framework/details/reduce_op_handle_test.cc +++ b/paddle/fluid/framework/details/reduce_op_handle_test.cc @@ -228,7 +228,7 @@ struct TestReduceOpHandle { out_select_rows.rows()[k])); } - f::Tensor result_tensor; + phi::DenseTensor result_tensor; f::TensorCopySync(rt, cpu_place, &result_tensor); float *ct = result_tensor.data(); @@ -279,7 +279,7 @@ struct TestReduceOpHandle { auto &rt = out_var->Get(); - f::Tensor result_tensor; + phi::DenseTensor result_tensor; f::TensorCopySync(rt, cpu_place, &result_tensor); float *ct = result_tensor.data(); diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc index b453e7c4a813e..caffeba538dae 100644 --- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc @@ -43,13 +43,13 @@ ScaleLossGradOpHandle::~ScaleLossGradOpHandle() {} struct ScaleLossGradFunctor { float coeff_; - Tensor *out_; + phi::DenseTensor *out_; platform::Place 
place_; proto::VarType::Type out_dtype_; platform::DeviceContext *ctx_; ScaleLossGradFunctor(float coeff, - Tensor *out, + phi::DenseTensor *out, platform::Place place, proto::VarType::Type dtype, platform::DeviceContext *ctx) diff --git a/paddle/fluid/framework/details/scope_buffered_monitor.cc b/paddle/fluid/framework/details/scope_buffered_monitor.cc index 9a92ae19f9425..7f9f7a537313f 100644 --- a/paddle/fluid/framework/details/scope_buffered_monitor.cc +++ b/paddle/fluid/framework/details/scope_buffered_monitor.cc @@ -31,7 +31,7 @@ namespace details { static constexpr double kMB = 1 / (1024 * 1024); static void GetTensors(Variable *var, - std::unordered_set *tensor_set) { + std::unordered_set *tensor_set) { if (var->IsType() && var->Get().IsInitialized()) { tensor_set->insert(var->GetMutable()); } else if (var->IsType() && @@ -47,7 +47,8 @@ static void GetTensors(Variable *var, } } -static void GetTensors(Scope *scope, std::unordered_set *tensor_set) { +static void GetTensors(Scope *scope, + std::unordered_set *tensor_set) { for (auto &var_name : scope->LocalVarNames()) { GetTensors(scope->FindVar(var_name), tensor_set); } @@ -58,7 +59,7 @@ static void GetTensors(Scope *scope, std::unordered_set *tensor_set) { } static size_t GetTensorMemorySize(Scope *scope, bool clear_cpu_tensor) { - std::unordered_set tensor_set; + std::unordered_set tensor_set; GetTensors(scope, &tensor_set); size_t memory_size = 0; std::unordered_set allocation_set; diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.h b/paddle/fluid/framework/details/share_tensor_buffer_functor.h index 0ce66b9a0c7e7..d92bc0f0b0b1b 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_functor.h +++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.h @@ -40,7 +40,7 @@ namespace framework { namespace details { // TODO(zjl): support SelectedRows -static inline const Tensor &GetTensorFromVar(const Variable *var) { +static inline const phi::DenseTensor &GetTensorFromVar(const Variable *var) { if (var->IsType()) { return var->Get(); } else { @@ -50,7 +50,7 @@ static inline const Tensor &GetTensorFromVar(const Variable *var) { } } -static inline Tensor *GetMutableTensorFromVar(Variable *var) { +static inline phi::DenseTensor *GetMutableTensorFromVar(Variable *var) { if (var->IsType()) { return var->GetMutable(); } else { diff --git a/paddle/fluid/framework/details/variable_visitor.cc b/paddle/fluid/framework/details/variable_visitor.cc index 670c0b054c4cb..a495e405014ff 100644 --- a/paddle/fluid/framework/details/variable_visitor.cc +++ b/paddle/fluid/framework/details/variable_visitor.cc @@ -56,7 +56,7 @@ static void VisitVariable(const Variable& var, Func* func) { } struct TensorVisitor { - Tensor* result_{nullptr}; + phi::DenseTensor* result_{nullptr}; void operator()(LoDTensor* tensor) { result_ = tensor; } @@ -71,7 +71,7 @@ struct TensorVisitor { } }; -Tensor& VariableVisitor::GetMutableTensor(Variable* var) { +phi::DenseTensor& VariableVisitor::GetMutableTensor(Variable* var) { TensorVisitor vistor; VisitVariable(var, &vistor); return *vistor.result_; diff --git a/paddle/fluid/framework/details/variable_visitor.h b/paddle/fluid/framework/details/variable_visitor.h index a689c47a1611f..12a18c73f5d4d 100644 --- a/paddle/fluid/framework/details/variable_visitor.h +++ b/paddle/fluid/framework/details/variable_visitor.h @@ -29,7 +29,7 @@ namespace details { class VariableVisitor { public: - static Tensor &GetMutableTensor(Variable *var); + static phi::DenseTensor &GetMutableTensor(Variable *var); 
static void ShareDimsAndLoD(const Variable &src, Variable *trg); diff --git a/paddle/fluid/framework/device_worker.cc b/paddle/fluid/framework/device_worker.cc index 34aa34a058e92..e3115b58f1fb1 100644 --- a/paddle/fluid/framework/device_worker.cc +++ b/paddle/fluid/framework/device_worker.cc @@ -32,7 +32,7 @@ void DeviceWorker::SetDataFeed(DataFeed* data_feed) { } template -std::string PrintLodTensorType(Tensor* tensor, +std::string PrintLodTensorType(phi::DenseTensor* tensor, int64_t start, int64_t end, char separator = ',', @@ -55,10 +55,10 @@ std::string PrintLodTensorType(Tensor* tensor, return os.str(); } template -void PrintLodTensorType(Tensor* tensor, +void PrintLodTensorType(phi::DenseTensor* tensor, int64_t start, int64_t end, - std::string& out_val, + std::string& out_val, // NOLINT char separator = ',', bool need_leading_separator = true) { auto count = tensor->numel(); @@ -84,10 +84,10 @@ void PrintLodTensorType(Tensor* tensor, #define FLOAT_EPS 1e-8 #define MAX_FLOAT_BUFF_SIZE 40 template <> -void PrintLodTensorType(Tensor* tensor, +void PrintLodTensorType(phi::DenseTensor* tensor, int64_t start, int64_t end, - std::string& out_val, + std::string& out_val, // NOLINT char separator, bool need_leading_separator) { char buf[MAX_FLOAT_BUFF_SIZE]; @@ -101,15 +101,15 @@ void PrintLodTensorType(Tensor* tensor, for (int64_t i = start; i < end; i++) { if (i != start || need_leading_separator) out_val += separator; if (tensor->data()[i] > -FLOAT_EPS && - tensor->data()[i] < FLOAT_EPS) + tensor->data()[i] < FLOAT_EPS) { out_val += "0"; - else { - sprintf(buf, "%.9f", tensor->data()[i]); + } else { + sprintf(buf, "%.9f", tensor->data()[i]); // NOLINT out_val += buf; } } } -std::string PrintLodTensorIntType(Tensor* tensor, +std::string PrintLodTensorIntType(phi::DenseTensor* tensor, int64_t start, int64_t end, char separator = ',', @@ -132,10 +132,10 @@ std::string PrintLodTensorIntType(Tensor* tensor, return os.str(); } -void PrintLodTensorIntType(Tensor* tensor, +void PrintLodTensorIntType(phi::DenseTensor* tensor, int64_t start, int64_t end, - std::string& out_val, + std::string& out_val, // NOLINT char separator = ',', bool need_leading_separator = true) { auto count = tensor->numel(); @@ -160,7 +160,7 @@ void PrintLodTensorIntType(Tensor* tensor, // return os.str(); } -std::string PrintLodTensor(Tensor* tensor, +std::string PrintLodTensor(phi::DenseTensor* tensor, int64_t start, int64_t end, char separator, @@ -183,10 +183,10 @@ std::string PrintLodTensor(Tensor* tensor, return out_val; } -void PrintLodTensor(Tensor* tensor, +void PrintLodTensor(phi::DenseTensor* tensor, int64_t start, int64_t end, - std::string& out_val, + std::string& out_val, // NOLINT char separator, bool need_leading_separator) { if (framework::TransToProtoVarType(tensor->dtype()) == proto::VarType::FP32) { @@ -361,7 +361,7 @@ void DeviceWorker::DumpField(const Scope& scope, continue; } size_t acutal_thread_num = - std::min((size_t)batch_size, tensor_iterator_thread_num); + std::min(static_cast(batch_size), tensor_iterator_thread_num); for (size_t i = 0; i < acutal_thread_num; i++) { size_t average_size = batch_size / acutal_thread_num; size_t begin = @@ -378,7 +378,7 @@ void DeviceWorker::DumpField(const Scope& scope, VLOG(1) << "writing a batch takes " << tt.count() << " us"; size_t acutal_thread_num = - std::min((size_t)batch_size, tensor_iterator_thread_num); + std::min(static_cast(batch_size), tensor_iterator_thread_num); for (size_t i = 0; i < acutal_thread_num; i++) { size_t average_size = batch_size / 
acutal_thread_num; size_t begin = diff --git a/paddle/fluid/framework/device_worker.h b/paddle/fluid/framework/device_worker.h index 6b3766e580fae..6276d0c5003da 100644 --- a/paddle/fluid/framework/device_worker.h +++ b/paddle/fluid/framework/device_worker.h @@ -31,7 +31,6 @@ limitations under the License. */ #include "paddle/fluid/distributed/ps/wrapper/fleet.h" #endif -#include #include "paddle/fluid/framework/data_feed.h" #include "paddle/fluid/framework/executor_gc_helper.h" #include "paddle/fluid/framework/heter_util.h" @@ -60,15 +59,15 @@ class Scope; namespace paddle { namespace framework { -std::string PrintLodTensor(Tensor* tensor, +std::string PrintLodTensor(phi::DenseTensor* tensor, int64_t start, int64_t end, char separator = ',', bool need_leading_separator = false); -void PrintLodTensor(Tensor* tensor, +void PrintLodTensor(phi::DenseTensor* tensor, int64_t start, int64_t end, - std::string& output_str, + std::string& output_str, // NOLINT char separator = ',', bool need_leading_separator = false); std::pair GetTensorBound(LoDTensor* tensor, int index); diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc index b7bca733b8f9e..32c6e17143fa2 100644 --- a/paddle/fluid/framework/dlpack_tensor.cc +++ b/paddle/fluid/framework/dlpack_tensor.cc @@ -134,7 +134,7 @@ struct DLDeviceVisitor }; } // namespace internal -DLPackTensor::DLPackTensor(const Tensor &tensor, LaneType lanes) { +DLPackTensor::DLPackTensor(const phi::DenseTensor &tensor, LaneType lanes) { // init data, data buffer t_.data = const_cast(tensor.data()); diff --git a/paddle/fluid/framework/dlpack_tensor.h b/paddle/fluid/framework/dlpack_tensor.h index ff4cf23da6e96..c6fca6707fad2 100644 --- a/paddle/fluid/framework/dlpack_tensor.h +++ b/paddle/fluid/framework/dlpack_tensor.h @@ -28,7 +28,7 @@ class DLPackTensor { std::remove_reference::type; // int64_t // lanes is only used in CPU to enable vectorization - explicit DLPackTensor(const Tensor& tensor, LaneType lanes = 1); + explicit DLPackTensor(const phi::DenseTensor& tensor, LaneType lanes = 1); inline operator const ::DLTensor&() const { return t_; } diff --git a/paddle/fluid/framework/dlpack_tensor_test.cc b/paddle/fluid/framework/dlpack_tensor_test.cc index 9e3604e71a245..0ccc5bb4ad1a4 100644 --- a/paddle/fluid/framework/dlpack_tensor_test.cc +++ b/paddle/fluid/framework/dlpack_tensor_test.cc @@ -47,7 +47,7 @@ constexpr uint8_t GetDLDataTypeCode() { template void TestMain(const platform::Place &place, uint16_t lanes) { DDim dims{4, 5, 6, 7}; - Tensor tensor; + phi::DenseTensor tensor; tensor.Resize(dims); void *p = tensor.mutable_data(place); @@ -85,7 +85,7 @@ void TestMain(const platform::Place &place, uint16_t lanes) { template void TestToDLManagedTensor(const platform::Place &place, uint16_t lanes) { DDim dims{6, 7}; - Tensor tensor; + phi::DenseTensor tensor; tensor.Resize(dims); tensor.mutable_data(place); diff --git a/paddle/fluid/framework/downpour_lite_worker.cc b/paddle/fluid/framework/downpour_lite_worker.cc index bd2c404a6fd2a..cb082f6385653 100644 --- a/paddle/fluid/framework/downpour_lite_worker.cc +++ b/paddle/fluid/framework/downpour_lite_worker.cc @@ -314,11 +314,11 @@ void DownpourLiteWorker::TrainFilesWithProfiler() { PADDLE_ENFORCE_EQ(framework::TensorContainsInf(*tensor), false, platform::errors::InvalidArgument( - "Tensor %s contains Inf.", var_name)); + "phi::DenseTensor %s contains Inf.", var_name)); PADDLE_ENFORCE_EQ(framework::TensorContainsNAN(*tensor), false, platform::errors::InvalidArgument( - 
"Tensor %s contains NAN.", var_name)); + "phi::DenseTensor %s contains NAN.", var_name)); } #if defined(PADDLE_WITH_PSLIB) || defined(PADDLE_WITH_PSCORE) @@ -487,7 +487,7 @@ void DownpourLiteWorker::TrainFiles() { if (var == nullptr) { continue; } - Tensor* tensor = nullptr; + phi::DenseTensor* tensor = nullptr; int64_t len = 0; if (var->IsType()) { tensor = var->GetMutable(); @@ -534,11 +534,11 @@ void DownpourLiteWorker::TrainFiles() { PADDLE_ENFORCE_EQ(framework::TensorContainsInf(*tensor), false, platform::errors::InvalidArgument( - "Tensor %s contains Inf.", var_name)); + "phi::DenseTensor %s contains Inf.", var_name)); PADDLE_ENFORCE_EQ(framework::TensorContainsNAN(*tensor), false, platform::errors::InvalidArgument( - "Tensor %s contains NAN.", var_name)); + "phi::DenseTensor %s contains NAN.", var_name)); } #if defined(PADDLE_WITH_PSLIB) || defined(PADDLE_WITH_PSCORE) diff --git a/paddle/fluid/framework/downpour_worker.cc b/paddle/fluid/framework/downpour_worker.cc index 0bd577d2aa6c0..7ae37052be1f5 100644 --- a/paddle/fluid/framework/downpour_worker.cc +++ b/paddle/fluid/framework/downpour_worker.cc @@ -584,11 +584,11 @@ void DownpourWorker::TrainFilesWithProfiler() { PADDLE_ENFORCE_EQ(framework::TensorContainsInf(*tensor), false, platform::errors::InvalidArgument( - "Tensor %s contains Inf.", var_name)); + "phi::DenseTensor %s contains Inf.", var_name)); PADDLE_ENFORCE_EQ(framework::TensorContainsNAN(*tensor), false, platform::errors::InvalidArgument( - "Tensor %s contains NAN.", var_name)); + "phi::DenseTensor %s contains NAN.", var_name)); } if (need_to_push_sparse_) { @@ -872,7 +872,7 @@ void DownpourWorker::TrainFiles() { if (var == nullptr) { continue; } - Tensor* tensor = nullptr; + phi::DenseTensor* tensor = nullptr; int64_t len = 0; if (var->IsType()) { tensor = var->GetMutable(); @@ -919,11 +919,11 @@ void DownpourWorker::TrainFiles() { PADDLE_ENFORCE_EQ(framework::TensorContainsInf(*tensor), false, platform::errors::InvalidArgument( - "Tensor %s contains Inf.", var_name)); + "phi::DenseTensor %s contains Inf.", var_name)); PADDLE_ENFORCE_EQ(framework::TensorContainsNAN(*tensor), false, platform::errors::InvalidArgument( - "Tensor %s contains NAN.", var_name)); + "phi::DenseTensor %s contains NAN.", var_name)); } if (need_to_push_sparse_) { diff --git a/paddle/fluid/framework/eigen.h b/paddle/fluid/framework/eigen.h index 22d3ac4333fb6..bbf34c03130c1 100644 --- a/paddle/fluid/framework/eigen.h +++ b/paddle/fluid/framework/eigen.h @@ -57,19 +57,19 @@ struct EigenTensor { using ConstType = Eigen::TensorMap>; - static Type From(Tensor& tensor, DDim dims) { // NOLINT + static Type From(phi::DenseTensor& tensor, DDim dims) { // NOLINT return Type(tensor.data(), EigenDim::From(dims)); } - static Type From(Tensor& tensor) { // NOLINT + static Type From(phi::DenseTensor& tensor) { // NOLINT return From(tensor, tensor.dims()); } // NOLINT - static ConstType From(const Tensor& tensor, DDim dims) { + static ConstType From(const phi::DenseTensor& tensor, DDim dims) { return ConstType(tensor.data(), EigenDim::From(dims)); } - static ConstType From(const Tensor& tensor) { + static ConstType From(const phi::DenseTensor& tensor) { return From(tensor, tensor.dims()); } }; @@ -78,7 +78,7 @@ template struct EigenMatrix : public EigenTensor { - static typename EigenMatrix::Type Reshape(Tensor& tensor, // NOLINT + static typename EigenMatrix::Type Reshape(phi::DenseTensor& tensor, // NOLINT int num_col_dims) { int rank = tensor.dims().size(); PADDLE_ENFORCE_EQ((num_col_dims > 0 && 
num_col_dims < rank), @@ -92,7 +92,7 @@ struct EigenMatrix : public EigenTensor { phi::flatten_to_2d(tensor.dims(), num_col_dims)); } - static typename EigenMatrix::ConstType Reshape(const Tensor& tensor, + static typename EigenMatrix::ConstType Reshape(const phi::DenseTensor& tensor, int num_col_dims) { int rank = tensor.dims().size(); PADDLE_ENFORCE_EQ((num_col_dims > 0 && num_col_dims < rank), @@ -111,13 +111,14 @@ template struct EigenVector : public EigenTensor { - // Flatten reshapes a Tensor into an EigenVector. - static typename EigenVector::Type Flatten(Tensor& tensor) { // NOLINT + // Flatten reshapes a phi::DenseTensor into an EigenVector. + static typename EigenVector::Type Flatten( + phi::DenseTensor& tensor) { // NOLINT return EigenVector::From(tensor, {product(tensor.dims())}); } static typename EigenVector::ConstType Flatten( - const Tensor& tensor) { // NOLINT + const phi::DenseTensor& tensor) { // NOLINT return EigenVector::From(tensor, {product(tensor.dims())}); } }; @@ -132,14 +133,16 @@ struct EigenScalar { using ConstType = Eigen::TensorMap< Eigen::TensorFixedSize, MajorType, IndexType>>; - static Type From(Tensor& tensor) { return Type(tensor.data()); } // NOLINT + static Type From(phi::DenseTensor& tensor) { // NOLINT + return Type(tensor.data()); + } - static ConstType From(const Tensor& tensor) { + static ConstType From(const phi::DenseTensor& tensor) { return ConstType(tensor.data()); } }; -// Define Tensor with 32-bit index. +// Define phi::DenseTensor with 32-bit index. template using Tensor32BitIndex = Eigen::TensorMap, Eigen::Aligned>; diff --git a/paddle/fluid/framework/eigen_test.cc b/paddle/fluid/framework/eigen_test.cc index 4e214bd36f33a..1ce55c8a8de2b 100644 --- a/paddle/fluid/framework/eigen_test.cc +++ b/paddle/fluid/framework/eigen_test.cc @@ -28,8 +28,8 @@ TEST(EigenDim, From) { ASSERT_EQ(3, ed[2]); } -TEST(Eigen, Tensor) { - Tensor t; +TEST(Eigen, DenseTensor) { + phi::DenseTensor t; float* p = t.mutable_data(phi::make_ddim({1, 2, 3}), platform::CPUPlace()); for (int i = 0; i < 1 * 2 * 3; i++) { @@ -52,7 +52,7 @@ TEST(Eigen, Tensor) { } TEST(Eigen, ScalarFrom) { - Tensor t; + phi::DenseTensor t; int* p = t.mutable_data(phi::make_ddim({1}), platform::CPUPlace()); *p = static_cast(100); @@ -63,7 +63,7 @@ TEST(Eigen, ScalarFrom) { } TEST(Eigen, VectorFrom) { - Tensor t; + phi::DenseTensor t; float* p = t.mutable_data(phi::make_ddim({6}), platform::CPUPlace()); for (int i = 0; i < 6; i++) { p[i] = static_cast(i); @@ -79,7 +79,7 @@ TEST(Eigen, VectorFrom) { } TEST(Eigen, VectorFlatten) { - Tensor t; + phi::DenseTensor t; float* p = t.mutable_data(phi::make_ddim({1, 2, 3}), platform::CPUPlace()); for (int i = 0; i < 1 * 2 * 3; i++) { @@ -96,7 +96,7 @@ TEST(Eigen, VectorFlatten) { } TEST(Eigen, Matrix) { - Tensor t; + phi::DenseTensor t; float* p = t.mutable_data(phi::make_ddim({2, 3}), platform::CPUPlace()); for (int i = 0; i < 2 * 3; i++) { @@ -116,7 +116,7 @@ TEST(Eigen, Matrix) { } TEST(Eigen, MatrixReshape) { - Tensor t; + phi::DenseTensor t; float* p = t.mutable_data({2, 3, 6, 4}, platform::CPUPlace()); for (int i = 0; i < 2 * 3 * 6 * 4; ++i) { p[i] = static_cast(i); diff --git a/paddle/fluid/framework/fleet/ascend_wrapper.h b/paddle/fluid/framework/fleet/ascend_wrapper.h index 2eb9dad870e67..372f0e7d38be0 100644 --- a/paddle/fluid/framework/fleet/ascend_wrapper.h +++ b/paddle/fluid/framework/fleet/ascend_wrapper.h @@ -132,7 +132,7 @@ class AscendInstance { "Not support %s as tensor type.", DataTypeToString(type))); } } - ge::Tensor 
ConvertToGeTensor(const Tensor *tensor) { + ge::Tensor ConvertToGeTensor(const phi::DenseTensor *tensor) { auto numel = tensor->numel(); std::vector vec_dim; auto dimen = arity(tensor->dims()); @@ -164,10 +164,10 @@ class AscendInstance { } void RunAscendSubgraph(int graph_idx, - const std::vector &inputs, - std::vector *outputs) { + const std::vector &inputs, + std::vector *outputs) { VLOG(1) << "Ascend Graph[" << graph_idx << "] is about to run."; - // Convert paddle Tensor to GE Tensor + // Convert paddle phi::DenseTensor to GE phi::DenseTensor std::vector ge_inputs; for (const auto &e : inputs) { ge_inputs.push_back(ConvertToGeTensor(e)); @@ -187,7 +187,8 @@ class AscendInstance { for (size_t i = 0; i < ge_outputs.size(); ++i) { const uint8_t *ret_data = ge_outputs[i].GetData(); size_t size = ge_outputs[i].GetSize(); - VLOG(1) << "GE Tensor size of the " << i << "th output var is " << size; + VLOG(1) << "GE phi::DenseTensor size of the " << i << "th output var is " + << size; auto *dst = (*outputs)[i]->mutable_data({(int64_t)size}, platform::CPUPlace()); memcpy(dst, ret_data, size); diff --git a/paddle/fluid/framework/framework.proto b/paddle/fluid/framework/framework.proto index 2a56dc60335d9..fbfca2f983e7d 100644 --- a/paddle/fluid/framework/framework.proto +++ b/paddle/fluid/framework/framework.proto @@ -124,7 +124,7 @@ message VarType { FP16 = 4; FP32 = 5; FP64 = 6; - // Tensor is used in C++. + // phi::DenseTensor is used in C++. SIZE_T = 19; UINT8 = 20; INT8 = 21; diff --git a/paddle/fluid/framework/infershape_utils.cc b/paddle/fluid/framework/infershape_utils.cc index 22d9eb43c59eb..93906e6c53e71 100644 --- a/paddle/fluid/framework/infershape_utils.cc +++ b/paddle/fluid/framework/infershape_utils.cc @@ -147,7 +147,7 @@ int64_t CompatMetaTensor::numel() const { ValidCheck(*this); if (is_runtime_) { auto* var = PADDLE_GET_CONST(Variable*, var_); - return var->Get().numel(); + return var->Get().numel(); } else { auto* var = PADDLE_GET_CONST(VarDesc*, var_); return var->ElementSize(); diff --git a/paddle/fluid/framework/infershape_utils.h b/paddle/fluid/framework/infershape_utils.h index 3b7744515b2ec..d85c550dc0d6f 100644 --- a/paddle/fluid/framework/infershape_utils.h +++ b/paddle/fluid/framework/infershape_utils.h @@ -81,15 +81,17 @@ class CompatMetaTensor : public phi::MetaTensor { } const phi::SelectedRows& GetSelectedRows() const { - PADDLE_ENFORCE_EQ(is_runtime_, - true, - platform::errors::Unavailable( - "Only can get Tensor from MetaTensor in rumtime.")); + PADDLE_ENFORCE_EQ( + is_runtime_, + true, + platform::errors::Unavailable( + "Only can get phi::DenseTensor from MetaTensor in rumtime.")); auto* var = PADDLE_GET_CONST(Variable*, var_); - PADDLE_ENFORCE_EQ(var->IsType(), - true, - platform::errors::Unavailable( - "The Tensor in MetaTensor is not SelectedRows.")); + PADDLE_ENFORCE_EQ( + var->IsType(), + true, + platform::errors::Unavailable( + "The phi::DenseTensor in MetaTensor is not SelectedRows.")); return var->Get(); } diff --git a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc index 9bfc031f42e94..1e84646378106 100644 --- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc @@ -47,7 +47,7 @@ AttentionLSTMFusePass::AttentionLSTMFusePass() { .IsTensor() .IsOptional() .End() - .AddInput("ShapeTensorList") // vector> + .AddInput("ShapeTensorList") // vector> .IsOptional() .End() .AddOutput("Out") @@ -262,11 +262,12 @@ void 
PrepareParameters(Graph* graph, const Param& param, ir::Node* lstm_op) { // reshape attention_bias auto* attention_bias_t = scope.FindVar(param.AttentionBias)->GetMutable(); - PADDLE_ENFORCE_EQ(attention_bias_t->dims().size(), - 1, - platform::errors::InvalidArgument( - "Tensor attention bias dimension size(%d) must be 1.", - attention_bias_t->dims().size())); + PADDLE_ENFORCE_EQ( + attention_bias_t->dims().size(), + 1, + platform::errors::InvalidArgument( + "phi::DenseTensor attention bias dimension size(%d) must be 1.", + attention_bias_t->dims().size())); attention_bias_t->Resize(phi::make_ddim({1, attention_bias_t->dims()[0]})); auto* attention_scalar_bias_t = @@ -339,11 +340,12 @@ void PrepareLSTMBias(const LoDTensor& B_forget, B_output.data(), B_cell.data()}; - PADDLE_ENFORCE_EQ(B_forget.dims().size(), - 1, - platform::errors::InvalidArgument( - "Tensor B forget dimension size(%d) must be 1.", - B_forget.dims().size())); + PADDLE_ENFORCE_EQ( + B_forget.dims().size(), + 1, + platform::errors::InvalidArgument( + "phi::DenseTensor B forget dimension size(%d) must be 1.", + B_forget.dims().size())); int D = B_forget.dims()[0]; out->Resize(phi::make_ddim({1, 4 * D})); auto* out_data = out->mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc index 3c7f77708cd0b..5da676dd09487 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc @@ -38,7 +38,7 @@ class Scope; namespace { template void ConvertTensorType(paddle::framework::LoDTensor* tensor) { - paddle::framework::Tensor tmp_tensor; + phi::DenseTensor tmp_tensor; tmp_tensor.set_type(paddle::experimental::CppTypeToDataType::Type()); tmp_tensor.Resize(tensor->dims()); auto* tmp_data = tmp_tensor.mutable_data(paddle::platform::CPUPlace()); @@ -93,13 +93,13 @@ void recompute_bias_and_weights(const Scope* scope, Eigen::Array>; // Re-compute bias of conv2d from BN - PADDLE_ENFORCE_EQ( - eltwise_y_in_tensor->dims(), - bn_bias_tensor.dims(), - platform::errors::InvalidArgument("Tensor elementwise y(%d) and batch " - "norm bias(%d) must have same dims.", - eltwise_y_in_tensor->dims().size(), - bn_bias_tensor.dims().size())); + PADDLE_ENFORCE_EQ(eltwise_y_in_tensor->dims(), + bn_bias_tensor.dims(), + platform::errors::InvalidArgument( + "phi::DenseTensor elementwise y(%d) and batch " + "norm bias(%d) must have same dims.", + eltwise_y_in_tensor->dims().size(), + bn_bias_tensor.dims().size())); auto* scale_tensor = scope->FindVar(bn_scale.Name())->GetMutable(); auto* variance_tensor = @@ -375,7 +375,7 @@ void ConvBNFusePass::ApplyImpl(ir::Graph* graph) const { conv_bias_tensor->dims(), eltwise_y_in_tensor->dims(), platform::errors::InvalidArgument( - "Tensor convolution bias(%d) and elementwise y(%d) " + "phi::DenseTensor convolution bias(%d) and elementwise y(%d) " "must have same dims.", conv_bias_tensor->dims().size(), eltwise_y_in_tensor->dims().size())); diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.cc b/paddle/fluid/framework/ir/fc_fuse_pass.cc index a71f6ac94b415..1cdefad43030e 100644 --- a/paddle/fluid/framework/ir/fc_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc @@ -132,7 +132,7 @@ int FCFusePass::ApplyFCPattern(Graph* graph, bool with_relu) const { GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern); - // Only support 2D-Tensor as weight for FC + // Only support 2D-phi::DenseTensor as weight for 
FC std::vector w_shape = w->Var()->GetShape(); size_t w_rank = w_shape.size(); if (w_rank != 2) return; diff --git a/paddle/fluid/framework/ir/fusion_group/operation.cc b/paddle/fluid/framework/ir/fusion_group/operation.cc index dd399eea604fd..98279e73c1a7f 100644 --- a/paddle/fluid/framework/ir/fusion_group/operation.cc +++ b/paddle/fluid/framework/ir/fusion_group/operation.cc @@ -141,7 +141,8 @@ void OperationMap::InsertUnaryElementwiseOperations() { // scale // out = (bias_after_scale) ? scale * X + bias : scale(X + bias) // here we use '=' operator to separate th default value - // TODO(wangchaochaohu): Later we need to support Tensor input for scale and + // TODO(wangchaochaohu): Later we need to support phi::DenseTensor input for + // scale and // bias. insert_handler( "scale", diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass.cc index 8a24e93170c25..69d304d0a7c2b 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass.cc @@ -132,7 +132,7 @@ class ShrinkDepsOpFunctor { /** * Shrink op dependencies according to no need buffer vars. * - * If some ops do not need Tensor buffer of any input, + * If some ops do not need phi::DenseTensor buffer of any input, * just remove the dependency of this op, i.e, decrease reference count. * * For example, input Y of elementwise_add_grad op is only used to infer shape diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc index 394c1ae797e4c..df19bc9ade8d5 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc @@ -27,7 +27,7 @@ namespace framework { namespace ir { void ComputePropagateScalesMkldnnPass::GetTensorFromVector( - const std::vector& data_v, Tensor* tensor) const { + const std::vector& data_v, phi::DenseTensor* tensor) const { const int size = static_cast(data_v.size()); auto* data = tensor->mutable_data({size}, platform::CPUPlace()); for (int i = 0; i < size; i++) { @@ -41,15 +41,15 @@ void ComputePropagateScalesMkldnnPass::GetQuantInfo( GetInfoFromTheFirstOp(graph, "has_quant_info", "var_quant_scales", &info_map); for (auto iter = info_map.begin(); iter != info_map.end(); iter++) { - Tensor tensor; + phi::DenseTensor tensor; GetTensorFromVector(iter->second, &tensor); auto pair = std::make_pair(false, tensor); var_quant_scales->insert(std::make_pair(iter->first, pair)); } } -std::vector ComputePropagateScalesMkldnnPass::GetScales(Tensor* tensor, - int axis) const { +std::vector ComputePropagateScalesMkldnnPass::GetScales( + phi::DenseTensor* tensor, int axis) const { PADDLE_ENFORCE_LT(axis, 2, platform::errors::InvalidArgument( @@ -120,7 +120,7 @@ void ComputePropagateScalesMkldnnPass::ComputeVarScales( volume *= dims[i]; } - Tensor tmp_tensor; + phi::DenseTensor tmp_tensor; std::vector reshape_dims = {dims[0], volume}; tmp_tensor.Resize(phi::make_ddim(reshape_dims)); auto* weight_data = weight_tensor->data(); @@ -130,7 +130,7 @@ void ComputePropagateScalesMkldnnPass::ComputeVarScales( } auto scales_v = GetScales(&tmp_tensor, axis); - Tensor tensor; + phi::DenseTensor tensor; GetTensorFromVector(scales_v, &tensor); auto pair = std::make_pair(false, tensor); var_quant_scales->insert(std::make_pair(var_name, pair)); @@ -142,7 
+142,7 @@ void ComputePropagateScalesMkldnnPass::ComputeSingleGruWeightScales( Scope* scope, const std::string& wx_var_name, const std::string& wh_var_name, - Tensor* tensor) const { + phi::DenseTensor* tensor) const { auto* wx_var = scope->FindVar(wx_var_name); PADDLE_ENFORCE_NOT_NULL( wx_var, @@ -228,7 +228,7 @@ void ComputePropagateScalesMkldnnPass::ComputeGruWeightScales( for (int i = 0; i < wx_names_size; i++) { auto wh_var_name = wh_var_names[i]; auto wx_var_name = wx_var_names[i]; - Tensor tensor; + phi::DenseTensor tensor; ComputeSingleGruWeightScales(scope, wx_var_name, wh_var_name, &tensor); auto pair = std::make_pair(false, tensor); var_quant_scales->insert(std::make_pair(wx_var_name, pair)); @@ -241,7 +241,7 @@ void ComputePropagateScalesMkldnnPass::ComputeSingleLstmWeightScales( Scope* scope, const std::string& wx_var_name, const std::string& wh_var_name, - Tensor* tensor) const { + phi::DenseTensor* tensor) const { auto* wx_var = scope->FindVar(wx_var_name); PADDLE_ENFORCE_NOT_NULL( wx_var, @@ -307,7 +307,7 @@ void ComputePropagateScalesMkldnnPass::ComputeLstmWeightScales( for (int i = 0; i < wx_names_size; i++) { auto wh_var_name = wh_var_names[i]; auto wx_var_name = wx_var_names[i]; - Tensor tensor; + phi::DenseTensor tensor; ComputeSingleLstmWeightScales(scope, wx_var_name, wh_var_name, &tensor); auto pair = std::make_pair(false, tensor); var_quant_scales->insert(std::make_pair(wx_var_name, pair)); @@ -348,7 +348,7 @@ void ComputePropagateScalesMkldnnPass::UpdateScaleOpInScale( const auto tensor = pair.second; const auto scale = PADDLE_GET_CONST(float, op_node->Op()->GetAttr("scale")); - Tensor tmp_tensor; + phi::DenseTensor tmp_tensor; tmp_tensor.Resize(tensor.dims()); auto* data = tmp_tensor.mutable_data(platform::CPUPlace()); for (int i = 0; i < tensor.numel(); i++) { diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h index 09863fdc768b2..ecc3ad16a54e6 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h @@ -22,7 +22,8 @@ namespace paddle { namespace framework { namespace ir { -using StringPairMap = std::unordered_map>; +using StringPairMap = + std::unordered_map>; class ComputePropagateScalesMkldnnPass : public FusePassBase { public: @@ -38,11 +39,11 @@ class ComputePropagateScalesMkldnnPass : public FusePassBase { private: void GetTensorFromVector(const std::vector& data_v, - Tensor* tensor) const; + phi::DenseTensor* tensor) const; void GetQuantInfo(ir::Graph* graph, StringPairMap* var_quant_scales) const; - std::vector GetScales(Tensor* tensor, int axis) const; + std::vector GetScales(phi::DenseTensor* tensor, int axis) const; void ComputeVarScales(ir::Graph* graph, Scope* scope, @@ -54,7 +55,7 @@ class ComputePropagateScalesMkldnnPass : public FusePassBase { void ComputeSingleGruWeightScales(Scope* scope, const std::string& wx_var_name, const std::string& wh_var_name, - Tensor* tensor) const; + phi::DenseTensor* tensor) const; void ComputeGruWeightScales(ir::Graph* graph, Scope* scope, @@ -65,7 +66,7 @@ class ComputePropagateScalesMkldnnPass : public FusePassBase { void ComputeSingleLstmWeightScales(Scope* scope, const std::string& wx_var_name, const std::string& wh_var_name, - Tensor* tensor) const; + phi::DenseTensor* tensor) const; void ComputeLstmWeightScales(ir::Graph* graph, Scope* scope, diff --git 
a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc index 38c6fb57d58e3..03c01507ca27d 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass_tester.cc @@ -59,7 +59,7 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test { pass.reset(new ComputePropagateScalesMkldnnPass()); } - std::vector GetScales(Tensor* tensor, int axis) const { + std::vector GetScales(phi::DenseTensor* tensor, int axis) const { return pass->GetScales(tensor, axis); } @@ -164,7 +164,7 @@ class ComputePropagateScalesMkldnnPassTest : public testing::Test { graph, &scope, wx_name, wh_name, &var_quant_scales); } bool is_unsigned; - framework::Tensor wx_result_tensor; + phi::DenseTensor wx_result_tensor; std::tie(is_unsigned, wx_result_tensor) = var_quant_scales[wx_var_names]; ASSERT_EQ(is_unsigned, false); @@ -235,7 +235,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, get_scales_function) { const auto& values = positive_and_negative_values; float max_val = *std::max_element(values.begin(), values.end()); - framework::Tensor var_tensor; + phi::DenseTensor var_tensor; var_tensor.Resize(phi::make_dim(values.size(), 1)); std::copy(begin(values), end(values), @@ -273,7 +273,7 @@ TEST_F(ComputePropagateScalesMkldnnPassTest, compute_var_scales) { ComputeVarScales(graph, &scope, ops, weight_name, axis, &var_quant_scales); bool is_unsigned; - framework::Tensor result_tensor; + phi::DenseTensor result_tensor; std::tie(is_unsigned, result_tensor) = var_quant_scales[weight_var_name]; diff --git a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc index b9ffee3c00c46..814d1d5d73dcd 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_affine_channel_mkldnn_fuse_pass.cc @@ -62,14 +62,14 @@ void recompute_bias_and_weights(const Scope* scope, Eigen::Array>; // Re-compute bias of conv2d from AffineChannel - PADDLE_ENFORCE_EQ( - eltwise_y_in_tensor->dims(), - ac_bias_tensor.dims(), - platform::errors::InvalidArgument( - "Tensor elementwise y(%d) and activation bias(%d) must have same " - "dimension.", - eltwise_y_in_tensor->dims().size(), - ac_bias_tensor.dims().size())); + PADDLE_ENFORCE_EQ(eltwise_y_in_tensor->dims(), + ac_bias_tensor.dims(), + platform::errors::InvalidArgument( + "phi::DenseTensor elementwise y(%d) and activation " + "bias(%d) must have same " + "dimension.", + eltwise_y_in_tensor->dims().size(), + ac_bias_tensor.dims().size())); auto* scale_tensor = scope->FindVar(ac_scale.Name())->GetMutable(); diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc index 6f7bb614cc79f..78fc02329efe2 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc @@ -192,7 +192,7 @@ class MKLDNNConvBatchNormPassTest { return prog; } - void FillTensorWithRandomData(Tensor* tnsr, + void FillTensorWithRandomData(phi::DenseTensor* tnsr, float lowb, float upb, platform::CPUPlace place) { @@ -206,7 +206,7 @@ class MKLDNNConvBatchNormPassTest { } } - void CompareTensors(Tensor* tensor1, Tensor* tensor2) { + void CompareTensors(phi::DenseTensor* 
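The recompute_bias_and_weights routine touched above folds an AffineChannel (per-output-channel scale and bias) into the preceding conv. A plain-C++ sketch of that folding, assuming OIHW weights flattened row-major per output channel; the exact formula is an assumption and is not copied from the pass:

    #include <cstdint>
    #include <vector>

    // Fold per-channel scale/bias into conv weights and elementwise bias.
    void FoldAffineChannel(std::vector<float>* weights,  // [oc * k], one row per channel
                           std::vector<float>* bias,     // [oc]
                           const std::vector<float>& ac_scale,
                           const std::vector<float>& ac_bias,
                           int64_t oc, int64_t k) {
      for (int64_t c = 0; c < oc; ++c) {
        for (int64_t i = 0; i < k; ++i) (*weights)[c * k + i] *= ac_scale[c];
        (*bias)[c] = (*bias)[c] * ac_scale[c] + ac_bias[c];
      }
    }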
tensor1, phi::DenseTensor* tensor2) { // check dims for (int i = 0; i < tensor1->numel(); ++i) { EXPECT_NEAR(tensor1->data()[i], tensor2->data()[i], 1e-3); @@ -306,7 +306,7 @@ class MKLDNNConvBatchNormPassTest { // Need to copy result over as the same scope is used in both executors // so first result will be overwritten by second auto* m_tensor = exe.FindTensor("m"); - Tensor no_ir_result; + phi::DenseTensor no_ir_result; TensorCopy(*m_tensor, place, &no_ir_result); graph.reset(pass->Apply(graph.release())); diff --git a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc index b674ef52183c0..abe51960183c5 100644 --- a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc @@ -406,7 +406,7 @@ void QuantDequantMkldnnPass::RemoveFakeOps( GraphSafeRemoveNodes(graph, nodes2rm); } -void QuantDequantMkldnnPass::TransposeWeight(Tensor* input) const { +void QuantDequantMkldnnPass::TransposeWeight(phi::DenseTensor* input) const { const auto in_dims = input->dims(); std::vector out_dim_v; std::vector axis; @@ -421,7 +421,7 @@ void QuantDequantMkldnnPass::TransposeWeight(Tensor* input) const { auto out_stride = phi::stride(out_dims); const int count = input->numel(); - Tensor trans_tensor; + phi::DenseTensor trans_tensor; trans_tensor.Resize(out_dims); float* trans_data = trans_tensor.mutable_data(platform::CPUPlace()); float* in_data = input->mutable_data(platform::CPUPlace()); @@ -465,7 +465,7 @@ bool QuantDequantMkldnnPass::IsInt8Weight( void QuantDequantMkldnnPass::ConvertFromINT8ToFP32( const std::vector& scales, - Tensor* weight_tensor, + phi::DenseTensor* weight_tensor, int8_t* int8_weight_data, float* fp32_weight_data, const std::string& weight_var_name) const { diff --git a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h index eee7fc96ed1d4..deb9072e04a49 100644 --- a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h @@ -66,7 +66,7 @@ class QuantDequantMkldnnPass : public FusePassBase { const; void ConvertFromINT8ToFP32(const std::vector& scales, - Tensor* weight_tensor, + phi::DenseTensor* weight_tensor, int8_t* int8_weight_data, float* fp32_weight_data, const std::string& weight_var_name) const; @@ -106,7 +106,7 @@ class QuantDequantMkldnnPass : public FusePassBase { Scope* scope, const std::string& weight_name) const; - void TransposeWeight(Tensor* input) const; + void TransposeWeight(phi::DenseTensor* input) const; void DequantizeOpWeights( Node* op_node, diff --git a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc index 089c252ea6947..ed1d0653df715 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc @@ -639,12 +639,12 @@ PDNode* MultiHeadMatmulV3Pattern::operator()() { namespace { template -inline void QKVWeightsProcess(Tensor* wq_tensor, - Tensor* wk_tensor, - Tensor* wv_tensor, - Tensor* bq_tensor, - Tensor* bk_tensor, - Tensor* bv_tensor) { +inline void QKVWeightsProcess(phi::DenseTensor* wq_tensor, + phi::DenseTensor* wk_tensor, + phi::DenseTensor* wv_tensor, + phi::DenseTensor* bq_tensor, + phi::DenseTensor* bk_tensor, + phi::DenseTensor* bv_tensor) { auto* wq_data = wq_tensor->mutable_data(platform::CPUPlace()); auto* wk_data = 
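TransposeWeight above swaps the two axes of an fp32 weight by walking input and output strides. The same copy reduced to plain C++ over a row-major buffer (illustrative helper, not the pass code):

    #include <cstdint>
    #include <vector>

    // Transpose a row-major {rows, cols} buffer: in_stride = {cols, 1},
    // out_stride for the transposed layout = {rows, 1}.
    std::vector<float> Transpose2D(const std::vector<float>& in,
                                   int64_t rows, int64_t cols) {
      std::vector<float> out(in.size());
      for (int64_t r = 0; r < rows; ++r) {
        for (int64_t c = 0; c < cols; ++c) {
          out[c * rows + r] = in[r * cols + c];
        }
      }
      return out;
    }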
wk_tensor->mutable_data(platform::CPUPlace()); auto* wv_data = wv_tensor->mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/framework/ir/pass_test_util.h b/paddle/fluid/framework/ir/pass_test_util.h index c028108617c21..44f6c66295466 100644 --- a/paddle/fluid/framework/ir/pass_test_util.h +++ b/paddle/fluid/framework/ir/pass_test_util.h @@ -129,7 +129,7 @@ bool RunPassAndAssert(Graph* graph, /// @param[in] var_name The variable name. /// @param[in] dims The dimensions of allocated tensor. /// -/// @tparam T Tensor data type. +/// @tparam T phi::DenseTensor data type. /// template void InitLoDTensorHolder(const Scope& scope, diff --git a/paddle/fluid/framework/lod_tensor.h b/paddle/fluid/framework/lod_tensor.h index 33d293faad129..dec38ae386159 100644 --- a/paddle/fluid/framework/lod_tensor.h +++ b/paddle/fluid/framework/lod_tensor.h @@ -22,18 +22,18 @@ limitations under the License. */ #include #include "paddle/fluid/framework/mixed_vector.h" -#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" #include "paddle/phi/core/ddim.h" +#include "paddle/phi/core/dense_tensor.h" namespace paddle { namespace framework { using LoDTensor = phi::DenseTensor; -// Split Tensor and copy to each place specified in places. +// Split phi::DenseTensor and copy to each place specified in places. std::vector SplitLoDTensor( const LoDTensor& src, const std::vector places); diff --git a/paddle/fluid/framework/new_executor/data_transfer.cc b/paddle/fluid/framework/new_executor/data_transfer.cc index 06962f7b5e773..a348adbedfcc7 100644 --- a/paddle/fluid/framework/new_executor/data_transfer.cc +++ b/paddle/fluid/framework/new_executor/data_transfer.cc @@ -194,7 +194,8 @@ bool IsTensorOfVarInitialized(Variable* var) { if (var->IsType() || var->IsType()) { return GetLoDTensorOrSelectedRowsValueFromVar(*var)->IsInitialized(); } else if (var->IsType()) { - return static_cast(&(var->Get()[0])) + return static_cast( + &(var->Get()[0])) ->IsInitialized(); } } @@ -440,7 +441,7 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key, for (size_t i = 0; i < var_name_item.second.size(); ++i) { auto var = var_name_item.second[i]; auto var_name = new_ins[var_name_item.first].at(i); - const Tensor* tensor_in; + const phi::DenseTensor* tensor_in; std::string new_var_name; bool is_transferred = false; @@ -450,8 +451,8 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key, if (var->Get().size() == 0) { continue; } - tensor_in = - static_cast(&(var->Get()[0])); + tensor_in = static_cast( + &(var->Get()[0])); } else { continue; } @@ -470,7 +471,8 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key, (expected_kernel_key.data_layout_ != DataLayout::kMKLDNN) && (paddle::platform::MKLDNNDeviceContext::tls() .get_cur_paddle_data_layout() == DataLayout::kNHWC)) { - VLOG(7) << "Created reshaped dummy input based on MKL-DNN Tensor , " + VLOG(7) << "Created reshaped dummy input based on MKL-DNN " + "phi::DenseTensor , " "but kNHWC layout" << var_name_item.first << " in Operator " << op_base->Type(); diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index c379e135b16b6..d973942d9f975 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -355,10 +355,10 @@ void InterpreterCore::BuildAndCacheInstructionCtx(Instruction* 
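The dispatch in ApplyDataTransform above picks the underlying dense tensor out of a Variable depending on what it holds. A hand-restored illustration follows; the angle-bracket arguments are not preserved in this rendering, so treat the exact spellings as assumptions rather than the verbatim patch (fragment from inside the input loop, usual framework headers assumed):

    const phi::DenseTensor* tensor_in = nullptr;
    if (var->IsType<LoDTensor>() || var->IsType<phi::SelectedRows>()) {
      tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var);
    } else if (var->IsType<LoDTensorArray>()) {
      if (var->Get<LoDTensorArray>().size() == 0) continue;  // nothing to transfer
      tensor_in = static_cast<const phi::DenseTensor*>(
          &(var->Get<LoDTensorArray>()[0]));
    } else {
      continue;  // other variable kinds are not data-transferred
    }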
instr_node) { } void InterpreterCore::BuildInplace() { - // NOTE(Ruibiao): coalesce_tensor_op outputs a FusedOutput Tensor and a list - // of Output Tensors which are sliced from the FusedOutput. These outputs - // sholud not be the outvar of the in-place var-pair since memory reuse - // between FusedOutput and Output Tensors is assumed. For the following + // NOTE(Ruibiao): coalesce_tensor_op outputs a FusedOutput phi::DenseTensor + // and a list of Output Tensors which are sliced from the FusedOutput. These + // outputs sholud not be the outvar of the in-place var-pair since memory + // reuse between FusedOutput and Output Tensors is assumed. For the following // example: // fused_var, var1, var2, var3 = coalesce_tensor(var1, var2, var3) // var1 = sum(var4, var5) @@ -444,9 +444,9 @@ void InterpreterCore::BuildOperatorDependences() { } } -// At the end of each step, the holder of Tensor in LoDTensorArray is null. -// Clear these Tensors and leave LoDTensorArray empty, otherwise an exception -// will occur in the next step +// At the end of each step, the holder of phi::DenseTensor in LoDTensorArray is +// null. Clear these Tensors and leave LoDTensorArray empty, otherwise an +// exception will occur in the next step void InterpreterCore::ClearLoDTensorArrayInLocalScope() { auto vars = local_scope_->LocalVars(); for (auto var : vars) { @@ -994,7 +994,7 @@ void InterpreterCore::RecordStreamForGC(const Instruction& instr) { gpuStream_t stream = reinterpret_cast(instr.DeviceContext()).stream(); - auto TensorRecordStream = [&stream](Tensor& tensor) { + auto TensorRecordStream = [&stream](phi::DenseTensor& tensor) { auto allocation = tensor.Holder(); if (allocation == nullptr) { return; diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.cc b/paddle/fluid/framework/new_executor/new_executor_defs.cc index c40a80ce0752c..0d4fdaab41b6b 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.cc +++ b/paddle/fluid/framework/new_executor/new_executor_defs.cc @@ -299,7 +299,7 @@ void InterpretercoreInferShapeContext::ShareLoD(const std::string& in, // TODO(dzhwinter) : reuse ShareLoD in most operators. // Need to call ShareLayout explicitly in sequence related ops. -// Shall we have a better method to shared info between in/out Tensor? +// Shall we have a better method to shared info between in/out phi::DenseTensor? 
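The NOTE above about coalesce_tensor_op is an aliasing argument: the sliced outputs are views into the FusedOutput buffer, so reusing one of them as an in-place output would silently overwrite the fused storage. A small self-contained illustration of that aliasing with plain vectors (not Paddle code):

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<float> fused(6, 0.f);   // stands in for FusedOutput
      float* var1 = fused.data();         // view over elements [0, 3)
      float* var2 = fused.data() + 3;     // view over elements [3, 6)
      var1[0] = 42.f;                     // writing the view writes the fused buffer
      assert(fused[0] == 42.f);           // hence var1 must not be reused in place
      (void)var2;
      return 0;
    }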
#ifdef PADDLE_WITH_MKLDNN // Fix me: ugly workaround below // Correct solution: diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 4c28a9b59535e..ac0af60ec722e 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -528,9 +528,10 @@ void OperatorBase::GenerateTemporaryNames() { } } -const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) { +const phi::DenseTensor* GetLoDTensorOrSelectedRowsValueFromVar( + const Variable& var) { if (var.IsType()) { - return static_cast(&(var.Get())); + return static_cast(&(var.Get())); } else if (var.IsType()) { return &(var.Get().value()); } else { @@ -540,7 +541,7 @@ const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) { } } -Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) { +phi::DenseTensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) { if (var->IsType()) { return var->GetMutable(); } else if (var->IsType()) { @@ -607,20 +608,20 @@ Variable* ExecutionContext::OutputVar(const std::string& name) const { } template <> -const std::vector ExecutionContext::MultiInput( - const std::string& name) const { +const std::vector +ExecutionContext::MultiInput(const std::string& name) const { LogVarUsageIfUnusedVarCheckEnabled(name); auto vars = MultiInputVar(name); if (vars.size() == 0) { return {}; } - std::vector res; + std::vector res; res.reserve(vars.size()); std::transform(vars.begin(), vars.end(), std::back_inserter(res), - [&](const Variable* var) -> const Tensor* { + [&](const Variable* var) -> const phi::DenseTensor* { if (var == nullptr) return nullptr; PADDLE_ENFORCE_EQ(var->IsType(), true, @@ -634,19 +635,19 @@ const std::vector ExecutionContext::MultiInput( } template <> -std::vector ExecutionContext::MultiOutput( +std::vector ExecutionContext::MultiOutput( const std::string& name) const { auto vars = MultiOutputVar(name); if (vars.size() == 0) { return {}; } - std::vector res; + std::vector res; res.reserve(vars.size()); std::transform(vars.begin(), vars.end(), std::back_inserter(res), - [&](Variable* var) -> Tensor* { + [&](Variable* var) -> phi::DenseTensor* { return var == nullptr ? nullptr : var->GetMutable(); }); @@ -958,7 +959,7 @@ class RuntimeInferShapeContext : public InferShapeContext { // TODO(dzhwinter) : reuse ShareLoD in most operators. // Need to call ShareLayout explicitly in sequence related ops. -// Shall we have a better method to shared info between in/out Tensor? +// Shall we have a better method to shared info between in/out phi::DenseTensor? 
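The MultiInput/MultiOutput specializations above follow one pattern: map every variable slot to its dense tensor and keep nullptr for absent optional slots. A minimal sketch of that shape in plain C++ ("Var" is a stand-in type, not the framework class):

    #include <algorithm>
    #include <iterator>
    #include <vector>

    struct Var { float value = 0.f; };

    std::vector<const float*> GatherTensors(const std::vector<const Var*>& vars) {
      std::vector<const float*> res;
      res.reserve(vars.size());
      std::transform(vars.begin(), vars.end(), std::back_inserter(res),
                     [](const Var* v) -> const float* {
                       return v == nullptr ? nullptr : &v->value;
                     });
      return res;
    }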
#ifdef PADDLE_WITH_MKLDNN // Fix me: ugly workaround below // Correct solution: @@ -1210,7 +1211,7 @@ struct OperatorWithKernel::CacheImpl { static void CheckTensorNANOrInf(const std::string& op_type, const std::string& name, - const framework::Tensor& tensor) { + const phi::DenseTensor& tensor) { if (tensor.memory_size() == 0) { return; } @@ -1218,16 +1219,18 @@ static void CheckTensorNANOrInf(const std::string& op_type, framework::TransToProtoVarType(tensor.dtype()) != proto::VarType::FP64) { return; } - PADDLE_ENFORCE_NE( - framework::TensorContainsInf(tensor), - true, - platform::errors::Fatal( - "Operator %s output Tensor %s contains Inf.", op_type, name)); - PADDLE_ENFORCE_NE( - framework::TensorContainsNAN(tensor), - true, - platform::errors::Fatal( - "Operator %s output Tensor %s contains NAN.", op_type, name)); + PADDLE_ENFORCE_NE(framework::TensorContainsInf(tensor), + true, + platform::errors::Fatal( + "Operator %s output phi::DenseTensor %s contains Inf.", + op_type, + name)); + PADDLE_ENFORCE_NE(framework::TensorContainsNAN(tensor), + true, + platform::errors::Fatal( + "Operator %s output phi::DenseTensor %s contains NAN.", + op_type, + name)); } bool OperatorWithKernel::SupportGPU() const { @@ -2112,7 +2115,7 @@ void OperatorWithKernel::HandleComplexGradToRealGrad( << " var `" << var_name << "` to " << framework::DataTypeToString(dst_type) << " real var in static graph."; - Tensor out; + phi::DenseTensor out; TransComplexToReal(dst_type, src_type, *grad_tensor, &out); SetTensorToVariable(*grad_var, out, grad_var); } @@ -2153,7 +2156,7 @@ Scope* OperatorWithKernel::PrepareData( auto* tensor_in = GetLoDTensorOrSelectedRowsValueFromVar(*var); - // When no_buffer_ins then checking of Tensor::holder_ is + // When no_buffer_ins then checking of phi::DenseTensor::holder_ is // not a thread safe. And for infershape scenario checks // to be omitted are not really needed if (should_skip_input == true) { @@ -2180,7 +2183,8 @@ Scope* OperatorWithKernel::PrepareData( out->Resize(tensor_in->dims()); platform::MatchShapeToLayout( out, tensor_in->layout(), DataLayout::kNHWC); - VLOG(7) << "Created reshaped dummy input based on MKL-DNN Tensor , " + VLOG(7) << "Created reshaped dummy input based on MKL-DNN " + "phi::DenseTensor , " "but kNHWC layout" << in_name << " in Operator " << type_; } else { @@ -2308,7 +2312,7 @@ Scope* OperatorWithKernel::PrepareData( } // Do transfer - Tensor out; + phi::DenseTensor out; TransformData(new_expected_kernel_key ? 
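CheckTensorNANOrInf above only scans floating-point outputs and aborts with the op and variable names when an Inf or NaN is found. The same logic as a self-contained plain-C++ helper (illustrative, not the framework's TensorContainsInf/TensorContainsNAN kernels):

    #include <cmath>
    #include <cstdint>
    #include <stdexcept>
    #include <string>

    void CheckBufferNanOrInf(const std::string& op_type, const std::string& name,
                             const double* data, int64_t numel) {
      for (int64_t i = 0; i < numel; ++i) {
        if (std::isinf(data[i]))
          throw std::runtime_error("Operator " + op_type + " output " + name +
                                   " contains Inf.");
        if (std::isnan(data[i]))
          throw std::runtime_error("Operator " + op_type + " output " + name +
                                   " contains NaN.");
      }
    }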
*new_expected_kernel_key : expected_kernel_key, kernel_type_for_var, @@ -2375,9 +2379,9 @@ void OperatorWithKernel::ParseInputDataType( const std::string& name, proto::VarType::Type* data_type) const { if (var != nullptr) { - const Tensor* t = nullptr; - if (var->IsType()) { - t = &var->Get(); + const phi::DenseTensor* t = nullptr; + if (var->IsType()) { + t = &var->Get(); } else if (var->IsType()) { t = &var->Get(); } else if (var->IsType()) { @@ -2391,13 +2395,13 @@ void OperatorWithKernel::ParseInputDataType( } } if (t != nullptr) { - PADDLE_ENFORCE_EQ( - t->IsInitialized(), - true, - platform::errors::InvalidArgument("The %s Op's Input Variable `%s` " - "contains uninitialized Tensor.", - Type(), - name)); + PADDLE_ENFORCE_EQ(t->IsInitialized(), + true, + platform::errors::InvalidArgument( + "The %s Op's Input Variable `%s` " + "contains uninitialized phi::DenseTensor.", + Type(), + name)); *data_type = paddle::framework::TransToProtoVarType(t->dtype()); } } @@ -2412,9 +2416,9 @@ void OperatorWithKernel::ParseMultiInputDataType( for (size_t i = 0; i < vars.size(); ++i) { const Variable* var = vars[i]; if (var != nullptr) { - const Tensor* t = nullptr; - if (var->IsType()) { - t = &var->Get(); + const phi::DenseTensor* t = nullptr; + if (var->IsType()) { + t = &var->Get(); } else if (var->IsType()) { t = &var->Get(); } else if (var->IsType()) { @@ -2428,13 +2432,13 @@ void OperatorWithKernel::ParseMultiInputDataType( } } if (t != nullptr) { - PADDLE_ENFORCE_EQ( - t->IsInitialized(), - true, - platform::errors::InvalidArgument("The %s Op's Input Variable `%s` " - "contains uninitialized Tensor.", - Type(), - name)); + PADDLE_ENFORCE_EQ(t->IsInitialized(), + true, + platform::errors::InvalidArgument( + "The %s Op's Input Variable `%s` " + "contains uninitialized phi::DenseTensor.", + Type(), + name)); proto::VarType::Type tmp = paddle::framework::TransToProtoVarType(t->dtype()); PADDLE_ENFORCE(tmp == *data_type || *data_type == default_data_type, @@ -2496,7 +2500,7 @@ proto::VarType::Type OperatorWithKernel::IndicateVarDataType( return data_type; } -Tensor* OperatorWithKernel::GetTensorFormInputSafely( +phi::DenseTensor* OperatorWithKernel::GetTensorFormInputSafely( const ExecutionContext& ctx, const std::string& name) const { // 1. get variable and check // NOTE: only supports signal input var now @@ -2509,9 +2513,9 @@ Tensor* OperatorWithKernel::GetTensorFormInputSafely( platform::errors::NotFound( "The variable %s is not found when promote complex types.", name)); // 2. 
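ParseMultiInputDataType above implements a simple rule: the first initialized input fixes the promoted type, every later initialized input must agree, uninitialized slots are skipped. A compact sketch of that rule (DType and its values are stand-ins for proto::VarType::Type):

    #include <stdexcept>
    #include <vector>

    enum class DType { kUnknown, kFP32, kFP64, kINT64 };

    DType CommonInputType(const std::vector<DType>& input_types) {
      DType result = DType::kUnknown;
      for (DType t : input_types) {
        if (t == DType::kUnknown) continue;          // uninitialized slot, skip
        if (result == DType::kUnknown) {
          result = t;                                // first initialized input wins
        } else if (t != result) {
          throw std::runtime_error("inputs must have consistent data types");
        }
      }
      return result;
    }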
get tensor and check - Tensor* t = nullptr; - if (var->IsType()) { - t = var->GetMutable(); + phi::DenseTensor* t = nullptr; + if (var->IsType()) { + t = var->GetMutable(); } else if (var->IsType()) { t = var->GetMutable(); } else if (var->IsType()) { @@ -2520,18 +2524,19 @@ Tensor* OperatorWithKernel::GetTensorFormInputSafely( PADDLE_THROW(platform::errors::Unimplemented( "Unsupported input variable type in complex type promotion.")); } - PADDLE_ENFORCE_NOT_NULL( - t, + PADDLE_ENFORCE_NOT_NULL(t, + platform::errors::InvalidArgument( + "The phi::DenseTensor of variable %s is nullptr " + "when promote complex types.")); + PADDLE_ENFORCE_EQ( + t->IsInitialized(), + true, platform::errors::InvalidArgument( - "The Tensor of variable %s is nullptr when promote complex types.")); - PADDLE_ENFORCE_EQ(t->IsInitialized(), - true, - platform::errors::InvalidArgument( - "The Tensor in the %s Op's Input Variable %s(%s) is " - "not initialized.", - Type(), - name, - ctx.InputName(name))); + "The phi::DenseTensor in the %s Op's Input Variable %s(%s) is " + "not initialized.", + Type(), + name, + ctx.InputName(name))); return t; } @@ -2567,7 +2572,7 @@ OpKernelType OperatorWithKernel::GetExpectedKernelType( OpKernelType OperatorWithKernel::GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const OpKernelType& expected_kernel_type) const { return OpKernelType( expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index edb2d539f82ef..33b1f5c32300b 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -124,8 +124,9 @@ inline bool VarIsTensor(const Variable& var) { return var.IsType() || var.IsType(); } -const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var); -Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var); +const phi::DenseTensor* GetLoDTensorOrSelectedRowsValueFromVar( + const Variable& var); +phi::DenseTensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var); class ExecutionContext; class OperatorBase; @@ -449,8 +450,8 @@ class ExecutionContext { #endif template - Tensor AllocateTmpTensor(const framework::DDim& dim, - const DevContext& dev_ctx) const { + phi::DenseTensor AllocateTmpTensor(const framework::DDim& dim, + const DevContext& dev_ctx) const { phi::DenseTensor tmp; tmp.Resize(dim); dev_ctx.template Alloc(&tmp); @@ -552,11 +553,11 @@ class ExecutionArgumentMappingContext : public phi::ArgumentMappingContext { }; template <> -const std::vector ExecutionContext::MultiInput( - const std::string& name) const; +const std::vector +ExecutionContext::MultiInput(const std::string& name) const; template <> -std::vector ExecutionContext::MultiOutput( +std::vector ExecutionContext::MultiOutput( const std::string& name) const; class OpKernelBase { @@ -640,7 +641,7 @@ class OperatorWithKernel : public OperatorBase { // need transform data virtual OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const OpKernelType& expected_kernel_type) const; platform::Place GetExecutionPlace( @@ -649,12 +650,13 @@ class OperatorWithKernel : public OperatorBase { } /* member functions for adapting to phi lib */ - /** In the Tensor calculation library, the new Kernel adopts a clearer and - * more streamlined design. 
The arguments of the Kernel and the input and - * output arguments registered in the original OpMaker do not match in some - * cases, so we use map to record the arguments required by the kernel. - * When selecting Kernel during Op execution, select the arguments of the - * original Op according to the GetExpectedPhiKernelArgs returned arguments. + /** In the phi::DenseTensor calculation library, the new Kernel adopts a + * clearer and more streamlined design. The arguments of the Kernel and the + * input and output arguments registered in the original OpMaker do not match + * in some cases, so we use map to record the arguments required by the + * kernel. When selecting Kernel during Op execution, select the arguments of + * the original Op according to the GetExpectedPhiKernelArgs returned + * arguments. */ phi::KernelSignature GetExpectedPhiKernelArgs( const ExecutionContext& ctx) const; @@ -723,8 +725,8 @@ class OperatorWithKernel : public OperatorBase { const std::string& name, proto::VarType::Type* data_type) const; // used for IndicateOrPromoteVarDataTypes - Tensor* GetTensorFormInputSafely(const ExecutionContext& ctx, - const std::string& name) const; + phi::DenseTensor* GetTensorFormInputSafely(const ExecutionContext& ctx, + const std::string& name) const; protected: mutable std::unique_ptr kernel_type_; diff --git a/paddle/fluid/framework/operator_test.cc b/paddle/fluid/framework/operator_test.cc index b5aaa22e86ee2..20aab15651d16 100644 --- a/paddle/fluid/framework/operator_test.cc +++ b/paddle/fluid/framework/operator_test.cc @@ -146,7 +146,7 @@ class CPUKernelTest : public OpKernel { cpu_kernel_run_num++; ASSERT_EQ(ctx.InputName("x"), "IN1"); ASSERT_EQ(ctx.OutputName("y"), "OUT1"); - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); ASSERT_EQ(x, nullptr); } }; @@ -196,13 +196,13 @@ class CPUKernalMultiInputsTest : public OpKernel { auto outVar0 = ctx.MultiOutputVar("ys"); ASSERT_EQ(outVar0.size(), 2U); - auto inTensor0 = ctx.MultiInput("xs"); + auto inTensor0 = ctx.MultiInput("xs"); ASSERT_EQ(inTensor0.size(), 3U); - auto intTensor1 = ctx.Input("k"); + auto intTensor1 = ctx.Input("k"); ASSERT_NE(intTensor1, nullptr); - auto outTensor0 = ctx.MultiOutput("ys"); + auto outTensor0 = ctx.MultiOutput("ys"); ASSERT_EQ(outTensor0.size(), 2U); auto k = ctx.InputName("k"); @@ -349,7 +349,7 @@ class IndicateLoDTensorDataTypeTest : public OperatorWithKernel { class IndicateLoDTensorDataTypeTestProtoMaker : public OpProtoAndCheckerMaker { public: void Make() { - AddInput("LoDTensor", "Input of Tensor type Variable."); + AddInput("LoDTensor", "Input of phi::DenseTensor type Variable."); AddComment("This Op is only for IndicateVarDataType interface test."); } }; @@ -450,7 +450,8 @@ TEST(IndicateVarDataTypeTest, lodtensor) { EXPECT_TRUE( ex_msg.find( "The indicate_lod_tensor_data_type_test Op's Input Variable " - "`LoDTensor` contains uninitialized Tensor.") != std::string::npos); + "`LoDTensor` contains uninitialized phi::DenseTensor.") != + std::string::npos); } ASSERT_TRUE(caught); } @@ -477,7 +478,7 @@ TEST(IndicateVarDataTypeTest, selectedrows) { EXPECT_TRUE( ex_msg.find("The indicate_selected_rows_data_type_test Op's " "Input Variable `SelectedRows` contains uninitialized " - "Tensor.") != std::string::npos); + "phi::DenseTensor.") != std::string::npos); } ASSERT_TRUE(caught); } @@ -684,8 +685,8 @@ class OpWithoutUnusedVarKernelTest : public OpKernel { void Compute(const ExecutionContext& ctx) const { ASSERT_EQ(ctx.InputName("X"), "X"); ASSERT_EQ(ctx.OutputName("Y"), "Y"); - auto* x 
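With the angle-bracket arguments that this rendering drops, a fluid OpKernel consuming phi::DenseTensor after this change reads roughly as below. Illustrative sketch only: the kernel, op, and variable names are made up, and it assumes the usual framework headers (op_registry.h) plus that InferShape has already set the output dims:

    template <typename T>
    class CopyLikeKernel : public paddle::framework::OpKernel<T> {
     public:
      void Compute(const paddle::framework::ExecutionContext& ctx) const override {
        auto* x = ctx.Input<phi::DenseTensor>("X");
        auto* out = ctx.Output<phi::DenseTensor>("Out");
        auto* out_data = out->mutable_data<T>(ctx.GetPlace());  // dims set by InferShape
        const auto* x_data = x->data<T>();
        for (int64_t i = 0; i < x->numel(); ++i) out_data[i] = x_data[i];
      }
    };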
= ctx.Input("X"); - auto* y = ctx.Output("Y"); + auto* x = ctx.Input("X"); + auto* y = ctx.Output("Y"); ASSERT_NE(x, y); ASSERT_NE(y, nullptr); } diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc index 0e1a75ebe64ee..79ba56ab147a3 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc @@ -37,7 +37,7 @@ namespace paddle2cinn { using ir::Graph; using ir::Node; -using CinnTensor = ::cinn::hlir::framework::Tensor; +using CinnTensor = ::cinn::hlir::Tensor; using OpMapperContext = CinnGraphSymbolization::OpMapperContext; using CinnOpDesc = CinnGraphSymbolization::CinnOpDesc; using FeedInfoMap = CinnGraphSymbolization::FeedInfoMap; @@ -45,7 +45,7 @@ using FeedInfoMap = CinnGraphSymbolization::FeedInfoMap; namespace utils { OpMapperContext::FeedInfo GetCinnFeedInfoFromTensor( - const Tensor& tensor, bool skip_trans_type = false) { + const phi::DenseTensor& tensor, bool skip_trans_type = false) { OpMapperContext::FeedInfo info; const auto& dim = tensor.dims(); for (int i = 0; i < dim.size(); i++) { diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc index 12bd9564c1ae3..929f009b2a3a2 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc @@ -24,7 +24,7 @@ namespace paddle2cinn { using ::cinn::frontend::NetBuilder; using ir::Graph; using ir::Node; -using CinnTensor = ::cinn::hlir::framework::Tensor; +using CinnTensor = ::cinn::hlir::Tensor; using OpMapperContext = CinnGraphSymbolization::OpMapperContext; using CinnOpDesc = CinnGraphSymbolization::CinnOpDesc; using FeedInfoMap = CinnGraphSymbolization::FeedInfoMap; diff --git a/paddle/fluid/framework/paddle2cinn/cinn_lib_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_lib_test.cc index 2dd09771cc5ea..ee030bb39caa9 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_lib_test.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_lib_test.cc @@ -52,7 +52,7 @@ Program CreateAddProgram() { return program; } -void SetRandData(hlir::framework::Tensor tensor, Target target) { +void SetRandData(hlir::Tensor tensor, Target target) { auto* data = tensor->mutable_data(target); std::random_device seed; std::default_random_engine engine(seed()); @@ -96,8 +96,8 @@ TEST(net_build, program_execute_multi_elementwise_add) { hlir::framework::GraphCompiler gc(target, scope, graph); auto runtime_program = gc.Build(); - scope->Var("A"); - scope->Var("B"); + scope->Var("A"); + scope->Var("B"); auto A = scope->GetTensor("A"); auto B = scope->GetTensor("B"); @@ -133,10 +133,10 @@ TEST(net_build, program_execute_fc) { hlir::framework::GraphCompiler gc(target, scope, graph); auto runtime_program = gc.Build(); - scope->Var(std::string(a.id())); - scope->Var(std::string(w.id())); - scope->Var(std::string(b.id())); - scope->Var(std::string(mul_out->id)); + scope->Var(std::string(a.id())); + scope->Var(std::string(w.id())); + scope->Var(std::string(b.id())); + scope->Var(std::string(mul_out->id)); auto a_ten = scope->GetTensor(std::string(a.id())); auto w_ten = scope->GetTensor(std::string(w.id())); diff --git a/paddle/fluid/framework/program_desc.h b/paddle/fluid/framework/program_desc.h index e1dbd85f129ad..d2d0af8effd10 100644 --- a/paddle/fluid/framework/program_desc.h +++ 
b/paddle/fluid/framework/program_desc.h @@ -75,14 +75,14 @@ class ProgramDesc { // fetch_ops. const std::vector GetFetchTargetNames(); - // The input variable of feed_op that holds input Tensor provided by users is - // referenced as feed_holder. - // This function is used to change or unify the feed_holder variables' name. + // The input variable of feed_op that holds input phi::DenseTensor provided by + // users is referenced as feed_holder. This function is used to change or + // unify the feed_holder variables' name. void SetFeedHolderName(const std::string &feed_holder_name); - // The output variable of fetch_op that holds output Tensor needed by users is - // referenced as fetch_holder. - // This function is used to change or unify the fetch_holder variables' name. + // The output variable of fetch_op that holds output phi::DenseTensor needed + // by users is referenced as fetch_holder. This function is used to change or + // unify the fetch_holder variables' name. void SetFetchHolderName(const std::string &fetch_holder_name); std::string CachedHashString(); diff --git a/paddle/fluid/framework/save_load_util.cc b/paddle/fluid/framework/save_load_util.cc index cb6120d5e5ee6..dee25b998a67c 100644 --- a/paddle/fluid/framework/save_load_util.cc +++ b/paddle/fluid/framework/save_load_util.cc @@ -38,7 +38,7 @@ void CheckInStreamState(std::istream& istre, size_t length) { struct DeserializedDataFunctor { DeserializedDataFunctor(void** buf, - Tensor* tensor, + phi::DenseTensor* tensor, const platform::Place& place) : buf_(buf), tensor_(tensor), place_(place) {} @@ -48,7 +48,7 @@ struct DeserializedDataFunctor { } void** buf_; - Tensor* tensor_; + phi::DenseTensor* tensor_; platform::Place place_; }; @@ -58,13 +58,14 @@ size_t ReadTensorNumber(std::istream& istre) { sizeof(char) * tensor_number_mark.size()); std::string str_read_tensor_number_mark(tensor_number_mark_buffer, tensor_number_mark.size()); - PADDLE_ENFORCE_EQ(tensor_number_mark, - str_read_tensor_number_mark, - platform::errors::InvalidArgument( - "Tensor number mark does not match, expect mark is " - "[%s], but the mark read from file is [%s].", - tensor_number_mark, - str_read_tensor_number_mark)); + PADDLE_ENFORCE_EQ( + tensor_number_mark, + str_read_tensor_number_mark, + platform::errors::InvalidArgument( + "phi::DenseTensor number mark does not match, expect mark is " + "[%s], but the mark read from file is [%s].", + tensor_number_mark, + str_read_tensor_number_mark)); size_t tensor_number = 0; istre.read(reinterpret_cast(&tensor_number), sizeof(tensor_number)); @@ -82,13 +83,14 @@ std::string ReadTensorName(std::istream& istre) { std::string str_read_tensor_name_mark(name_mark_buffer, tensor_name_mark.size()); - PADDLE_ENFORCE_EQ(tensor_name_mark, - str_read_tensor_name_mark, - platform::errors::InvalidArgument( - "Tensor name mark does not match, expect mark is [%s], " - "but the mark read from file is [%s].", - tensor_name_mark, - str_read_tensor_name_mark)); + PADDLE_ENFORCE_EQ( + tensor_name_mark, + str_read_tensor_name_mark, + platform::errors::InvalidArgument( + "phi::DenseTensor name mark does not match, expect mark is [%s], " + "but the mark read from file is [%s].", + tensor_name_mark, + str_read_tensor_name_mark)); size_t tensor_name_length = 0; istre.read(reinterpret_cast(&tensor_name_length), @@ -120,7 +122,7 @@ bool SaveStaticNameListToDisk( const std::string& file_name, const std::vector& vec_tensor_name_list, const Scope& scope) { - std::map map_tensor; + std::map map_tensor; for (size_t i = 0; i < 
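ReadTensorNumber/ReadTensorName above parse a simple framing: a fixed text mark, then a length, then that many payload bytes. A plain-C++ sketch of the read side (helper name is made up; the real mark strings live in save_load_util.cc, and the shown code enforces that the mark read back matches the expected one):

    #include <istream>
    #include <memory>
    #include <string>

    std::string ReadLengthPrefixedName(std::istream& is, const std::string& mark) {
      std::string read_mark(mark.size(), '\0');
      is.read(&read_mark[0], mark.size());          // expected to equal `mark`
      size_t length = 0;
      is.read(reinterpret_cast<char*>(&length), sizeof(length));
      std::unique_ptr<char[]> buf(new char[length]);
      is.read(buf.get(), length);
      return std::string(buf.get(), length);
    }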
vec_tensor_name_list.size(); ++i) { auto var_ptr = scope.FindVar(vec_tensor_name_list[i]); @@ -131,7 +133,7 @@ bool SaveStaticNameListToDisk( "that exe.run(startup_program) has " "been executed.", vec_tensor_name_list[i])); - Tensor* tensor = var_ptr->GetMutable(); + phi::DenseTensor* tensor = var_ptr->GetMutable(); PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true, platform::errors::PreconditionNotMet( @@ -149,11 +151,11 @@ bool SaveDygraphVarBaseListToDisk( const std::string& file_name, const std::vector>& vec_var_base_list) { - std::map map_tensor; + std::map map_tensor; for (size_t i = 0; i < vec_var_base_list.size(); ++i) { auto var_ptr = vec_var_base_list[i]->MutableVar(); - Tensor* tensor = var_ptr->GetMutable(); + phi::DenseTensor* tensor = var_ptr->GetMutable(); PADDLE_ENFORCE_EQ(tensor->IsInitialized(), true, @@ -170,7 +172,7 @@ bool SaveDygraphVarBaseListToDisk( const std::vector> LoadDygraphVarBaseListFromDisk(const std::string& file_name) { - std::map> map_load_tensor; + std::map> map_load_tensor; LoadTensorFromDisk(file_name, &map_load_tensor); std::vector> vec_res; @@ -194,7 +196,7 @@ bool LoadStaticNameListFromDisk( const std::string& file_name, const std::vector& vec_tensor_name_list, const Scope& scope) { - std::map> map_load_tensor; + std::map> map_load_tensor; LoadTensorFromDisk(file_name, &map_load_tensor); for (size_t i = 0; i < vec_tensor_name_list.size(); ++i) { @@ -214,7 +216,7 @@ bool LoadStaticNameListFromDisk( "please make sure that exe.run(startup_program) has been executed.", vec_tensor_name_list[i])); - Tensor* tensor = var_ptr->GetMutable(); + phi::DenseTensor* tensor = var_ptr->GetMutable(); PADDLE_ENFORCE_NOT_NULL( tensor, platform::errors::PreconditionNotMet( @@ -261,8 +263,9 @@ bool LoadStaticNameListFromDisk( return true; } -bool SaveTensorToDisk(const std::string& file_name, - const std::map& map_tensor) { +bool SaveTensorToDisk( + const std::string& file_name, + const std::map& map_tensor) { MkDirRecursively(DirName(file_name).c_str()); std::ofstream fout(file_name, std::ios::binary); @@ -316,12 +319,13 @@ bool SaveTensorToDisk(const std::string& file_name, auto* data_ptr = tensor->data(); if (platform::is_gpu_place(tensor->place())) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - framework::Tensor temp; + phi::DenseTensor temp; TensorCopySync(*tensor, platform::CPUPlace(), &temp); data_ptr = temp.data(); #else - PADDLE_THROW(platform::errors::Unavailable( - "Tensor is in CUDA device, but paddle not compiled with CUDA.")); + PADDLE_THROW( + platform::errors::Unavailable("phi::DenseTensor is in CUDA device, " + "but paddle not compiled with CUDA.")); #endif } fout.write(static_cast(data_ptr), @@ -341,7 +345,7 @@ bool SaveTensorToDisk(const std::string& file_name, bool LoadTensorFromDisk( const std::string& file_name, - std::map>* map_tensor) { + std::map>* map_tensor) { std::ifstream fin(file_name, std::ios::binary); PADDLE_ENFORCE_EQ( @@ -356,7 +360,7 @@ bool LoadTensorFromDisk( for (size_t i = 0; i < tensor_number; ++i) { std::string str_tensor_name = ReadTensorName(fin); - std::shared_ptr tensor_temp(new Tensor()); + std::shared_ptr tensor_temp(new phi::DenseTensor()); uint32_t version; fin.read(reinterpret_cast(&version), sizeof(version)); CheckInStreamState(fin, sizeof(version)); diff --git a/paddle/fluid/framework/save_load_util.h b/paddle/fluid/framework/save_load_util.h index f4ec7fafdcb9e..4f8360d96f6d3 100644 --- a/paddle/fluid/framework/save_load_util.h +++ b/paddle/fluid/framework/save_load_util.h @@ -47,12 +47,13 @@ bool 
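For orientation, the intended call pattern for the two static save/load helpers above looks roughly like this (a fragment, not a full program: the file path and parameter names are made up, and `scope` is assumed to already hold initialized persistable tensors with these names, e.g. after exe.run(startup_program)):

    std::vector<std::string> names = {"fc_0.w_0", "fc_0.b_0"};
    paddle::framework::SaveStaticNameListToDisk("/tmp/params.bin", names, scope);
    // ... later, after the variables have been re-created in a fresh scope ...
    paddle::framework::LoadStaticNameListFromDisk("/tmp/params.bin", names, scope);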
SaveDygraphVarBaseListToDisk( const std::vector> LoadDygraphVarBaseListFromDisk(const std::string& file_name); -bool SaveTensorToDisk(const std::string& file_name, - const std::map& map_tensor); +bool SaveTensorToDisk( + const std::string& file_name, + const std::map& map_tensor); bool LoadTensorFromDisk( const std::string& file_name, - std::map>* map_tensor); + std::map>* map_tensor); } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/save_load_util_test.cc b/paddle/fluid/framework/save_load_util_test.cc index 623f0f27bdaa2..b8b5888236f0e 100644 --- a/paddle/fluid/framework/save_load_util_test.cc +++ b/paddle/fluid/framework/save_load_util_test.cc @@ -23,10 +23,10 @@ namespace framework { TEST(test_save_load_util, test_save_load) { srand(time(NULL)); auto cpu_place = platform::CPUPlace(); - Tensor tensor1; + phi::DenseTensor tensor1; tensor1.Resize({1000, 1000}); auto src_data_1 = tensor1.mutable_data(cpu_place); - Tensor tensor2; + phi::DenseTensor tensor2; tensor2.Resize({5000, 1000}); auto src_data_2 = tensor2.mutable_data(cpu_place); @@ -42,13 +42,13 @@ TEST(test_save_load_util, test_save_load) { src_data_2[i] = temp; } - std::map map_tensor; + std::map map_tensor; map_tensor["t1"] = &tensor1; map_tensor["t2"] = &tensor2; SaveTensorToDisk("test_1", map_tensor); - std::map> load_map_tensor; + std::map> load_map_tensor; LoadTensorFromDisk("test_1", &load_map_tensor); diff --git a/paddle/fluid/framework/selected_rows_utils_test.cc b/paddle/fluid/framework/selected_rows_utils_test.cc index 340acf53efa9d..1031a221a0796 100644 --- a/paddle/fluid/framework/selected_rows_utils_test.cc +++ b/paddle/fluid/framework/selected_rows_utils_test.cc @@ -28,7 +28,7 @@ class SelectedRowsTester : public ::testing::Test { int64_t row_numel = 100; selected_rows_.reset(new phi::SelectedRows(rows, height)); - Tensor* value = selected_rows_->mutable_value(); + phi::DenseTensor* value = selected_rows_->mutable_value(); auto* data = value->mutable_data( phi::make_ddim({static_cast(rows.size()), row_numel}), place_); for (int64_t i = 0; i < value->numel(); ++i) { @@ -98,7 +98,7 @@ TEST(SelectedRows, SparseTable) { ASSERT_TRUE(table.HasKey(6)); ASSERT_EQ(table.rows().size(), 3UL); - framework::Tensor ids; + phi::DenseTensor ids; ids.Resize(phi::make_ddim({4})); auto* ids_data = ids.mutable_data(cpu); ids_data[0] = static_cast(6); @@ -106,7 +106,7 @@ TEST(SelectedRows, SparseTable) { ids_data[2] = static_cast(8); ids_data[3] = static_cast(10); - framework::Tensor get_value; + phi::DenseTensor get_value; auto* value_data = get_value.mutable_data(phi::make_ddim({4, embedding_width}), cpu); table.Get(ids, &get_value); diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index fcb061aa93288..a83b3baa85e52 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -22,7 +22,6 @@ namespace paddle { namespace framework { using LoD = std::vector>; -using Tensor = phi::DenseTensor; } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/tensor_test.cc b/paddle/fluid/framework/tensor_test.cc index fcf255dafc2e0..c9d740dcf8fc4 100644 --- a/paddle/fluid/framework/tensor_test.cc +++ b/paddle/fluid/framework/tensor_test.cc @@ -22,8 +22,8 @@ namespace framework = paddle::framework; namespace platform = paddle::platform; -TEST(Tensor, Dims) { - framework::Tensor tt; +TEST(DenseTensor, Dims) { + phi::DenseTensor tt; tt.Resize({2, 3, 4}); framework::DDim dims = tt.dims(); ASSERT_EQ(arity(dims), 3); @@ -32,8 +32,8 @@ 
TEST(Tensor, Dims) { } } -TEST(Tensor, DataAssert) { - framework::Tensor src_tensor; +TEST(DenseTensor, DataAssert) { + phi::DenseTensor src_tensor; bool caught = false; try { @@ -41,16 +41,16 @@ TEST(Tensor, DataAssert) { } catch (platform::EnforceNotMet& err) { caught = true; std::string ex_msg = err.what(); - EXPECT_TRUE(ex_msg.find("Tensor holds no memory. Call " - "Tensor::mutable_data firstly.") != + EXPECT_TRUE(ex_msg.find("phi::DenseTensor holds no memory. Call " + "phi::DenseTensor::mutable_data firstly.") != std::string::npos); } ASSERT_TRUE(caught); } -TEST(Tensor, MutableData) { +TEST(DenseTensor, MutableData) { { - framework::Tensor src_tensor; + phi::DenseTensor src_tensor; float* p1 = nullptr; float* p2 = nullptr; // initialization @@ -99,9 +99,10 @@ TEST(Tensor, MutableData) { EXPECT_NE(p1, p4); EXPECT_NE(p3_holder1.get(), p3_holder2.get()); } - // Not sure if it's desired, but currently, Tensor type can be changed. + // Not sure if it's desired, but currently, phi::DenseTensor type can be + // changed. { - framework::Tensor src_tensor; + phi::DenseTensor src_tensor; int8_t* p1 = src_tensor.mutable_data(phi::make_ddim({1}), platform::CPUPlace()); EXPECT_NE(p1, nullptr); @@ -115,7 +116,7 @@ TEST(Tensor, MutableData) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) { - framework::Tensor src_tensor; + phi::DenseTensor src_tensor; float* p1 = nullptr; float* p2 = nullptr; // initialization @@ -144,7 +145,7 @@ TEST(Tensor, MutableData) { #endif #ifdef PADDLE_WITH_ASCEND_CL { - framework::Tensor src_tensor; + phi::DenseTensor src_tensor; float* p1 = nullptr; float* p2 = nullptr; // initialization @@ -173,10 +174,10 @@ TEST(Tensor, MutableData) { #endif } -TEST(Tensor, ShareDataWith) { +TEST(DenseTensor, ShareDataWith) { { - framework::Tensor src_tensor; - framework::Tensor dst_tensor; + phi::DenseTensor src_tensor; + phi::DenseTensor dst_tensor; // Try to share data form uninitialized tensor bool caught = false; try { @@ -184,8 +185,8 @@ TEST(Tensor, ShareDataWith) { } catch (paddle::platform::EnforceNotMet& err) { caught = true; std::string ex_msg = err.what(); - EXPECT_TRUE(ex_msg.find("Tensor holds no memory. Call " - "Tensor::mutable_data firstly.") != + EXPECT_TRUE(ex_msg.find("phi::DenseTensor holds no memory. 
Call " + "phi::DenseTensor::mutable_data firstly.") != std::string::npos); } ASSERT_TRUE(caught); @@ -198,8 +199,8 @@ TEST(Tensor, ShareDataWith) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) { - framework::Tensor src_tensor; - framework::Tensor dst_tensor; + phi::DenseTensor src_tensor; + phi::DenseTensor dst_tensor; src_tensor.mutable_data(phi::make_ddim({2, 3, 4}), platform::CUDAPlace(0)); dst_tensor.ShareDataWith(src_tensor); @@ -208,8 +209,8 @@ TEST(Tensor, ShareDataWith) { #endif #ifdef PADDLE_WITH_ASCEND_CL { - framework::Tensor src_tensor; - framework::Tensor dst_tensor; + phi::DenseTensor src_tensor; + phi::DenseTensor dst_tensor; src_tensor.mutable_data(phi::make_ddim({2, 3, 4}), platform::NPUPlace(0)); dst_tensor.ShareDataWith(src_tensor); @@ -218,12 +219,12 @@ TEST(Tensor, ShareDataWith) { #endif } -TEST(Tensor, Slice) { +TEST(DenseTensor, Slice) { { - framework::Tensor src_tensor; + phi::DenseTensor src_tensor; src_tensor.mutable_data(phi::make_ddim({5, 3, 4}), platform::CPUPlace()); - framework::Tensor slice_tensor = src_tensor.Slice(1, 3); + phi::DenseTensor slice_tensor = src_tensor.Slice(1, 3); framework::DDim slice_dims = slice_tensor.dims(); ASSERT_EQ(arity(slice_dims), 3); EXPECT_EQ(slice_dims[0], 2); @@ -246,10 +247,10 @@ TEST(Tensor, Slice) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) { - framework::Tensor src_tensor; + phi::DenseTensor src_tensor; src_tensor.mutable_data(phi::make_ddim({6, 9}), platform::CUDAPlace(0)); - framework::Tensor slice_tensor = src_tensor.Slice(2, 6); + phi::DenseTensor slice_tensor = src_tensor.Slice(2, 6); framework::DDim slice_dims = slice_tensor.dims(); ASSERT_EQ(arity(slice_dims), 2); EXPECT_EQ(slice_dims[0], 4); @@ -273,10 +274,10 @@ TEST(Tensor, Slice) { #ifdef PADDLE_WITH_ASCEND_CL { - framework::Tensor src_tensor; + phi::DenseTensor src_tensor; src_tensor.mutable_data(phi::make_ddim({6, 9}), platform::NPUPlace(0)); - framework::Tensor slice_tensor = src_tensor.Slice(2, 6); + phi::DenseTensor slice_tensor = src_tensor.Slice(2, 6); framework::DDim slice_dims = slice_tensor.dims(); ASSERT_EQ(arity(slice_dims), 2); EXPECT_EQ(slice_dims[0], 4); @@ -299,27 +300,27 @@ TEST(Tensor, Slice) { #endif } -TEST(Tensor, ReshapeToMatrix) { - framework::Tensor src; +TEST(DenseTensor, ReshapeToMatrix) { + phi::DenseTensor src; int* src_ptr = src.mutable_data({2, 3, 4, 9}, platform::CPUPlace()); for (int i = 0; i < 2 * 3 * 4 * 9; ++i) { src_ptr[i] = i; } - framework::Tensor res = framework::ReshapeToMatrix(src, 2); + phi::DenseTensor res = framework::ReshapeToMatrix(src, 2); ASSERT_EQ(res.dims()[0], 2 * 3); ASSERT_EQ(res.dims()[1], 4 * 9); } -TEST(Tensor, Layout) { - framework::Tensor src; +TEST(DenseTensor, Layout) { + phi::DenseTensor src; ASSERT_EQ(src.layout(), framework::DataLayout::kNCHW); src.set_layout(framework::DataLayout::kAnyLayout); ASSERT_EQ(src.layout(), framework::DataLayout::kAnyLayout); } -TEST(Tensor, FP16) { +TEST(DenseTensor, FP16) { using platform::float16; - framework::Tensor src; + phi::DenseTensor src; float16* src_ptr = src.mutable_data({2, 3}, platform::CPUPlace()); for (int i = 0; i < 2 * 3; ++i) { src_ptr[i] = static_cast(i); @@ -327,15 +328,16 @@ TEST(Tensor, FP16) { EXPECT_EQ(src.memory_size(), 2 * 3 * sizeof(float16)); // EXPECT a human readable error message // src.data(); - // Tensor holds the wrong type, it holds N6paddle8platform7float16E at + // phi::DenseTensor holds the wrong type, it holds N6paddle8platform7float16E + // at // [/paddle/Paddle/paddle/fluid/framework/tensor_impl.h:43] 
} -TEST(Tensor, Split) { +TEST(DenseTensor, Split) { { - framework::Tensor src_tensor; + phi::DenseTensor src_tensor; src_tensor.mutable_data(phi::make_ddim({6, 2}), platform::CPUPlace()); - std::vector split_tensor_list = src_tensor.Split(2, 0); + std::vector split_tensor_list = src_tensor.Split(2, 0); ASSERT_EQ(split_tensor_list.size(), 3UL); EXPECT_EQ(split_tensor_list[0].dims()[0], 2); EXPECT_EQ(split_tensor_list[1].dims()[0], 2); @@ -361,10 +363,10 @@ TEST(Tensor, Split) { } #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) { - framework::Tensor src_tensor; + phi::DenseTensor src_tensor; src_tensor.mutable_data(phi::make_ddim({6, 4}), platform::CUDAPlace(0)); - std::vector split_tensor_list = src_tensor.Split(2, 0); + std::vector split_tensor_list = src_tensor.Split(2, 0); ASSERT_EQ(split_tensor_list.size(), 3UL); EXPECT_EQ(split_tensor_list[0].dims()[0], 2); EXPECT_EQ(split_tensor_list[1].dims()[0], 2); @@ -393,11 +395,11 @@ TEST(Tensor, Split) { #endif } -TEST(Tensor, Chunk) { +TEST(DenseTensor, Chunk) { { - framework::Tensor src_tensor; + phi::DenseTensor src_tensor; src_tensor.mutable_data(phi::make_ddim({6, 2}), platform::CPUPlace()); - std::vector split_tensor_list = src_tensor.Chunk(3, 0); + std::vector split_tensor_list = src_tensor.Chunk(3, 0); ASSERT_EQ(split_tensor_list.size(), 3UL); EXPECT_EQ(split_tensor_list[0].dims()[0], 2); EXPECT_EQ(split_tensor_list[1].dims()[0], 2); @@ -423,10 +425,10 @@ TEST(Tensor, Chunk) { } #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) { - framework::Tensor src_tensor; + phi::DenseTensor src_tensor; src_tensor.mutable_data(phi::make_ddim({6, 4}), platform::CUDAPlace(0)); - std::vector split_tensor_list = src_tensor.Chunk(3, 0); + std::vector split_tensor_list = src_tensor.Chunk(3, 0); ASSERT_EQ(split_tensor_list.size(), 3UL); EXPECT_EQ(split_tensor_list[0].dims()[0], 2); EXPECT_EQ(split_tensor_list[1].dims()[0], 2); diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index ca1a65be7d0ab..efc7f685bc90b 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -137,7 +137,7 @@ void TensorCopyImpl(const TENSOR& src, platform::is_npu_place(dst_place)) { // 1. cpu tensor -> npu pinned tensor platform::NPUPinnedPlace npu_pinned_place; - Tensor npu_pinned_tensor; + phi::DenseTensor npu_pinned_tensor; npu_pinned_tensor.Resize(src.dims()); auto npu_pinned_ptr = npu_pinned_tensor.mutable_data(npu_pinned_place, src.dtype()); @@ -179,12 +179,13 @@ void TensorCopyImpl(const TENSOR& src, auto src_npu_pinned_place = src_place; auto dst_npu_place = dst_place; auto ctx_place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ(platform::is_npu_place(ctx_place), - true, - platform::errors::PreconditionNotMet( - "Device context place mismatch. When copying Tensor " - "data from NPU Pinned memory to NPU memory, current " - "device context place should be NPU.")); + PADDLE_ENFORCE_EQ( + platform::is_npu_place(ctx_place), + true, + platform::errors::PreconditionNotMet( + "Device context place mismatch. 
When copying phi::DenseTensor " + "data from NPU Pinned memory to NPU memory, current " + "device context place should be NPU.")); auto ctx_npu_place = ctx_place; PADDLE_ENFORCE_EQ(dst_npu_place, ctx_npu_place, @@ -204,12 +205,13 @@ void TensorCopyImpl(const TENSOR& src, auto src_npu_place = src_place; auto dst_npu_pinned_place = dst_place; auto ctx_place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ(platform::is_npu_place(ctx_place), - true, - platform::errors::PreconditionNotMet( - "Device context place mismatch. When copying Tensor " - "data from NPU memory to NPU Pinned memory, current " - "device context place should be NPU.")); + PADDLE_ENFORCE_EQ( + platform::is_npu_place(ctx_place), + true, + platform::errors::PreconditionNotMet( + "Device context place mismatch. When copying phi::DenseTensor " + "data from NPU memory to NPU Pinned memory, current " + "device context place should be NPU.")); auto ctx_npu_place = ctx_place; PADDLE_ENFORCE_EQ(src_place, ctx_npu_place, @@ -291,12 +293,13 @@ void TensorCopyImpl(const TENSOR& src, auto src_gpu_place = src_place; auto dst_cuda_pinned_place = dst_place; auto ctx_place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), - true, - platform::errors::PreconditionNotMet( - "Device context place mismatch. When copying Tensor " - "data from GPU memory to CUDA Pinned memory, current " - "device context place should be GPU.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx_place), + true, + platform::errors::PreconditionNotMet( + "Device context place mismatch. When copying phi::DenseTensor " + "data from GPU memory to CUDA Pinned memory, current " + "device context place should be GPU.")); auto ctx_gpu_place = ctx_place; PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place, @@ -315,12 +318,13 @@ void TensorCopyImpl(const TENSOR& src, auto src_cuda_pinned_place = src_place; auto dst_gpu_place = dst_place; auto ctx_place = ctx.GetPlace(); - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx_place), - true, - platform::errors::PreconditionNotMet( - "Device context place mismatch. When copying Tensor " - "data from CUDA Pinned memory to GPU memory, current " - "device context place should be GPU.")); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx_place), + true, + platform::errors::PreconditionNotMet( + "Device context place mismatch. 
When copying phi::DenseTensor " + "data from CUDA Pinned memory to GPU memory, current " + "device context place should be GPU.")); auto ctx_gpu_place = ctx_place; PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place, @@ -440,21 +444,21 @@ void TensorCopyImpl(const TENSOR& src, TensorCopyImpl(src, dst_place, *dev_ctx, dst); } -void TensorCopy(const Tensor& src, +void TensorCopy(const phi::DenseTensor& src, const platform::Place& dst_place, - Tensor* dst) { - TensorCopyImpl(src, dst_place, dst); + phi::DenseTensor* dst) { + TensorCopyImpl(src, dst_place, dst); } -void TensorCopy(const Tensor& src, +void TensorCopy(const phi::DenseTensor& src, const platform::Place& dst_place, const platform::DeviceContext& ctx, - Tensor* dst) { - TensorCopyImpl(src, dst_place, ctx, dst); + phi::DenseTensor* dst) { + TensorCopyImpl(src, dst_place, ctx, dst); } -void TensorCopySync(const Tensor& src, +void TensorCopySync(const phi::DenseTensor& src, const platform::Place& dst_place, - Tensor* dst) { + phi::DenseTensor* dst) { if (&src == dst) { auto src_copy = src; TensorCopySync(src_copy, dst_place, dst); @@ -652,7 +656,7 @@ void TensorCopySync(const Tensor& src, } void TensorToStream(std::ostream& os, - const Tensor& tensor, + const phi::DenseTensor& tensor, const platform::DeviceContext& dev_ctx) { { // the 1st field, uint32_t version constexpr uint32_t version = 0; @@ -813,7 +817,7 @@ void TensorToStream(std::ostream& os, struct DeserializedDataFunctor { DeserializedDataFunctor(void** buf, - Tensor* tensor, + phi::DenseTensor* tensor, const platform::Place& place) : buf_(buf), tensor_(tensor), place_(place) {} @@ -823,12 +827,12 @@ struct DeserializedDataFunctor { } void** buf_; - Tensor* tensor_; + phi::DenseTensor* tensor_; platform::Place place_; }; void TensorFromStream(std::istream& is, - Tensor* tensor, + phi::DenseTensor* tensor, const platform::DeviceContext& dev_ctx, const size_t& seek, const std::vector& shape) { @@ -870,7 +874,7 @@ void TensorFromStream(std::istream& is, #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \ defined(PADDLE_WITH_XPU) || defined(PADDLE_WITH_MLU) || \ defined(PADDLE_WITH_ASCEND_CL) || defined(PADDLE_WITH_CUSTOM_DEVICE) - Tensor cpu_tensor; + phi::DenseTensor cpu_tensor; cpu_tensor.Resize(phi::make_ddim(shape)); framework::VisitDataType( desc.data_type(), @@ -907,7 +911,7 @@ void TensorFromStream(std::istream& is, } void TensorFromStream(std::istream& is, - Tensor* tensor, + phi::DenseTensor* tensor, const platform::DeviceContext& dev_ctx) { uint32_t version; is.read(reinterpret_cast(&version), sizeof(version)); @@ -926,10 +930,10 @@ void TensorFromStream(std::istream& is, is.good(), true, platform::errors::Unavailable("Cannot read tensor desc size")); - PADDLE_ENFORCE_GE( - size, - 0, - platform::errors::InvalidArgument("Tensor desc size should >= 0")); + PADDLE_ENFORCE_GE(size, + 0, + platform::errors::InvalidArgument( + "phi::DenseTensor desc size should >= 0")); std::unique_ptr buf(new char[size]); is.read(reinterpret_cast(buf.get()), size); PADDLE_ENFORCE_EQ( @@ -953,7 +957,7 @@ void TensorFromStream(std::istream& is, #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) || \ defined(PADDLE_WITH_XPU) || defined(PADDLE_WITH_MLU) || \ defined(PADDLE_WITH_ASCEND_CL) || defined(PADDLE_WITH_CUSTOM_DEVICE) - Tensor cpu_tensor; + phi::DenseTensor cpu_tensor; cpu_tensor.Resize(phi::make_ddim(dims)); framework::VisitDataType( desc.data_type(), @@ -994,7 +998,7 @@ void TensorFromStream(std::istream& is, // get tensor data point by DLDataType void* 
GetDstPtrByDLDataType(DLDataType type, - framework::Tensor* dst, + phi::DenseTensor* dst, const platform::Place& dst_place) { // vector types not currently supported PADDLE_ENFORCE_LE(type.lanes, @@ -1060,7 +1064,7 @@ void* GetDstPtrByDLDataType(DLDataType type, } } -void TensorFromDLPack(const ::DLTensor& dl_tensor, framework::Tensor* dst) { +void TensorFromDLPack(const ::DLTensor& dl_tensor, phi::DenseTensor* dst) { platform::CPUPlace dst_place = platform::CPUPlace(); platform::CPUPlace src_place = platform::CPUPlace(); @@ -1103,13 +1107,13 @@ void TensorFromDLPack(const ::DLTensor& dl_tensor, framework::Tensor* dst) { } template -std::string format_tensor(const framework::Tensor& tensor) { +std::string format_tensor(const phi::DenseTensor& tensor) { // TODO(zhiqiu): use the print option to format tensor. return "NOT IMPLEMENTED"; } template -std::ostream& print_tensor(std::ostream& os, const framework::Tensor& tensor) { +std::ostream& print_tensor(std::ostream& os, const phi::DenseTensor& tensor) { auto inspect = tensor.data(); auto element_num = tensor.numel(); @@ -1136,7 +1140,7 @@ std::ostream& print_tensor(std::ostream& os, const framework::Tensor& tensor) { template <> std::ostream& print_tensor>( - std::ostream& os, const framework::Tensor& tensor) { + std::ostream& os, const phi::DenseTensor& tensor) { auto inspect = tensor.data>(); auto element_num = tensor.numel(); @@ -1154,7 +1158,7 @@ std::ostream& print_tensor>( template <> std::ostream& print_tensor>( - std::ostream& os, const framework::Tensor& tensor) { + std::ostream& os, const phi::DenseTensor& tensor) { auto inspect = tensor.data>(); auto element_num = tensor.numel(); diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h index 0780976b2c6f0..d1dc5e45c2d8c 100644 --- a/paddle/fluid/framework/tensor_util.h +++ b/paddle/fluid/framework/tensor_util.h @@ -60,13 +60,13 @@ class PrintOptions { }; void TensorToStream(std::ostream& os, - const Tensor& tensor, + const phi::DenseTensor& tensor, const platform::DeviceContext& dev_ctx); void TensorFromStream(std::istream& is, - Tensor* tensor, + phi::DenseTensor* tensor, const platform::DeviceContext& dev_ctx); void TensorFromStream(std::istream& is, - Tensor* tensor, + phi::DenseTensor* tensor, const platform::DeviceContext& dev_ctx, const size_t& seek, const std::vector& shape); @@ -77,10 +77,10 @@ void TensorFromStream(std::istream& is, // If ctx_place and src_place are the same, src_ctx.Wait() is added // after memory::Copy; if ctx_place and dst_place are the same, // src_ctx.Wait() is added before memory::Copy. -void TensorCopy(const Tensor& src, +void TensorCopy(const phi::DenseTensor& src, const platform::Place& dst_place, const platform::DeviceContext& ctx, - Tensor* dst); + phi::DenseTensor* dst); // NOTE(zcd): If the src.place() and dst_place are two different GPU, // the copy operation is carried out on the dst_place's stream. This is @@ -89,30 +89,30 @@ void TensorCopy(const Tensor& src, // stream, if this copy operation is carried out on the src_place's stream, // when dst is used in dst_place's stream the copy operation may be // not completed. 
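// For orientation, a minimal sketch of calling the copy helpers declared in this
// header once they take phi::DenseTensor; it assumes a CPU-only build, and the
// function name CopySketch plus the 2x3 shape are illustrative, not part of the
// patch itself.
#include "paddle/fluid/framework/tensor_util.h"

void CopySketch() {
  paddle::platform::CPUPlace cpu_place;
  phi::DenseTensor src;
  int* p = src.mutable_data<int>(phi::make_ddim({2, 3}), cpu_place);
  for (int i = 0; i < 6; ++i) p[i] = i;

  phi::DenseTensor dst;
  // Blocking variant: returns once dst holds the data at dst_place.
  paddle::framework::TensorCopySync(src, cpu_place, &dst);

  // Context-driven variant: the supplied device context decides which stream
  // performs the transfer (see the NOTE above on stream selection).
  phi::CPUContext ctx(cpu_place);
  paddle::framework::TensorCopy(src, cpu_place, ctx, &dst);
}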
-void TensorCopy(const Tensor& src, +void TensorCopy(const phi::DenseTensor& src, const platform::Place& dst_place, - Tensor* dst); + phi::DenseTensor* dst); -void TensorCopySync(const Tensor& src, +void TensorCopySync(const phi::DenseTensor& src, const platform::Place& dst_place, - Tensor* dst); + phi::DenseTensor* dst); template void TensorFromVector(const std::vector& src, const platform::DeviceContext& ctx, - Tensor* dst); + phi::DenseTensor* dst); template -void TensorFromVector(const std::vector& src, Tensor* dst); +void TensorFromVector(const std::vector& src, phi::DenseTensor* dst); template -void TensorToVector(const Tensor& src, +void TensorToVector(const phi::DenseTensor& src, const platform::DeviceContext& ctx, std::vector* dst); template -void TesnorToVector(const Tensor& src, std::vector* dst); +void TesnorToVector(const phi::DenseTensor& src, std::vector* dst); // convert dlpack's DLTensor to tensor -void TensorFromDLPack(const ::DLTensor& dl_tensor, framework::Tensor* dst); +void TensorFromDLPack(const ::DLTensor& dl_tensor, phi::DenseTensor* dst); // // The implementation of template functions. @@ -122,7 +122,7 @@ template void TensorFromArray(const T* src, const size_t& array_size, const platform::DeviceContext& ctx, - Tensor* dst) { + phi::DenseTensor* dst) { auto dst_place = ctx.GetPlace(); auto src_ptr = static_cast(src); platform::CPUPlace src_place; @@ -147,7 +147,7 @@ void TensorFromArray(const T* src, else if (platform::is_npu_place(dst_place)) { // NOLINT // 1. vector -> npu pinned tensor platform::NPUPinnedPlace npu_pinned_place; - Tensor npu_pinned_tensor; + phi::DenseTensor npu_pinned_tensor; npu_pinned_tensor.Resize(dst->dims()); auto npu_pinned_ptr = npu_pinned_tensor.mutable_data(npu_pinned_place, dst->dtype()); @@ -199,7 +199,7 @@ void TensorFromArray(const T* src, template void TensorFromVector(const std::vector& src, const platform::DeviceContext& ctx, - Tensor* dst) { + phi::DenseTensor* dst) { auto dst_place = ctx.GetPlace(); auto src_ptr = static_cast(src.data()); platform::CPUPlace src_place; @@ -229,7 +229,7 @@ void TensorFromVector(const std::vector& src, // so pass nullptr as stream to memory::Copy(). else if (platform::is_npu_place(dst_place)) { // NOLINT // 1. vector -> npu pinned tensor - Tensor npu_pinned_tensor(dst->dtype()); + phi::DenseTensor npu_pinned_tensor(dst->dtype()); platform::NPUPinnedPlace npu_pinned_place; auto npu_pinned_ptr = npu_pinned_tensor.mutable_data(dst->dims(), npu_pinned_place); @@ -288,7 +288,7 @@ void TensorFromVector(const std::vector& src, template <> inline void TensorFromVector(const std::vector& src, const platform::DeviceContext& ctx, - Tensor* dst) { + phi::DenseTensor* dst) { // vector has no data() member, use array instead. // See details: // https://stackoverflow.com/questions/46115669/why-does-stdvectorbool-have-no-data/46115714 @@ -321,7 +321,7 @@ inline void TensorFromVector(const std::vector& src, else if (platform::is_npu_place(dst_place)) { // NOLINT // 1. 
vector -> npu pinned tensor platform::NPUPinnedPlace npu_pinned_place; - Tensor npu_pinned_tensor; + phi::DenseTensor npu_pinned_tensor; npu_pinned_tensor.Resize(dst->dims()); auto npu_pinned_ptr = npu_pinned_tensor.mutable_data(npu_pinned_place, dst->dtype()); @@ -368,7 +368,7 @@ inline void TensorFromVector(const std::vector& src, } template -void TensorFromVector(const std::vector& src, Tensor* dst) { +void TensorFromVector(const std::vector& src, phi::DenseTensor* dst) { platform::CPUPlace dst_place = platform::CPUPlace(); auto src_ptr = static_cast(src.data()); platform::CPUPlace src_place; @@ -380,7 +380,8 @@ void TensorFromVector(const std::vector& src, Tensor* dst) { } template <> -inline void TensorFromVector(const std::vector& src, Tensor* dst) { +inline void TensorFromVector(const std::vector& src, + phi::DenseTensor* dst) { bool* array = new bool[src.size()]; for (unsigned int i = 0; i < src.size(); i++) { array[i] = static_cast(src[i]); @@ -397,7 +398,7 @@ inline void TensorFromVector(const std::vector& src, Tensor* dst) { } template -void TensorToVector(const Tensor& src, +void TensorToVector(const phi::DenseTensor& src, const platform::DeviceContext& ctx, std::vector* dst) { auto src_ptr = static_cast(src.data()); @@ -453,7 +454,7 @@ void TensorToVector(const Tensor& src, } template <> -inline void TensorToVector(const Tensor& src, +inline void TensorToVector(const phi::DenseTensor& src, const platform::DeviceContext& ctx, std::vector* dst) { auto src_ptr = static_cast(src.data()); @@ -505,7 +506,7 @@ inline void TensorToVector(const Tensor& src, } template -void TensorToVector(const Tensor& src, std::vector* dst) { +void TensorToVector(const phi::DenseTensor& src, std::vector* dst) { auto src_ptr = static_cast(src.data()); auto size = src.numel() * sizeof(T); @@ -524,7 +525,8 @@ void TensorToVector(const Tensor& src, std::vector* dst) { } template <> -inline void TensorToVector(const Tensor& src, std::vector* dst) { +inline void TensorToVector(const phi::DenseTensor& src, + std::vector* dst) { auto src_ptr = static_cast(src.data()); auto size = src.numel() * sizeof(bool); @@ -551,31 +553,32 @@ inline void TensorToVector(const Tensor& src, std::vector* dst) { std::ostream& operator<<(std::ostream& os, const LoD& lod); -inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) { +inline phi::DenseTensor ReshapeToMatrix(const phi::DenseTensor& src, + int num_col_dims) { int rank = src.dims().size(); PADDLE_ENFORCE_GE( rank, 2, platform::errors::InvalidArgument( "'ReshapeToMatrix()' is only used for flatten high rank " - "tensors to matrixs. The dimensions of Tensor must be " + "tensors to matrixs. The dimensions of phi::DenseTensor must be " "greater or equal than 2. 
" - "But received dimensions of Tensor is %d", + "But received dimensions of phi::DenseTensor is %d", rank)); if (rank == 2) { return src; } - Tensor res; + phi::DenseTensor res; res.ShareDataWith(src); res.Resize(phi::flatten_to_2d(src.dims(), num_col_dims)); return res; } template -inline T GetValue(const framework::Tensor* x) { +inline T GetValue(const phi::DenseTensor* x) { T value = static_cast(0); if (!platform::is_cpu_place(x->place())) { - framework::Tensor cpu_x; + phi::DenseTensor cpu_x; framework::TensorCopy(*x, platform::CPUPlace(), &cpu_x); #if defined(PADDLE_WITH_ASCEND_CL) || defined(PADDLE_WITH_MLU) platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); diff --git a/paddle/fluid/framework/tensor_util_test.cc b/paddle/fluid/framework/tensor_util_test.cc index c7db2186e5db7..3d3c7de73b729 100644 --- a/paddle/fluid/framework/tensor_util_test.cc +++ b/paddle/fluid/framework/tensor_util_test.cc @@ -12,18 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/tensor_util.h" +#include + #include -#include "paddle/fluid/operators/isfinite_op.h" -#include +#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/operators/isfinite_op.h" namespace paddle { namespace framework { TEST(TensorCopy, Tensor) { - Tensor src_tensor; - Tensor dst_tensor; + phi::DenseTensor src_tensor; + phi::DenseTensor dst_tensor; phi::CPUContext cpu_ctx((platform::CPUPlace())); int* src_ptr = src_tensor.mutable_data(phi::make_ddim({3, 3}), @@ -49,7 +50,7 @@ TEST(TensorCopy, Tensor) { EXPECT_TRUE(dst_tensor.layout() == src_tensor.layout()); - Tensor slice_tensor = src_tensor.Slice(1, 2); + phi::DenseTensor slice_tensor = src_tensor.Slice(1, 2); TensorCopy(slice_tensor, *cpu_place, &dst_tensor); const int* slice_ptr = slice_tensor.data(); dst_ptr = dst_tensor.data(); @@ -61,9 +62,9 @@ TEST(TensorCopy, Tensor) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) { - Tensor src_tensor; - Tensor gpu_tensor; - Tensor dst_tensor; + phi::DenseTensor src_tensor; + phi::DenseTensor gpu_tensor; + phi::DenseTensor dst_tensor; int* src_ptr = src_tensor.mutable_data(phi::make_ddim({3, 3}), platform::CPUPlace()); @@ -71,7 +72,7 @@ TEST(TensorCopy, Tensor) { int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; memcpy(src_ptr, arr, 9 * sizeof(int)); - // CPU Tensor to GPU Tensor + // CPU phi::DenseTensor to GPU phi::DenseTensor auto gpu_place = new platform::CUDAPlace(0); phi::GPUContext gpu_ctx(*gpu_place); gpu_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() @@ -80,7 +81,7 @@ TEST(TensorCopy, Tensor) { gpu_ctx.PartialInitWithAllocator(); TensorCopy(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor); - // GPU Tensor to CPU Tensor + // GPU phi::DenseTensor to CPU phi::DenseTensor auto cpu_place = new platform::CPUPlace(); TensorCopy(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor); @@ -101,12 +102,12 @@ TEST(TensorCopy, Tensor) { EXPECT_EQ(src_ptr[i], dst_ptr_tmp[i]); } - Tensor slice_tensor = src_tensor.Slice(1, 2); + phi::DenseTensor slice_tensor = src_tensor.Slice(1, 2); - // CPU Slice Tensor to GPU Tensor + // CPU Slice phi::DenseTensor to GPU phi::DenseTensor TensorCopy(slice_tensor, *gpu_place, gpu_ctx, &gpu_tensor); - // GPU Tensor to CPU Tensor + // GPU phi::DenseTensor to CPU phi::DenseTensor TensorCopy(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor); // Sync before Compare Slice Tensors @@ -126,9 +127,9 @@ TEST(TensorCopy, Tensor) { TEST(TensorFromVector, Tensor) { { 
std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - paddle::framework::Tensor cpu_tensor; + phi::DenseTensor cpu_tensor; - // Copy to CPU Tensor + // Copy to CPU phi::DenseTensor cpu_tensor.Resize(phi::make_ddim({3, 3})); auto cpu_place = new paddle::platform::CPUPlace(); paddle::framework::TensorFromVector(src_vec, &cpu_tensor); @@ -157,11 +158,11 @@ TEST(TensorFromVector, Tensor) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) { std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - paddle::framework::Tensor cpu_tensor; - paddle::framework::Tensor gpu_tensor; - paddle::framework::Tensor dst_tensor; + phi::DenseTensor cpu_tensor; + phi::DenseTensor gpu_tensor; + phi::DenseTensor dst_tensor; - // Copy to CPU Tensor + // Copy to CPU phi::DenseTensor cpu_tensor.Resize(phi::make_ddim({3, 3})); auto cpu_place = new paddle::platform::CPUPlace(); phi::CPUContext cpu_ctx(*cpu_place); @@ -219,7 +220,7 @@ TEST(TensorFromVector, Tensor) { TEST(TensorToVector, Tensor) { { - paddle::framework::Tensor src; + phi::DenseTensor src; int* src_ptr = src.mutable_data({3, 3}, paddle::platform::CPUPlace()); for (int i = 0; i < 3 * 3; ++i) { src_ptr[i] = i; @@ -236,7 +237,7 @@ TEST(TensorToVector, Tensor) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) { std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - paddle::framework::Tensor gpu_tensor; + phi::DenseTensor gpu_tensor; paddle::platform::CUDAPlace place; phi::GPUContext gpu_ctx(place); gpu_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() @@ -255,19 +256,22 @@ TEST(TensorToVector, Tensor) { #endif } -TEST(TensorToVector, Tensor_bool){{paddle::framework::Tensor src; -bool* src_ptr = src.mutable_data({3, 3}, paddle::platform::CPUPlace()); -for (int i = 0; i < 3 * 3; ++i) { - src_ptr[i] = static_cast(i % 2); -} +TEST(TensorToVector, Tensor_bool) { + phi::DenseTensor src; + bool* src_ptr = src.mutable_data({3, 3}, paddle::platform::CPUPlace()); + for (int i = 0; i < 3 * 3; ++i) { + src_ptr[i] = static_cast(i % 2); + } -paddle::platform::CPUPlace place; -std::vector dst; -paddle::framework::TensorToVector(src, &dst); + paddle::platform::CPUPlace place; + std::vector dst; + paddle::framework::TensorToVector(src, &dst); -for (int i = 0; i < 3 * 3; ++i) { - EXPECT_EQ(src_ptr[i], dst[i]); + for (int i = 0; i < 3 * 3; ++i) { + EXPECT_EQ(src_ptr[i], dst[i]); + } } + } // namespace framework #ifdef PADDLE_WITH_CUDA @@ -283,7 +287,7 @@ for (int i = 0; i < 3 * 3; ++i) { true, false, }; - paddle::framework::Tensor gpu_tensor; + phi::DenseTensor gpu_tensor; paddle::platform::CUDAPlace place; phi::GPUContext gpu_ctx(place); gpu_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() @@ -313,7 +317,7 @@ for (int i = 0; i < 3 * 3; ++i) { true, false, }; - paddle::framework::Tensor npu_tensor; + phi::DenseTensor npu_tensor; paddle::platform::NPUPlace place(0); paddle::platform::NPUDeviceContext npu_ctx(place); paddle::framework::TensorFromVector(src_vec, npu_ctx, &npu_tensor); @@ -331,7 +335,7 @@ for (int i = 0; i < 3 * 3; ++i) { TEST(TensorFromDLPack, Tensor) { { std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - paddle::framework::Tensor cpu_tensor; + phi::DenseTensor cpu_tensor; cpu_tensor.Resize(phi::make_ddim({3, 3})); paddle::platform::CPUPlace cpu_place; @@ -339,7 +343,7 @@ TEST(TensorFromDLPack, Tensor) { paddle::framework::TensorFromVector(src_vec, cpu_ctx, &cpu_tensor); paddle::framework::DLPackTensor dlpack_tensor(cpu_tensor, 1); - paddle::framework::Tensor dst_tensor; + phi::DenseTensor dst_tensor; 
paddle::framework::TensorFromDLPack(dlpack_tensor, &dst_tensor); auto cpu_ptr = cpu_tensor.data(); @@ -353,12 +357,12 @@ TEST(TensorFromDLPack, Tensor) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) { std::vector src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - paddle::framework::Tensor cpu_tensor; - paddle::framework::Tensor gpu_tensor; - paddle::framework::Tensor dst_tensor; - paddle::framework::Tensor gpu_tensor_from_dlpack; + phi::DenseTensor cpu_tensor; + phi::DenseTensor gpu_tensor; + phi::DenseTensor dst_tensor; + phi::DenseTensor gpu_tensor_from_dlpack; - // Copy to CPU Tensor + // Copy to CPU phi::DenseTensor cpu_tensor.Resize(phi::make_ddim({3, 3})); paddle::platform::CPUPlace cpu_place; phi::CPUContext cpu_ctx(cpu_place); @@ -396,7 +400,7 @@ TEST(TensorFromDLPack, Tensor) { TEST(TensorContainsNAN, CPU) { { - paddle::framework::Tensor src; + phi::DenseTensor src; float* buf = src.mutable_data({3}, paddle::platform::CPUPlace()); buf[0] = 0.0; buf[1] = NAN; @@ -407,7 +411,7 @@ TEST(TensorContainsNAN, CPU) { } { - paddle::framework::Tensor src; + phi::DenseTensor src; paddle::platform::float16* buf = src.mutable_data( {3}, paddle::platform::CPUPlace()); @@ -422,7 +426,7 @@ TEST(TensorContainsNAN, CPU) { TEST(TensorContainsInf, CPU) { { - paddle::framework::Tensor src; + phi::DenseTensor src; double* buf = src.mutable_data({3}, paddle::platform::CPUPlace()); buf[0] = 1.0; buf[1] = INFINITY; @@ -433,7 +437,7 @@ TEST(TensorContainsInf, CPU) { } { - paddle::framework::Tensor src; + phi::DenseTensor src; paddle::platform::float16* buf = src.mutable_data( {3}, paddle::platform::CPUPlace()); @@ -448,7 +452,7 @@ TEST(TensorContainsInf, CPU) { TEST(TensorIsfinite, CPU) { { - paddle::framework::Tensor src, out; + phi::DenseTensor src, out; double* buf = src.mutable_data({3}, paddle::platform::CPUPlace()); buf[0] = 1.0; buf[1] = INFINITY; @@ -461,7 +465,7 @@ TEST(TensorIsfinite, CPU) { } { - paddle::framework::Tensor src, out; + phi::DenseTensor src, out; double* buf = src.mutable_data({3}, paddle::platform::CPUPlace()); buf[0] = 1.0; buf[1] = NAN; @@ -474,7 +478,7 @@ TEST(TensorIsfinite, CPU) { } { - paddle::framework::Tensor src, out; + phi::DenseTensor src, out; paddle::platform::float16* buf = src.mutable_data( {3}, paddle::platform::CPUPlace()); @@ -493,7 +497,7 @@ TEST(TensorIsfinite, CPU) { } TEST(Tensor, FromAndToStream) { - framework::Tensor src_tensor; + phi::DenseTensor src_tensor; int array[6] = {1, 2, 3, 4, 5, 6}; src_tensor.Resize({2, 3}); int* src_ptr = src_tensor.mutable_data(platform::CPUPlace()); @@ -501,7 +505,7 @@ TEST(Tensor, FromAndToStream) { src_ptr[i] = array[i]; } { - framework::Tensor dst_tensor; + phi::DenseTensor dst_tensor; auto place = new platform::CPUPlace(); phi::CPUContext cpu_ctx(*place); std::ostringstream oss; @@ -518,9 +522,9 @@ TEST(Tensor, FromAndToStream) { } #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) { - Tensor gpu_tensor; + phi::DenseTensor gpu_tensor; gpu_tensor.Resize({2, 3}); - Tensor dst_tensor; + phi::DenseTensor dst_tensor; auto gpu_place = new platform::CUDAPlace(); phi::GPUContext gpu_ctx(*gpu_place); diff --git a/paddle/fluid/framework/tensor_util_test.cu b/paddle/fluid/framework/tensor_util_test.cu index 53807beab9171..e4e49340e6615 100644 --- a/paddle/fluid/framework/tensor_util_test.cu +++ b/paddle/fluid/framework/tensor_util_test.cu @@ -62,7 +62,7 @@ TEST(TensorContainsNAN, GPU) { auto& pool = paddle::platform::DeviceContextPool::Instance(); auto* cuda_ctx = pool.GetByPlace(gpu); { - Tensor tensor; + phi::DenseTensor 
tensor; float* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP hipLaunchKernelGGL(FillNAN, dim3(1), dim3(1), 0, cuda_ctx->stream(), buf); @@ -73,7 +73,7 @@ TEST(TensorContainsNAN, GPU) { ASSERT_TRUE(TensorContainsNAN(tensor)); } { - Tensor tensor; + phi::DenseTensor tensor; paddle::platform::float16* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP @@ -91,7 +91,7 @@ TEST(TensorContainsInf, GPU) { auto& pool = paddle::platform::DeviceContextPool::Instance(); auto* cuda_ctx = pool.GetByPlace(gpu); { - Tensor tensor; + phi::DenseTensor tensor; float* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP hipLaunchKernelGGL(FillInf, dim3(1), dim3(1), 0, cuda_ctx->stream(), buf); @@ -102,7 +102,7 @@ TEST(TensorContainsInf, GPU) { ASSERT_TRUE(TensorContainsInf(tensor)); } { - Tensor tensor; + phi::DenseTensor tensor; paddle::platform::float16* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP @@ -122,7 +122,7 @@ TEST(TensorIsfinite, GPU) { auto* cuda_ctx = pool.GetByPlace(gpu); // contains inf { - Tensor tensor; + phi::DenseTensor tensor; float* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP hipLaunchKernelGGL(FillInf, dim3(1), dim3(1), 0, cuda_ctx->stream(), buf); @@ -133,7 +133,7 @@ TEST(TensorIsfinite, GPU) { EXPECT_TRUE(!TensorIsfinite(tensor)); } { - Tensor tensor; + phi::DenseTensor tensor; float16* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP hipLaunchKernelGGL(FillInf, dim3(1), dim3(1), 0, cuda_ctx->stream(), buf); @@ -146,7 +146,7 @@ TEST(TensorIsfinite, GPU) { // contains nan { - Tensor tensor; + phi::DenseTensor tensor; float* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP hipLaunchKernelGGL(FillNAN, dim3(1), dim3(1), 0, cuda_ctx->stream(), buf); @@ -157,7 +157,7 @@ TEST(TensorIsfinite, GPU) { EXPECT_TRUE(!TensorIsfinite(tensor)); } { - Tensor tensor; + phi::DenseTensor tensor; float16* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP hipLaunchKernelGGL(FillNAN, dim3(1), dim3(1), 0, cuda_ctx->stream(), buf); @@ -170,7 +170,7 @@ TEST(TensorIsfinite, GPU) { // all element are finite { - Tensor tensor; + phi::DenseTensor tensor; float* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP hipLaunchKernelGGL( @@ -182,7 +182,7 @@ TEST(TensorIsfinite, GPU) { EXPECT_TRUE(TensorIsfinite(tensor)); } { - Tensor tensor; + phi::DenseTensor tensor; float16* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP hipLaunchKernelGGL( @@ -200,7 +200,7 @@ TEST(TensorContainsInf, GPUWithoutWait) { auto& pool = paddle::platform::DeviceContextPool::Instance(); auto* cuda_ctx = pool.GetByPlace(gpu); { - Tensor tensor, out; + phi::DenseTensor tensor, out; float* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP hipLaunchKernelGGL(FillInf, dim3(1), dim3(1), 0, cuda_ctx->stream(), buf); @@ -210,13 +210,13 @@ TEST(TensorContainsInf, GPUWithoutWait) { cuda_ctx->Wait(); TensorContainsInf(tensor, &out); platform::CPUPlace cpu; - Tensor tmp; + phi::DenseTensor tmp; TensorCopy(out, cpu, *cuda_ctx, &tmp); cuda_ctx->Wait(); ASSERT_EQ(tmp.data()[0], true); } { - Tensor tensor, out; + phi::DenseTensor tensor, out; paddle::platform::float16* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP @@ -227,7 +227,7 @@ TEST(TensorContainsInf, GPUWithoutWait) { cuda_ctx->Wait(); TensorContainsInf(tensor, &out); platform::CPUPlace cpu; - Tensor tmp; + phi::DenseTensor tmp; TensorCopy(out, cpu, *cuda_ctx, &tmp); cuda_ctx->Wait(); ASSERT_EQ(tmp.data()[0], true); @@ -239,7 +239,7 @@ TEST(TensorContainsNAN, 
GPUWithoutWait) { auto& pool = paddle::platform::DeviceContextPool::Instance(); auto* cuda_ctx = pool.GetByPlace(gpu); { - Tensor tensor, out; + phi::DenseTensor tensor, out; float* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP hipLaunchKernelGGL(FillNAN, dim3(1), dim3(1), 0, cuda_ctx->stream(), buf); @@ -249,13 +249,13 @@ TEST(TensorContainsNAN, GPUWithoutWait) { cuda_ctx->Wait(); TensorContainsNAN(tensor, &out); platform::CPUPlace cpu; - Tensor tmp; + phi::DenseTensor tmp; TensorCopy(out, cpu, *cuda_ctx, &tmp); cuda_ctx->Wait(); ASSERT_EQ(tmp.data()[0], true); } { - Tensor tensor, out; + phi::DenseTensor tensor, out; paddle::platform::float16* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP @@ -266,7 +266,7 @@ TEST(TensorContainsNAN, GPUWithoutWait) { cuda_ctx->Wait(); TensorContainsNAN(tensor, &out); platform::CPUPlace cpu; - Tensor tmp; + phi::DenseTensor tmp; TensorCopy(out, cpu, *cuda_ctx, &tmp); cuda_ctx->Wait(); ASSERT_EQ(tmp.data()[0], true); @@ -278,7 +278,7 @@ TEST(TensorIsfinite, GPUWithoutWait) { auto& pool = paddle::platform::DeviceContextPool::Instance(); auto* cuda_ctx = pool.GetByPlace(gpu); { - Tensor tensor, out; + phi::DenseTensor tensor, out; float* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP hipLaunchKernelGGL(FillInf, dim3(1), dim3(1), 0, cuda_ctx->stream(), buf); @@ -288,13 +288,13 @@ TEST(TensorIsfinite, GPUWithoutWait) { cuda_ctx->Wait(); TensorIsfinite(tensor, &out); platform::CPUPlace cpu; - Tensor tmp; + phi::DenseTensor tmp; TensorCopy(out, cpu, *cuda_ctx, &tmp); cuda_ctx->Wait(); EXPECT_EQ(tmp.data()[0], false); } { - Tensor tensor, out; + phi::DenseTensor tensor, out; float* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP hipLaunchKernelGGL(FillNAN, dim3(1), dim3(1), 0, cuda_ctx->stream(), buf); @@ -304,13 +304,13 @@ TEST(TensorIsfinite, GPUWithoutWait) { cuda_ctx->Wait(); TensorIsfinite(tensor, &out); platform::CPUPlace cpu; - Tensor tmp; + phi::DenseTensor tmp; TensorCopy(out, cpu, *cuda_ctx, &tmp); cuda_ctx->Wait(); EXPECT_EQ(tmp.data()[0], false); } { - Tensor tensor, out; + phi::DenseTensor tensor, out; float* buf = tensor.mutable_data({3}, gpu); #ifdef PADDLE_WITH_HIP hipLaunchKernelGGL( @@ -321,7 +321,7 @@ TEST(TensorIsfinite, GPUWithoutWait) { cuda_ctx->Wait(); TensorIsfinite(tensor, &out); platform::CPUPlace cpu; - Tensor tmp; + phi::DenseTensor tmp; TensorCopy(out, cpu, *cuda_ctx, &tmp); cuda_ctx->Wait(); EXPECT_EQ(tmp.data()[0], true); diff --git a/paddle/fluid/framework/tuple.h b/paddle/fluid/framework/tuple.h index a06f92f32d28c..0cf7c70b9bf81 100644 --- a/paddle/fluid/framework/tuple.h +++ b/paddle/fluid/framework/tuple.h @@ -31,7 +31,7 @@ typedef paddle::variant ElementVar; diff --git a/paddle/fluid/framework/var_type_traits.h b/paddle/fluid/framework/var_type_traits.h index ea7ebce2dae6b..a0a40682a9667 100644 --- a/paddle/fluid/framework/var_type_traits.h +++ b/paddle/fluid/framework/var_type_traits.h @@ -178,7 +178,7 @@ struct VarTypeRegistryImpl { // Users should add other variable types below. // Paddle would generate unique Ids for each registered variable types. 
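// For reference, a small sketch of how this registry is queried after the
// rename, mirroring the VarTypeTrait usage changed later in this patch
// (reset_tensor_array.h); kDenseTensorId / kSelectedRowsId are illustrative
// names only.
#include "paddle/fluid/framework/var_type_traits.h"

constexpr auto kDenseTensorId =
    paddle::framework::VarTypeTrait<phi::DenseTensor>::kId;
constexpr auto kSelectedRowsId =
    paddle::framework::VarTypeTrait<phi::SelectedRows>::kId;
// Each registered type keeps the unique, compile-time id mentioned above.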
using VarTypeRegistry = detail::VarTypeRegistryImpl< - Tensor, + phi::DenseTensor, phi::SelectedRows, std::vector, LoDRankTable, diff --git a/paddle/fluid/framework/variable.h b/paddle/fluid/framework/variable.h index 68876de0f795a..5524433ce7b3a 100644 --- a/paddle/fluid/framework/variable.h +++ b/paddle/fluid/framework/variable.h @@ -123,18 +123,19 @@ inline phi::DenseTensor::InplaceVersion* Variable::InplaceVersionCounter() { if (IsType()) { version_counter_ptr = &GetMutable()->InplaceVersionCounter(); - } else if (IsType()) { + } else if (IsType()) { version_counter_ptr = - &GetMutable()->InplaceVersionCounter(); + &GetMutable()->InplaceVersionCounter(); } else if (IsType()) { version_counter_ptr = &GetMutable() ->mutable_value() ->InplaceVersionCounter(); } else { - VLOG(4) << "Only supports Tensor, LoDTensor, SelectedRows to have " - "TensorInplaceVersion, but received type " - << platform::demangle(framework::ToTypeName(Type())); + VLOG(4) + << "Only supports phi::DenseTensor, LoDTensor, SelectedRows to have " + "TensorInplaceVersion, but received type " + << platform::demangle(framework::ToTypeName(Type())); } return version_counter_ptr; } @@ -159,9 +160,10 @@ inline void Variable::BumpInplaceVersion() { if (version_counter_ptr) { return version_counter_ptr->Bump(); } else { - VLOG(4) << "Only supports Tensor, LoDTensor, SelectedRows to have " - "TensorInplaceVersion, but received type " - << platform::demangle(framework::ToTypeName(Type())); + VLOG(4) + << "Only supports phi::DenseTensor, LoDTensor, SelectedRows to have " + "TensorInplaceVersion, but received type " + << platform::demangle(framework::ToTypeName(Type())); } } } // namespace framework diff --git a/paddle/fluid/framework/variable_test.cc b/paddle/fluid/framework/variable_test.cc index b998efc1c230e..22af9ae934e0c 100644 --- a/paddle/fluid/framework/variable_test.cc +++ b/paddle/fluid/framework/variable_test.cc @@ -29,7 +29,7 @@ TEST(Variable, GetMutable) { EXPECT_EQ("1234", tt); try { - v->GetMutable(); + v->GetMutable(); } catch (std::exception& e) { return; } diff --git a/paddle/fluid/framework/version.cc b/paddle/fluid/framework/version.cc index c01bef79cdccd..9f07f0f5587b1 100644 --- a/paddle/fluid/framework/version.cc +++ b/paddle/fluid/framework/version.cc @@ -20,7 +20,7 @@ namespace paddle { namespace framework { bool IsProgramVersionSupported(int64_t version) { - /* So far, all old versions of Tensor are supported in the + /* So far, all old versions of phi::DenseTensor are supported in the * new version. The compatibility judgment cannot be made only * by the version number. Please do not use this interface, * it may be discarded because backward compatibility. @@ -29,7 +29,7 @@ bool IsProgramVersionSupported(int64_t version) { } bool IsTensorVersionSupported(uint32_t version) { - /* So far, all old versions of Tensor are supported in the + /* So far, all old versions of phi::DenseTensor are supported in the * new version. The compatibility judgment cannot be made only * by the version number. Please do not use this interface, * it may be discarded because backward compatibility. 
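// To illustrate the Variable API touched above, a hedged sketch of holding a
// phi::DenseTensor and bumping its inplace version; InplaceSketch is an
// illustrative name and error handling is omitted.
#include "paddle/fluid/framework/variable.h"

void InplaceSketch() {
  paddle::framework::Variable var;
  // GetMutable now instantiates a phi::DenseTensor holder directly.
  auto* tensor = var.GetMutable<phi::DenseTensor>();
  (void)tensor;
  if (var.IsType<phi::DenseTensor>()) {
    // Only DenseTensor, LoDTensor and SelectedRows holders carry a
    // TensorInplaceVersion; other types merely log, as shown above.
    var.BumpInplaceVersion();
  }
}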
diff --git a/paddle/fluid/framework/version.h b/paddle/fluid/framework/version.h index 1bb627775942a..b7ec4ecc11ca5 100644 --- a/paddle/fluid/framework/version.h +++ b/paddle/fluid/framework/version.h @@ -21,7 +21,7 @@ namespace paddle { namespace framework { // Note: -// Program and Tensor that pass the IsXXXVersionSupported should +// Program and phi::DenseTensor that pass the IsXXXVersionSupported should // be supported by the current codes. Otherwise, it's a compatibility // bug. diff --git a/paddle/fluid/imperative/all_reduce.cc b/paddle/fluid/imperative/all_reduce.cc index c9d3d2591d000..4064c65be6708 100644 --- a/paddle/fluid/imperative/all_reduce.cc +++ b/paddle/fluid/imperative/all_reduce.cc @@ -53,8 +53,8 @@ static const platform::Place &GetVarPlace(const framework::Variable &src) { } } -static void AllReduce(const framework::Tensor &src, - framework::Tensor *dst, +static void AllReduce(const phi::DenseTensor &src, + phi::DenseTensor *dst, const gpuStream_t stream, const platform::NCCLComm *comm) { const auto &place = src.place(); diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc index c4b622f98505f..3b2e299a2a745 100644 --- a/paddle/fluid/imperative/basic_engine.cc +++ b/paddle/fluid/imperative/basic_engine.cc @@ -150,7 +150,7 @@ void BasicEngine::CheckBackwardInputs(const OpBase& op) { } auto* inner_var = var->MutableVar(); - framework::Tensor* tensor = nullptr; + phi::DenseTensor* tensor = nullptr; if (!inner_var->IsInitialized() || inner_var->IsType()) { tensor = inner_var->GetMutable(); diff --git a/paddle/fluid/imperative/bkcl_context.cc b/paddle/fluid/imperative/bkcl_context.cc index 831e7dae942ae..4fc21b05c44c7 100644 --- a/paddle/fluid/imperative/bkcl_context.cc +++ b/paddle/fluid/imperative/bkcl_context.cc @@ -33,8 +33,8 @@ namespace paddle { namespace imperative { -static void AllReduce(const framework::Tensor &src, - framework::Tensor *dst, +static void AllReduce(const phi::DenseTensor &src, + phi::DenseTensor *dst, const XPUStream stream, const platform::BKCLComm *comm) { const auto &place = src.place(); @@ -181,7 +181,7 @@ void BKCLParallelContext::AllReduceByStream(const framework::Variable &src, void BKCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { VLOG(3) << "/// DEBUG /// start inter broadcast with ring_id: " << ring_id; - framework::Tensor *src_tensor = src->GetMutable(); + phi::DenseTensor *src_tensor = src->GetMutable(); const auto &place = src_tensor->place(); platform::BKCLComm *comm = platform::BKCLCommContext::Instance().Get(ring_id, place); diff --git a/paddle/fluid/imperative/cncl_context.cc b/paddle/fluid/imperative/cncl_context.cc index 9fc2cd6408b21..02242f9593e1b 100644 --- a/paddle/fluid/imperative/cncl_context.cc +++ b/paddle/fluid/imperative/cncl_context.cc @@ -34,8 +34,8 @@ class Variable; namespace paddle { namespace imperative { -static void AllReduce(const framework::Tensor &src, - framework::Tensor *dst, +static void AllReduce(const phi::DenseTensor &src, + phi::DenseTensor *dst, const mluStream stream, const platform::CNCLComm *comm) { const auto &place = src.place(); @@ -174,7 +174,7 @@ void CNCLParallelContext::AllReduceByStream(const framework::Variable &src, void CNCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { VLOG(3) << "/// DEBUG /// start inter broadcast with ring_id: " << ring_id; - framework::Tensor *src_tensor = src->GetMutable(); + phi::DenseTensor *src_tensor = src->GetMutable(); const auto &place = src_tensor->place(); platform::CNCLComm 
*comm = platform::CNCLCommContext::Instance().Get(ring_id, place); diff --git a/paddle/fluid/imperative/gloo_context.cc b/paddle/fluid/imperative/gloo_context.cc index b6c21bead4182..ea140f8ecbee4 100644 --- a/paddle/fluid/imperative/gloo_context.cc +++ b/paddle/fluid/imperative/gloo_context.cc @@ -108,8 +108,8 @@ void GLOOParallelContext::AllReduceByStream(const framework::Variable &src, } } -void GLOOParallelContext::AllReduce(const framework::Tensor &src_tensor, - framework::Tensor *dst_tensor) { +void GLOOParallelContext::AllReduce(const phi::DenseTensor &src_tensor, + phi::DenseTensor *dst_tensor) { auto gloo_wrapper = framework::GlooWrapper::GetInstance(); dst_tensor->Resize(src_tensor.dims()); switch (framework::TransToProtoVarType(src_tensor.dtype())) { diff --git a/paddle/fluid/imperative/gloo_context.h b/paddle/fluid/imperative/gloo_context.h index 5290e3d1315a4..0e82175de0b0c 100644 --- a/paddle/fluid/imperative/gloo_context.h +++ b/paddle/fluid/imperative/gloo_context.h @@ -60,7 +60,7 @@ class GLOOParallelContext : public ParallelContext { void SynchronizeCompute() override; private: - void AllReduce(const framework::Tensor& src, framework::Tensor* dst); + void AllReduce(const phi::DenseTensor& src, phi::DenseTensor* dst); void AllReduce(const phi::SelectedRows& src, phi::SelectedRows* dst); private: diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc index 199359a960326..594b105dc6bce 100644 --- a/paddle/fluid/imperative/gradient_accumulator.cc +++ b/paddle/fluid/imperative/gradient_accumulator.cc @@ -85,8 +85,8 @@ static void MoveOrCopyVar(framework::Variable* dst, #ifdef PADDLE_WITH_XPU template void XPUTensorAddFunctor(const platform::Place& place, - const framework::Tensor& src, - framework::Tensor* dst) { + const phi::DenseTensor& src, + phi::DenseTensor* dst) { using XPUType = typename XPUTypeTrait::Type; platform::XPUDeviceContext* ctx = dynamic_cast( platform::DeviceContextPool::Instance().Get(place)); diff --git a/paddle/fluid/imperative/hccl_context.cc b/paddle/fluid/imperative/hccl_context.cc index 975f7896f0d48..5ee4417bd29cd 100644 --- a/paddle/fluid/imperative/hccl_context.cc +++ b/paddle/fluid/imperative/hccl_context.cc @@ -32,8 +32,8 @@ class Variable; namespace paddle { namespace imperative { -static void AllReduce(const framework::Tensor &src, - framework::Tensor *dst, +static void AllReduce(const phi::DenseTensor &src, + phi::DenseTensor *dst, const aclrtStream stream, const platform::HCCLComm *comm) { const auto &place = src.place(); @@ -175,7 +175,7 @@ void HCCLParallelContext::AllReduceByStream(const framework::Variable &src, void HCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { VLOG(3) << "/// DEBUG /// start inter broadcast with ring_id: " << ring_id; if (src->IsType()) { - framework::Tensor *src_tensor = src->GetMutable(); + phi::DenseTensor *src_tensor = src->GetMutable(); const auto &place = src_tensor->place(); platform::HCCLComm *comm = platform::HCCLCommContext::Instance().Get(ring_id, place); diff --git a/paddle/fluid/imperative/nccl_context.cc b/paddle/fluid/imperative/nccl_context.cc index 94ac86e97e157..c069d7ed10908 100644 --- a/paddle/fluid/imperative/nccl_context.cc +++ b/paddle/fluid/imperative/nccl_context.cc @@ -143,7 +143,7 @@ void NCCLParallelContext::AllReduceByStream(const framework::Variable &src, void NCCLParallelContext::Broadcast(framework::Variable *src, int ring_id) { VLOG(3) << "/// DEBUG /// start inter broadcast with ring_id: " << ring_id; - 
framework::Tensor *src_tensor = src->GetMutable(); + phi::DenseTensor *src_tensor = src->GetMutable(); const auto &place = src_tensor->place(); platform::NCCLComm *comm = platform::NCCLCommContext::Instance().Get(ring_id, place); diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index 62bbf77a2df1d..61ac4b90b5154 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -58,7 +58,7 @@ const std::shared_ptr& GetVariableWrapper( return var; } -const framework::Tensor* GetTensorFromVar(const framework::Variable& var) { +const phi::DenseTensor* GetTensorFromVar(const framework::Variable& var) { if (var.IsType()) { return &(var.Get()); } else if (var.IsType()) { @@ -91,7 +91,7 @@ void HandleComplexGradToRealGrad(const NameVarMap& outs) { << " var `" << var->Name() << "` to " << framework::DataTypeToString(var->ForwardDataType()) << " real var in dynamic graph."; - framework::Tensor out; + phi::DenseTensor out; framework::TransComplexToReal( var->ForwardDataType(), var->DataType(), *tensor, &out); SetTensorToVariable(var->Var(), out, var->MutableVar()); diff --git a/paddle/fluid/imperative/prepared_operator.h b/paddle/fluid/imperative/prepared_operator.h index 58cae0faead9f..dfa18814de958 100644 --- a/paddle/fluid/imperative/prepared_operator.h +++ b/paddle/fluid/imperative/prepared_operator.h @@ -38,7 +38,7 @@ DECLARE_bool(use_mkldnn); namespace paddle { namespace imperative { -const framework::Tensor* GetTensorFromVar(const framework::Variable& var); +const phi::DenseTensor* GetTensorFromVar(const framework::Variable& var); template static void SetForwardDataTypeOfGradVar(const std::shared_ptr& var); @@ -110,7 +110,7 @@ std::shared_ptr> PrepareData( cache_var->Var(), *tensor, tmp_var->MutableVar()); (*tmp_ins_ptr)[name_pair.first][i] = tmp_var; } else { - framework::Tensor out; + phi::DenseTensor out; TransformData( expected_kernel_key, kernel_type_for_var, *tensor, &out); if (NeedTransformDataType(kernel_type_for_var, @@ -656,7 +656,7 @@ void PreparePhiData(const phi::Kernel& phi_kernel, VLOG(3) << "Phi Transform Variable " << input_names[i] << " from " << tensor_in->place() << " to " << expected_place; - framework::Tensor tmp_tensor; + phi::DenseTensor tmp_tensor; framework::TensorCopySync(*tensor_in, expected_place, &tmp_tensor); SetTensorToVariable(var->Var(), tmp_tensor, var->MutableVar()); diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc index 24181eec59c4a..4492ca9257d28 100644 --- a/paddle/fluid/imperative/reducer.cc +++ b/paddle/fluid/imperative/reducer.cc @@ -34,7 +34,7 @@ namespace imperative { defined(PADDLE_WITH_ASCEND_CL) || defined(PADDLE_WITH_CNCL) // div the nranks void Group::DivNRanks(const platform::DeviceContext &context, int64_t nranks) { - framework::Tensor *tensor = + phi::DenseTensor *tensor = is_sparse_ ? 
sparse_contents_->GetMutable()->mutable_value() : dense_contents_.GetMutable(); @@ -76,7 +76,7 @@ void Group::DivNRanks(const platform::DeviceContext &context, int64_t nranks) { template static void ConcatTensorsForAllReduce( const DeviceContext &context, - const std::vector &dense_tensors_, + const std::vector &dense_tensors_, framework::Variable *p_dense_contents) { operators::math::ConcatFunctor concat_functor_; concat_functor_(context, @@ -89,10 +89,10 @@ template static void SplitTensorsForAllReduce( const DeviceContext &context, framework::Variable *p_dense_contents, - std::vector *p_dense_tensors) { + std::vector *p_dense_tensors) { auto *in = p_dense_contents->GetMutable(); - std::vector outs; - std::vector shape_refer; + std::vector outs; + std::vector shape_refer; outs.reserve(p_dense_tensors->size()); shape_refer.reserve(p_dense_tensors->size()); @@ -114,7 +114,7 @@ static void SplitTensorsForAllReduce( template static void ConcatTensorsWithType( const DeviceContext &context, - const std::vector &dense_tensors_, + const std::vector &dense_tensors_, framework::Variable *p_dense_contents, framework::proto::VarType::Type type) { switch (type) { @@ -140,11 +140,10 @@ static void ConcatTensorsWithType( // context is used to select the stream for split template -static void SplitTensorsWithType( - const DeviceContext &context, - framework::Variable *p_dense_contents, - std::vector *p_dense_tensors, - framework::proto::VarType::Type type) { +static void SplitTensorsWithType(const DeviceContext &context, + framework::Variable *p_dense_contents, + std::vector *p_dense_tensors, + framework::proto::VarType::Type type) { switch (type) { case framework::proto::VarType::FP16: SplitTensorsForAllReduce( @@ -171,10 +170,10 @@ template <> void SplitTensorsForAllReduce( const platform::XPUDeviceContext &context, framework::Variable *p_dense_contents, - std::vector *p_dense_tensors) { + std::vector *p_dense_tensors) { auto *in = p_dense_contents->GetMutable(); - std::vector outs; - std::vector shape_refer; + std::vector outs; + std::vector shape_refer; outs.reserve(p_dense_tensors->size()); shape_refer.reserve(p_dense_tensors->size()); @@ -192,7 +191,7 @@ void SplitTensorsForAllReduce( template <> void ConcatTensorsWithType( const platform::XPUDeviceContext &context, - const std::vector &dense_tensors_, + const std::vector &dense_tensors_, framework::Variable *p_dense_contents, framework::proto::VarType::Type type) { switch (type) { @@ -213,7 +212,7 @@ template <> void SplitTensorsWithType( const platform::XPUDeviceContext &context, framework::Variable *p_dense_contents, - std::vector *p_dense_tensors, + std::vector *p_dense_tensors, framework::proto::VarType::Type type) { switch (type) { case framework::proto::VarType::FP32: @@ -234,7 +233,7 @@ void SplitTensorsWithType( template <> void ConcatTensorsWithType( const platform::MLUDeviceContext &context, - const std::vector &dense_tensors_, + const std::vector &dense_tensors_, framework::Variable *p_dense_contents, framework::proto::VarType::Type type) { switch (type) { @@ -259,7 +258,7 @@ template <> void SplitTensorsWithType( const platform::MLUDeviceContext &context, framework::Variable *p_dense_contents, - std::vector *p_dense_tensors, + std::vector *p_dense_tensors, framework::proto::VarType::Type type) { switch (type) { case framework::proto::VarType::FP16: @@ -479,7 +478,7 @@ void Reducer::InitializeDenseGroups( p_group->length_.push_back(size); // for concat operator - p_group->dense_tensors_.push_back(framework::Tensor()); + 
p_group->dense_tensors_.push_back(phi::DenseTensor()); // check the dtype and place, it must be same. const auto &dtype = var->DataType(); diff --git a/paddle/fluid/imperative/reducer.cu b/paddle/fluid/imperative/reducer.cu index a3f840f38bfad..59b7ecf915423 100644 --- a/paddle/fluid/imperative/reducer.cu +++ b/paddle/fluid/imperative/reducer.cu @@ -18,7 +18,7 @@ namespace paddle { namespace imperative { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) -void Group::DivNRanks(framework::Tensor *tensor, +void Group::DivNRanks(phi::DenseTensor *tensor, int64_t nranks, const platform::DeviceContext &context) { #ifdef PADDLE_WITH_HIP diff --git a/paddle/fluid/imperative/reducer.h b/paddle/fluid/imperative/reducer.h index e6ac357565145..c455f962788b8 100644 --- a/paddle/fluid/imperative/reducer.h +++ b/paddle/fluid/imperative/reducer.h @@ -61,10 +61,10 @@ struct DivNRanksFunctor { template struct DivNRanksForAllReduce { - framework::Tensor* in_; + phi::DenseTensor* in_; int64_t nranks_; const platform::DeviceContext& ctx_; - DivNRanksForAllReduce(framework::Tensor* in, + DivNRanksForAllReduce(phi::DenseTensor* in, int64_t nranks, const platform::DeviceContext& ctx) : in_(in), nranks_(nranks), ctx_(ctx) {} @@ -89,7 +89,7 @@ class Group { bool is_sparse_ = false; // for concat kernel - std::vector dense_tensors_; + std::vector dense_tensors_; std::vector length_; @@ -111,7 +111,7 @@ class Group { void SplitTensors(const platform::DeviceContext& context); // use it in CUDA - void DivNRanks(framework::Tensor* tensor, + void DivNRanks(phi::DenseTensor* tensor, int64_t nranks, const platform::DeviceContext& context); diff --git a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc index 1f54ec8a132ba..9b417eefc4b32 100644 --- a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc +++ b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc @@ -278,7 +278,7 @@ static bool IsEqualVar(const framework::Variable& var1, return false; } - framework::Tensor t1, t2; + phi::DenseTensor t1, t2; if (var1.IsType()) { framework::TensorCopySync( @@ -328,7 +328,7 @@ static framework::Variable RandomTensor(const framework::DDim& dims, const platform::Place& place, int low = -10, int high = 10) { - framework::Tensor cpu_tensor; + phi::DenseTensor cpu_tensor; cpu_tensor.Resize(dims); auto* ptr = cpu_tensor.mutable_data(platform::CPUPlace()); std::uniform_int_distribution dist(low, high); diff --git a/paddle/fluid/imperative/tests/test_group.cc b/paddle/fluid/imperative/tests/test_group.cc index 9df5e6a735bd0..570f72605b586 100644 --- a/paddle/fluid/imperative/tests/test_group.cc +++ b/paddle/fluid/imperative/tests/test_group.cc @@ -84,7 +84,7 @@ void GroupConcatSplit(Place place, size_t size) { place, data, cpu_place, value.data(), sizeof(T) * value.size()); } - framework::Tensor tmp; + phi::DenseTensor tmp; tmp.ShareDataWith(*tensor).Resize({static_cast(len)}); group.dense_tensors_.push_back(std::move(tmp)); group.all_length_ += len; @@ -103,7 +103,7 @@ void GroupConcatSplit(Place place, size_t size) { group.DivNRanks(*dev_ctx, 1); - framework::Tensor tmp; + phi::DenseTensor tmp; framework::TensorCopySync(*tensor, cpu_place, &tmp); auto* data = tmp.data(); size_t offset = 0; @@ -124,7 +124,7 @@ void GroupConcatSplit(Place place, size_t size) { for (size_t i = 0; i < size; ++i) { auto len = i + 1; auto& tensor = group.dense_tensors_[i]; - framework::Tensor tmp; + phi::DenseTensor tmp; framework::TensorCopySync(tensor, cpu_place, &tmp); 
auto* data = tmp.data(); diff --git a/paddle/fluid/imperative/tests/test_prepare_op.cc b/paddle/fluid/imperative/tests/test_prepare_op.cc index e35568eb50c9a..f9501dedfe240 100644 --- a/paddle/fluid/imperative/tests/test_prepare_op.cc +++ b/paddle/fluid/imperative/tests/test_prepare_op.cc @@ -114,7 +114,7 @@ TEST(test_prepare_op, test_prepare_op) { {})); } -const framework::Tensor* GetTensorFromVar(const framework::Variable& var); +const phi::DenseTensor* GetTensorFromVar(const framework::Variable& var); TEST(test_prepare_op, test_get_tensor_from_var) { std::shared_ptr vout_error( diff --git a/paddle/fluid/imperative/variable_wrapper.h b/paddle/fluid/imperative/variable_wrapper.h index 98214f8d62bfe..eb4654e28e339 100644 --- a/paddle/fluid/imperative/variable_wrapper.h +++ b/paddle/fluid/imperative/variable_wrapper.h @@ -103,7 +103,7 @@ class VariableWrapper { bool IsEmpty() const { bool is_empty = true; if (var_.IsInitialized()) { - const framework::Tensor* tensor = nullptr; + const phi::DenseTensor* tensor = nullptr; if (var_.IsType()) { tensor = &(var_.Get()); } else if (var_.IsType()) { @@ -150,7 +150,7 @@ class VariableWrapper { } framework::proto::VarType::Type DataType() const { - const framework::Tensor* tensor = nullptr; + const phi::DenseTensor* tensor = nullptr; if (var_.IsInitialized()) { if (type_ == framework::proto::VarType::LOD_TENSOR) { tensor = &(var_.Get()); @@ -194,7 +194,7 @@ class VariableWrapper { } const platform::Place Place() const { - const framework::Tensor* tensor = nullptr; + const phi::DenseTensor* tensor = nullptr; auto place = platform::CPUPlace(); // Default place for var not initialized. if (var_.IsInitialized()) { diff --git a/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc b/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc index b49ad4c145d55..20dd3ad560921 100644 --- a/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc +++ b/paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.cc @@ -160,11 +160,11 @@ void SaveMixedModel( for (const auto& param_name : parameters) { auto* var = scope->FindLocalVar(param_name); if (var->IsType() || - var->IsType()) { + var->IsType()) { auto* t = var->GetMutable(); if (t->dtype() != phi::DataType::FLOAT32) continue; - framework::Tensor mixed_tensor; + phi::DenseTensor mixed_tensor; mixed_tensor.Resize(t->dims()); auto* data = t->mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc index 7f63eeaad2bf1..7f01b3401728d 100644 --- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc @@ -61,7 +61,7 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToNpu(Argument *argument) { platform::errors::PreconditionNotMet("The var should not be nullptr")); if (var->IsType() || - var->IsType()) { + var->IsType()) { auto *t = var->GetMutable(); platform::CPUPlace cpu_place; @@ -126,7 +126,7 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) { auto var_name = var_node->Var()->Name(); auto *var = scope->FindLocalVar(var_name); if (var->IsType() || - var->IsType()) { + var->IsType()) { auto *t = var->GetMutable(); params_total_bytes += t->numel() * experimental::SizeOf(t->dtype()); } @@ -135,7 +135,7 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) { { // Alloc memory in pool 
to store all parameters. - framework::Tensor ts; + phi::DenseTensor ts; ts.mutable_data(place, params_total_bytes); } @@ -160,14 +160,14 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) { platform::errors::PreconditionNotMet( "The var should not be nullptr")); if (var->IsType() || - var->IsType()) { + var->IsType()) { auto *t = var->GetMutable(); auto var_data_type = var_node->Var()->GetDataType(); VLOG(5) << "var_name is " << var_name << ", data type is " << var_data_type; if (var_data_type == paddle::framework::proto::VarType::FP16 && t->dtype() != paddle::experimental::DataType::FLOAT16) { - framework::Tensor half_tensor; + phi::DenseTensor half_tensor; half_tensor.set_type(paddle::experimental::DataType::FLOAT16); half_tensor.Resize(t->dims()); auto *half_data = @@ -179,7 +179,7 @@ void IrParamsSyncAmongDevicesPass::CopyParamsToGpu(Argument *argument) { t->clear(); paddle::framework::TensorCopySync(half_tensor, place, t); } else if (var_data_type == paddle::framework::proto::VarType::BF16) { - framework::Tensor bf16_tensor; + phi::DenseTensor bf16_tensor; bf16_tensor.set_type(paddle::experimental::DataType::BFLOAT16); bf16_tensor.Resize(t->dims()); auto *bf16_data = bf16_tensor.mutable_data( diff --git a/paddle/fluid/inference/api/details/reset_tensor_array.h b/paddle/fluid/inference/api/details/reset_tensor_array.h index 6db0a204dbc3d..a7ce7f2205d73 100644 --- a/paddle/fluid/inference/api/details/reset_tensor_array.h +++ b/paddle/fluid/inference/api/details/reset_tensor_array.h @@ -39,7 +39,7 @@ namespace details { // training phase. struct TensorArrayBatchCleaner { TensorArrayBatchCleaner() { - constexpr auto kTensorId = framework::VarTypeTrait::kId; + constexpr auto kTensorId = framework::VarTypeTrait::kId; constexpr auto kLoDTensorId = framework::VarTypeTrait::kId; constexpr auto kSelectedRowsId = diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index 022ba1483b955..6f496b86897e6 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -367,7 +367,7 @@ void Tensor::CopyToCpuImpl(T *data, auto *t_data = tensor->data(); auto t_place = tensor->place(); - paddle::framework::Tensor out; + phi::DenseTensor out; auto mem_allocation = std::make_shared( static_cast(data), @@ -843,7 +843,7 @@ void InternalUtils::CopyToCpuWithIoStream(paddle_infer::Tensor *t, auto *t_data = tensor->data(); auto t_place = tensor->place(); - paddle::framework::Tensor out; + phi::DenseTensor out; auto mem_allocation = std::make_shared( static_cast(data), diff --git a/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc b/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc index 4d524c01b783f..d6aefd320678d 100644 --- a/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/fill_constant_op.cc @@ -36,7 +36,7 @@ class FillConstantOpConverter : public OpConverter { float value = PADDLE_GET_CONST(float, op_desc.GetAttr("value")); str_value = std::to_string(value); } - std::unique_ptr out_tensor(new framework::Tensor()); + std::unique_ptr out_tensor(new phi::DenseTensor()); out_tensor->Resize(phi::make_ddim(shape)); nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT; void* trt_data = nullptr; diff --git a/paddle/fluid/inference/tensorrt/convert/gelu_op.cc b/paddle/fluid/inference/tensorrt/convert/gelu_op.cc index 845e5c7d704ca..9d44c83d46243 100644 --- 
a/paddle/fluid/inference/tensorrt/convert/gelu_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/gelu_op.cc @@ -59,7 +59,7 @@ class GeluOpConverter : public OpConverter { } std::string out_name = op_desc.Output("Out").front(); auto create_weights = [&](float data, std::string type) -> float* { - std::unique_ptr tmp_tensor(new framework::Tensor()); + std::unique_ptr tmp_tensor(new phi::DenseTensor()); tmp_tensor->Resize({1}); auto* tmp_data = tmp_tensor->mutable_data(platform::CPUPlace()); tmp_data[0] = data; @@ -166,7 +166,7 @@ class GeluOpConverter : public OpConverter { } std::string out_name = op_desc.Output("Out").front(); auto create_weights = [&](float data, std::string type) -> float* { - std::unique_ptr tmp_tensor(new framework::Tensor()); + std::unique_ptr tmp_tensor(new phi::DenseTensor()); tmp_tensor->Resize({1}); auto* tmp_data = tmp_tensor->mutable_data(platform::CPUPlace()); tmp_data[0] = data; diff --git a/paddle/fluid/inference/tensorrt/convert/matmul_op.cc b/paddle/fluid/inference/tensorrt/convert/matmul_op.cc index 6752bf1d49768..03ec113311175 100644 --- a/paddle/fluid/inference/tensorrt/convert/matmul_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/matmul_op.cc @@ -141,8 +141,7 @@ class MatMulOpConverter : public OpConverter { auto create_weights = [&](float data, const std::string& type) -> float* { - std::unique_ptr tmp_tensor( - new framework::Tensor()); + std::unique_ptr tmp_tensor(new phi::DenseTensor()); tmp_tensor->Resize({1}); auto* tmp_data = tmp_tensor->mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index 85a9b9d2fb346..da500014b8e81 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -495,7 +495,7 @@ class OpConverter { int data_size = std::accumulate( shape.d, shape.d + shape.nbDims, 1, std::multiplies()); - std::unique_ptr tmp_tensor(new framework::Tensor()); + std::unique_ptr tmp_tensor(new phi::DenseTensor()); tmp_tensor->Resize({data_size}); auto* tmp_data = tmp_tensor->mutable_data(platform::CPUPlace()); for (int i = 0; i < data_size; i++) { @@ -530,7 +530,7 @@ class OpConverter { "supports float, half or int32_t.")); } - std::unique_ptr tmp_tensor(new framework::Tensor()); + std::unique_ptr tmp_tensor(new phi::DenseTensor()); int data_size = data.size(); tmp_tensor->Resize({data_size}); auto* tmp_data = tmp_tensor->mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/inference/tensorrt/convert/scale_op.cc b/paddle/fluid/inference/tensorrt/convert/scale_op.cc index 9b0798d9f354f..a3b2e65ac4976 100644 --- a/paddle/fluid/inference/tensorrt/convert/scale_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/scale_op.cc @@ -50,7 +50,7 @@ class ScaleOpConverter : public OpConverter { float bias = PADDLE_GET_CONST(float, op_desc.GetAttr("bias")); float scale = PADDLE_GET_CONST(float, op_desc.GetAttr("scale")); auto create_weights = [&](float data, std::string type) -> float* { - std::unique_ptr tmp_tensor(new framework::Tensor()); + std::unique_ptr tmp_tensor(new phi::DenseTensor()); tmp_tensor->Resize({1}); auto* tmp_data = tmp_tensor->mutable_data(platform::CPUPlace()); tmp_data[0] = data; diff --git a/paddle/fluid/inference/tensorrt/convert/strided_slice_op.cc b/paddle/fluid/inference/tensorrt/convert/strided_slice_op.cc index cb67957c79cbf..2302d96e23564 100644 --- a/paddle/fluid/inference/tensorrt/convert/strided_slice_op.cc +++ 
b/paddle/fluid/inference/tensorrt/convert/strided_slice_op.cc @@ -107,7 +107,7 @@ class StridedSliceOpConverter : public OpConverter { auto create_weights = [&](const std::vector& data, const std::string& type) -> int* { - std::unique_ptr tmp_tensor(new framework::Tensor()); + std::unique_ptr tmp_tensor(new phi::DenseTensor()); int data_size = data.size(); tmp_tensor->Resize({data_size}); auto* tmp_data = tmp_tensor->mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 0cf1d6352c340..cf53e51d62fa7 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -423,7 +423,7 @@ void TensorRTEngine::SetRuntimeBatch(size_t batch_size) { // Note: Only for support plugin. TensorRTEngine::Weight TensorRTEngine::GetFp16TrtWeight( - const std::string &name, const framework::Tensor &weight_tensor) { + const std::string &name, const phi::DenseTensor &weight_tensor) { static int name_suffix_counter = 0; std::string name_suffix = std::to_string(name_suffix_counter); std::string splitter = "__"; @@ -435,7 +435,7 @@ TensorRTEngine::Weight TensorRTEngine::GetFp16TrtWeight( "The weight named %s is set into the weight map " "twice in TRT OP converter.", name_with_suffix)); - weight_map[name_with_suffix].reset(new framework::Tensor()); + weight_map[name_with_suffix].reset(new phi::DenseTensor()); weight_map[name_with_suffix]->Resize(weight_tensor.dims()); TensorRTEngine::Weight weight; @@ -445,7 +445,7 @@ TensorRTEngine::Weight TensorRTEngine::GetFp16TrtWeight( // if trt not support dtype, we need to cast to fp16. if (weight_tensor.dtype() == phi::DataType::BFLOAT16) { - framework::Tensor bf16_tensor; + phi::DenseTensor bf16_tensor; bf16_tensor.clear(); paddle::framework::TensorCopySync( weight_tensor, platform::CPUPlace(), &bf16_tensor); @@ -459,7 +459,7 @@ TensorRTEngine::Weight TensorRTEngine::GetFp16TrtWeight( fp16_data[i] = static_cast(bf16_data[i]); } } else if (weight_tensor.dtype() == phi::DataType::FLOAT32) { - framework::Tensor fp32_tensor; + phi::DenseTensor fp32_tensor; fp32_tensor.clear(); paddle::framework::TensorCopySync( weight_tensor, platform::CPUPlace(), &fp32_tensor); @@ -483,7 +483,7 @@ TensorRTEngine::Weight TensorRTEngine::GetFp16TrtWeight( // Note: Only for support plugin. TensorRTEngine::Weight TensorRTEngine::GetFp32TrtWeight( - const std::string &name, const framework::Tensor &weight_tensor) { + const std::string &name, const phi::DenseTensor &weight_tensor) { static int name_suffix_counter = 0; std::string name_suffix = std::to_string(name_suffix_counter); std::string splitter = "__"; @@ -495,7 +495,7 @@ TensorRTEngine::Weight TensorRTEngine::GetFp32TrtWeight( "The weight named %s is set into the weight map " "twice in TRT OP converter.", name_with_suffix)); - weight_map[name_with_suffix].reset(new framework::Tensor()); + weight_map[name_with_suffix].reset(new phi::DenseTensor()); weight_map[name_with_suffix]->Resize(weight_tensor.dims()); TensorRTEngine::Weight weight; @@ -505,7 +505,7 @@ TensorRTEngine::Weight TensorRTEngine::GetFp32TrtWeight( // if trt not support dtype, we need to cast to fp32. 
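The engine.cc weight helpers touched above and below (GetFp16TrtWeight, GetFp32TrtWeight, GetTrtWeight) all follow one pattern: when a weight arrives in a dtype TensorRT cannot consume (bfloat16, int64, ...), it is first copied to the CPU and then converted element-wise into a supported dtype before being registered. What follows is a minimal standalone sketch of that element-wise cast only, not part of the patch; CastWeightBuffer and the double stand-in dtype are illustrative names, and the real code stages the copy through a temporary phi::DenseTensor with paddle::framework::TensorCopySync before writing into the engine's weight map.

#include <cstddef>
#include <vector>

// Sketch of the element-wise cast used when a weight dtype is not supported
// by TensorRT: copy the data aside, then convert each element to a supported
// type. "CastWeightBuffer" is a hypothetical helper for illustration only.
template <typename DstT, typename SrcT>
std::vector<DstT> CastWeightBuffer(const SrcT* src, std::size_t n) {
  std::vector<DstT> dst(n);
  for (std::size_t i = 0; i < n; ++i) {
    dst[i] = static_cast<DstT>(src[i]);
  }
  return dst;
}

int main() {
  // A double buffer stands in for an unsupported weight dtype; TensorRT-side
  // code would consume the resulting float (fp32) buffer.
  const double raw_weight[] = {0.5, -1.25, 3.0};
  std::vector<float> fp32 = CastWeightBuffer<float>(raw_weight, 3);
  return fp32.size() == 3 ? 0 : 1;
}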
if (weight_tensor.dtype() == phi::DataType::BFLOAT16) { - framework::Tensor bf16_tensor; + phi::DenseTensor bf16_tensor; bf16_tensor.clear(); paddle::framework::TensorCopySync( weight_tensor, platform::CPUPlace(), &bf16_tensor); @@ -519,7 +519,7 @@ TensorRTEngine::Weight TensorRTEngine::GetFp32TrtWeight( fp32_data[i] = static_cast(bf16_data[i]); } } else if (weight_tensor.dtype() == phi::DataType::FLOAT16) { - framework::Tensor fp16_tensor; + phi::DenseTensor fp16_tensor; fp16_tensor.clear(); paddle::framework::TensorCopySync( weight_tensor, platform::CPUPlace(), &fp16_tensor); @@ -542,7 +542,7 @@ TensorRTEngine::Weight TensorRTEngine::GetFp32TrtWeight( } TensorRTEngine::Weight TensorRTEngine::GetTrtWeight( - const std::string &name, const framework::Tensor &weight_tensor) { + const std::string &name, const phi::DenseTensor &weight_tensor) { static int name_suffix_counter = 0; std::string name_suffix = std::to_string(name_suffix_counter); std::string splitter = "__"; @@ -555,7 +555,7 @@ TensorRTEngine::Weight TensorRTEngine::GetTrtWeight( "twice in TRT OP converter.", name_with_suffix)); - weight_map[name_with_suffix].reset(new framework::Tensor()); + weight_map[name_with_suffix].reset(new phi::DenseTensor()); weight_map[name_with_suffix]->Resize(weight_tensor.dims()); TensorRTEngine::Weight weight; @@ -563,7 +563,7 @@ TensorRTEngine::Weight TensorRTEngine::GetTrtWeight( // if trt not support dtype, we need to cast to fp32. if (weight_tensor.dtype() == phi::DataType::BFLOAT16) { - framework::Tensor bf16_tensor; + phi::DenseTensor bf16_tensor; bf16_tensor.clear(); paddle::framework::TensorCopySync( weight_tensor, platform::CPUPlace(), &bf16_tensor); @@ -578,7 +578,7 @@ TensorRTEngine::Weight TensorRTEngine::GetTrtWeight( weight.SetDataType(phi::DataType::FLOAT32); weight.SetValues(fp32_data); } else if (weight_tensor.dtype() == phi::DataType::INT64) { - framework::Tensor int64_tensor; + phi::DenseTensor int64_tensor; int64_tensor.clear(); paddle::framework::TensorCopySync( weight_tensor, platform::CPUPlace(), &int64_tensor); diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index 209f297a0668f..2f742dbdb50a6 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -451,15 +451,15 @@ class TensorRTEngine { // Get fp16 trt weight. If src weight is not fp16, we will cast. Weight GetFp16TrtWeight(const std::string& name, - const framework::Tensor& weight_tensor); + const phi::DenseTensor& weight_tensor); // Get fp32 trt weight. If src weight is not fp32, we will cast. Weight GetFp32TrtWeight(const std::string& name, - const framework::Tensor& weight_tensor); + const phi::DenseTensor& weight_tensor); // if the src weight type is fp16, then return fp16 trt weight, etc. Weight GetTrtWeight(const std::string& name, - const framework::Tensor& weight_tensor); + const phi::DenseTensor& weight_tensor); float GetTensorDynamicRange(nvinfer1::ITensor* tensor) { return quant_dynamic_range_[tensor]; @@ -474,13 +474,13 @@ class TensorRTEngine { // so we need to copy the weights from GPU to CPU in our op converter. // We use a map to store these weights for the weight memory is not released // in advance, which affecting the construction of TRT Op. - std::unordered_map> + std::unordered_map> weight_map; // When setting weight_map, a self-increasing suffix is needed for the names // so as to avoid repeatedly setting weights with the same name. 
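The comment that closes the engine.h hunk above explains why weight names get a self-increasing suffix: each converted weight must stay alive until the engine is built, and two registrations of the same parameter name must not overwrite each other in the map. Below is a minimal standalone sketch of that ownership-plus-naming idea, not part of the patch; WeightStore and the empty DenseTensor struct are stand-ins for illustration, not the actual TensorRTEngine members.

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>

struct DenseTensor {};  // stand-in for phi::DenseTensor in this sketch

// Keeps converted weights alive for the whole engine build and appends a
// self-increasing "__<counter>" suffix so repeated registrations of the same
// name never collide -- the same idea as the engine's weight_map / SetWeights
// in the surrounding hunks.
class WeightStore {
 public:
  DenseTensor* Add(const std::string& name, std::unique_ptr<DenseTensor> t) {
    const std::string key = name + "__" + std::to_string(counter_++);
    DenseTensor* raw = t.get();
    weights_.emplace(key, std::move(t));
    return raw;  // valid as long as the store (engine) is alive
  }

 private:
  int counter_ = 0;
  std::unordered_map<std::string, std::unique_ptr<DenseTensor>> weights_;
};

int main() {
  WeightStore store;
  auto* w0 = store.Add("fc_w", std::make_unique<DenseTensor>());
  auto* w1 = store.Add("fc_w", std::make_unique<DenseTensor>());  // no clash
  return (w0 != nullptr && w1 != nullptr && w0 != w1) ? 0 : 1;
}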
void SetWeights(std::string w_name, - std::unique_ptr w_tensor) { + std::unique_ptr w_tensor) { static int suffix_counter = 0; std::string suffix = std::to_string(suffix_counter); std::string splitter = "__"; diff --git a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h index ec7d9545ce387..d0815798a6e47 100644 --- a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h @@ -95,7 +95,7 @@ class EmbEltwiseLayernormPluginDynamicImpl int hidden_size_; float eps_; - framework::Tensor in_ptr_tensor_, emb_ptr_tensor_; + phi::DenseTensor in_ptr_tensor_, emb_ptr_tensor_; int device_id_{0}; bool is_initialized_{false}; }; @@ -303,10 +303,11 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { SerializeValue(&buffer, eps_); } - nvinfer1::DimsExprs getOutputDimensions(int output_index, - const nvinfer1::DimsExprs* inputs, - int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) + nvinfer1::DimsExprs getOutputDimensions( + int output_index, + const nvinfer1::DimsExprs* inputs, + int nb_inputs, + nvinfer1::IExprBuilder& expr_builder) // NOLINT TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, diff --git a/paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.cu index f52ef0c52ff0e..e49bf16bf6878 100644 --- a/paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/fused_token_prune_op_plugin.cu @@ -469,7 +469,7 @@ inline void enqueueImpl(const nvinfer1::PluginTensorDesc* input_desc, sizeof(T) * 8, stream)); int64_t temp_size = temp_storage_bytes; - framework::Tensor temp_storage; + phi::DenseTensor temp_storage; auto* temp_storage_data = temp_storage.mutable_data( {temp_size}, platform::CUDAPlace(device_id)); diff --git a/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.cu index 294677e6ac5de..ca4126d5aefcf 100644 --- a/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.cu @@ -87,7 +87,7 @@ int GroupNormPlugin::enqueue(int batch_size, float *variance_d = variance_t.mutable_data(platform::CUDAPlace(device_id)); - framework::Tensor temp_variance_t; + phi::DenseTensor temp_variance_t; temp_variance_t.Resize(phi::make_ddim(variance_shape_)); float *temp_variance_d = temp_variance_t.mutable_data(platform::CUDAPlace(device_id)); @@ -220,7 +220,7 @@ int GroupNormPluginDynamic::enqueue( float *variance_d = variance_t.mutable_data(platform::CUDAPlace(device_id)); - framework::Tensor temp_variance_t; + phi::DenseTensor temp_variance_t; temp_variance_t.Resize(phi::make_ddim(batched_variance_shape)); float *temp_variance_d = temp_variance_t.mutable_data(platform::CUDAPlace(device_id)); diff --git a/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.h index fdcb93e29f042..757ff33443455 100644 --- a/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.h @@ -105,10 +105,10 @@ class GroupNormPlugin : public PluginTensorRT { private: std::vector scale_; std::vector bias_; - framework::Tensor scale_t; - framework::Tensor bias_t; - framework::Tensor mean_t; - 
framework::Tensor variance_t; + phi::DenseTensor scale_t; + phi::DenseTensor bias_t; + phi::DenseTensor mean_t; + phi::DenseTensor variance_t; int groups_; float eps_; std::vector mean_shape_; @@ -187,10 +187,11 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { SerializeValue(&buffer, mean_shape_); SerializeValue(&buffer, variance_shape_); } - nvinfer1::DimsExprs getOutputDimensions(int output_index, - const nvinfer1::DimsExprs* inputs, - int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) + nvinfer1::DimsExprs getOutputDimensions( + int output_index, + const nvinfer1::DimsExprs* inputs, + int nb_inputs, + nvinfer1::IExprBuilder& expr_builder) // NOLINT TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, @@ -226,10 +227,10 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { private: std::vector scale_; std::vector bias_; - framework::Tensor scale_t; - framework::Tensor bias_t; - framework::Tensor mean_t; - framework::Tensor variance_t; + phi::DenseTensor scale_t; + phi::DenseTensor bias_t; + phi::DenseTensor mean_t; + phi::DenseTensor variance_t; int groups_; float eps_; std::vector mean_shape_; diff --git a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h index 30ee5dbea8fc8..90a01d076f367 100644 --- a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h @@ -33,8 +33,8 @@ class InstanceNormPlugin : public PluginTensorRT { std::vector scale_; std::vector bias_; - framework::Tensor scale_t; - framework::Tensor bias_t; + phi::DenseTensor scale_t; + phi::DenseTensor bias_t; cudnnHandle_t handle_; cudnnTensorDescriptor_t x_desc_, y_desc_, b_desc_; diff --git a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h index a8ccabb3cff59..84d1898d50f84 100644 --- a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h @@ -31,10 +31,10 @@ namespace plugin { class LayerNormPlugin : public PluginTensorRT { std::vector bias_; std::vector scale_; - framework::Tensor scale_t; - framework::Tensor bias_t; - framework::Tensor mean_t; - framework::Tensor variance_t; + phi::DenseTensor scale_t; + phi::DenseTensor bias_t; + phi::DenseTensor mean_t; + phi::DenseTensor variance_t; int begin_norm_axis_; float eps_; std::vector mean_shape_; @@ -201,10 +201,11 @@ class LayerNormPluginDynamic : public DynamicPluginTensorRT { SerializeValue(&buffer, variance_shape_); } - nvinfer1::DimsExprs getOutputDimensions(int output_index, - const nvinfer1::DimsExprs* inputs, - int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) + nvinfer1::DimsExprs getOutputDimensions( + int output_index, + const nvinfer1::DimsExprs* inputs, + int nb_inputs, + nvinfer1::IExprBuilder& expr_builder) // NOLINT TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, @@ -240,10 +241,10 @@ class LayerNormPluginDynamic : public DynamicPluginTensorRT { private: std::vector bias_; std::vector scale_; - framework::Tensor scale_t; - framework::Tensor bias_t; - framework::Tensor mean_t; - framework::Tensor variance_t; + phi::DenseTensor scale_t; + phi::DenseTensor bias_t; + phi::DenseTensor mean_t; + phi::DenseTensor variance_t; int begin_norm_axis_; float eps_; std::vector mean_shape_; diff --git a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu 
b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu index e4a9504d8c869..336fcb5531799 100644 --- a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu @@ -301,7 +301,7 @@ void QkvToContextPluginDynamic::configurePlugin( } else if (in[0].desc.type == nvinfer1::DataType::kFLOAT) { fake_qk_bias_ = reinterpret_cast( tensor_.mutable_data(platform::CUDAPlace(device_id))); - long size = sizeof(int32_t) * batch * seq_len * seq_len * head_number_; + int64_t size = sizeof(int32_t) * batch * seq_len * seq_len * head_number_; #ifdef PADDLE_WITH_HIP PADDLE_ENFORCE_GPU_SUCCESS( hipMemsetAsync(fake_qk_bias_, 0, size, dev_ctx.stream())); @@ -408,7 +408,7 @@ int QkvToContextPluginDynamic::enqueue( // input[0], (B, S, 3 * N * H, 1, 1) int batch = input_dims.d[0]; int seq_len = input_dims.d[1]; - framework::Tensor multihead_temp_tensor; + phi::DenseTensor multihead_temp_tensor; int scratch_size = batch * head_number_ * seq_len * seq_len * 1; int device_id; @@ -425,7 +425,7 @@ int QkvToContextPluginDynamic::enqueue( const float *input0_data = static_cast(inputs[0]); // fit to [batch, head_num, length, length] + [batch, 1, 1, length] - framework::Tensor temp_qk_bias_tensor; + phi::DenseTensor temp_qk_bias_tensor; float *qk_bias = const_cast(static_cast(inputs[1])); if (ProductDim(input_desc[1].dims) == (batch * seq_len)) { temp_qk_bias_tensor.Resize({batch, head_number_, seq_len, seq_len}); @@ -494,7 +494,7 @@ int QkvToContextPluginDynamic::enqueue( const half *input0_data = static_cast(inputs[0]); // fit to [batch, head_num, length, length] + [batch, 1, 1, length] - framework::Tensor temp_qk_bias_tensor; + phi::DenseTensor temp_qk_bias_tensor; half *qk_bias = const_cast(static_cast(inputs[1])); if (ProductDim(input_desc[1].dims) == (batch * seq_len)) { temp_qk_bias_tensor.Resize({batch, head_number_, seq_len, seq_len}); diff --git a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h index 17c9e904d4228..dd3dc71e956a4 100644 --- a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h @@ -83,10 +83,11 @@ class QkvToContextPluginDynamic : public DynamicPluginTensorRT { SerializeValue(&buffer, with_fp16_); } - nvinfer1::DimsExprs getOutputDimensions(int output_index, - const nvinfer1::DimsExprs* inputs, - int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) + nvinfer1::DimsExprs getOutputDimensions( + int output_index, + const nvinfer1::DimsExprs* inputs, + int nb_inputs, + nvinfer1::IExprBuilder& expr_builder) // NOLINT TRT_NOEXCEPT override; bool supportsFormatCombination(int pos, @@ -124,7 +125,7 @@ class QkvToContextPluginDynamic : public DynamicPluginTensorRT { int head_number_; int head_size_; float scale_; - framework::Tensor tensor_; + phi::DenseTensor tensor_; half* mask_half_; float* fake_qk_bias_; }; diff --git a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc index 43e219232d111..6a253d2815941 100644 --- a/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_dynamic_engine.cc @@ -91,8 +91,8 @@ class TensorRTDynamicEngineTest : public ::testing::Test { } protected: - framework::Tensor input_; - framework::Tensor output_; + phi::DenseTensor input_; + phi::DenseTensor output_; TensorRTEngine *engine_; phi::GPUContext *ctx_; }; @@ -276,8 +276,8 @@ 
class TensorRTDynamicTestFusedTokenPrune : public ::testing::Test { } protected: - std::vector inputs_; - std::vector outputs_; + std::vector inputs_; + std::vector outputs_; TensorRTEngine *engine_; phi::GPUContext *ctx_; }; diff --git a/paddle/fluid/inference/tensorrt/test_engine.cc b/paddle/fluid/inference/tensorrt/test_engine.cc index dc8065ab2a628..027c593d73c6f 100644 --- a/paddle/fluid/inference/tensorrt/test_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_engine.cc @@ -66,8 +66,8 @@ class TensorRTEngineTest : public ::testing::Test { } protected: - framework::Tensor input_; - framework::Tensor output_; + phi::DenseTensor input_; + phi::DenseTensor output_; TensorRTEngine *engine_; phi::GPUContext *ctx_; }; diff --git a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc index 948a3e105f3d4..dbcdc8b8b7c1c 100644 --- a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc +++ b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc @@ -33,7 +33,7 @@ TRTInt8Calibrator::TRTInt8Calibrator( int i = 0; VLOG(4) << "Init a new calibrator: " << engine_name_; for (const auto it : buffers) { - framework::Tensor temp_tensor; + phi::DenseTensor temp_tensor; std::string input_name = it.first; int data_size = it.second; int num_ele = data_size / sizeof(int16_t); diff --git a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h index 35b018c1a0262..d0d4c46b4dc6b 100644 --- a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h +++ b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h @@ -73,7 +73,7 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 { std::condition_variable cond_; std::unordered_map> data_buffers_; - std::vector data_tensors_; + std::vector data_tensors_; std::string engine_name_; std::string calibration_table_; diff --git a/paddle/fluid/operators/abs_op.cc b/paddle/fluid/operators/abs_op.cc index 9a2a75a642ab7..d8fd433c0417c 100644 --- a/paddle/fluid/operators/abs_op.cc +++ b/paddle/fluid/operators/abs_op.cc @@ -153,7 +153,7 @@ class AbsDoubleGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { return framework::OpKernelType( framework::TransToProtoVarType(tensor.dtype()), diff --git a/paddle/fluid/operators/abs_op_mlu.cc b/paddle/fluid/operators/abs_op_mlu.cc index 284ef4902be4b..9afa4c28e0544 100644 --- a/paddle/fluid/operators/abs_op_mlu.cc +++ b/paddle/fluid/operators/abs_op_mlu.cc @@ -18,14 +18,14 @@ limitations under the Licnse. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class AbsMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); output->mutable_data(ctx.GetPlace()); @@ -44,9 +44,9 @@ template class AbsGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/abs_op_npu.cc b/paddle/fluid/operators/abs_op_npu.cc index 490350ab980cd..a1ca88ae5b572 100644 --- a/paddle/fluid/operators/abs_op_npu.cc +++ b/paddle/fluid/operators/abs_op_npu.cc @@ -18,14 +18,14 @@ limitations under the Licnse. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class AbsNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); @@ -47,9 +47,9 @@ template class AbsGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/activation_cudnn.cu.cc b/paddle/fluid/operators/activation_cudnn.cu.cc index 2ad92e36272b3..3afe6b4608fc4 100644 --- a/paddle/fluid/operators/activation_cudnn.cu.cc +++ b/paddle/fluid/operators/activation_cudnn.cu.cc @@ -18,7 +18,7 @@ namespace paddle { namespace operators { -using framework::Tensor; + using platform::ActivationDescriptor; using platform::TensorDescriptor; @@ -27,7 +27,7 @@ class CudnnActivationKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - framework::Tensor *X, *Out; + phi::DenseTensor *X, *Out; ExtractActivationTensor(context, X, Out); ActivationDescriptor act_desc; TensorDescriptor x_desc, out_desc; diff --git a/paddle/fluid/operators/activation_cudnn_op.cu.cc b/paddle/fluid/operators/activation_cudnn_op.cu.cc index 49f78715c2cf5..c4e2685dd5958 100644 --- a/paddle/fluid/operators/activation_cudnn_op.cu.cc +++ b/paddle/fluid/operators/activation_cudnn_op.cu.cc @@ -18,7 +18,7 @@ namespace paddle { namespace operators { -using framework::Tensor; + using phi::GPUContext; using platform::ActivationDescriptor; using platform::TensorDescriptor; @@ -49,7 +49,7 @@ struct CudnnActivationFunctor { const cudnnActivationMode_t& m) : ctx_(ctx), coef_(c), mode_(m) {} #endif - void operator()(const Tensor& x, Tensor* out) { + void operator()(const phi::DenseTensor& x, phi::DenseTensor* out) { ActivationDescriptor act_desc; act_desc.set(mode_, coef_); TensorDescriptor x_desc, 
out_desc; @@ -100,10 +100,10 @@ struct CudnnActivationGradFunctor { const cudnnActivationMode_t& m) : ctx_(ctx), coef_(c), mode_(m) {} #endif - void operator()(const Tensor& x, - const Tensor& out, - const Tensor dout, - Tensor* dx) { + void operator()(const phi::DenseTensor& x, + const phi::DenseTensor& out, + const phi::DenseTensor dout, + phi::DenseTensor* dx) { ActivationDescriptor act_desc; act_desc.set(mode_, coef_); TensorDescriptor x_desc, out_desc, dout_desc, dx_desc; @@ -217,8 +217,8 @@ class CudnnActivationKernel public: using T = typename Functor::ELEMENT_TYPE; void Compute(const framework::ExecutionContext& context) const override { - const framework::Tensor* X = nullptr; - framework::Tensor* Out = nullptr; + const phi::DenseTensor* X = nullptr; + phi::DenseTensor* Out = nullptr; ExtractActivationTensor(context, &X, &Out); Out->mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); @@ -236,9 +236,9 @@ class CudnnActivationGradKernel static_assert(Functor::FwdDeps() == ActBwdOpFwdDeps::kDepOut, "Forward deps must be Out."); - const framework::Tensor *X, *Out, *dOut; + const phi::DenseTensor *X, *Out, *dOut; X = Out = dOut = nullptr; - framework::Tensor* dX = nullptr; + phi::DenseTensor* dX = nullptr; ExtractActivationGradTensor( context, &X, &Out, &dOut, &dX); dX->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index d0ac97b2d002b..f921ad844b310 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -30,8 +30,6 @@ DECLARE_bool(use_mkldnn); namespace paddle { namespace operators { -using paddle::framework::Tensor; - template static constexpr bool CanInplaceAct() { return GradFunctor::FwdDeps() == ActBwdOpFwdDeps::kDepOut || @@ -124,7 +122,7 @@ class ActivationOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { #ifdef PADDLE_WITH_MKLDNN // When activation is first oneDNN op (there was some non oneDNN op @@ -1345,7 +1343,7 @@ class PowOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "FactorTensor") { return expected_kernel_type; @@ -1373,7 +1371,7 @@ class PowOpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "FactorTensor") { return expected_kernel_type; diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index eeec8c300daac..9421240c14c67 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -52,8 +52,8 @@ static std::unordered_set CanBeUsedBySelectedRows = { "abs", "abs_grad", "square", "square_grad", "sqrt", "sqrt_grad"}; inline void ExtractActivationTensor(const framework::ExecutionContext& context, - const framework::Tensor** X, - framework::Tensor** Out) { + const phi::DenseTensor** X, + phi::DenseTensor** Out) { auto x_var = context.InputVar("X"); auto out_var = context.OutputVar("Out"); PADDLE_ENFORCE_NOT_NULL(x_var, @@ 
-70,8 +70,8 @@ inline void ExtractActivationTensor(const framework::ExecutionContext& context, *Out = paddle::framework::GetMutableLoDTensorOrSelectedRowsValueFromVar( out_var); } else { - *X = context.Input("X"); - *Out = context.Output("Out"); + *X = context.Input("X"); + *Out = context.Output("Out"); } PADDLE_ENFORCE_NOT_NULL( @@ -84,10 +84,10 @@ inline void ExtractActivationTensor(const framework::ExecutionContext& context, template inline void ExtractActivationGradTensor( const framework::ExecutionContext& context, - const framework::Tensor** X, - const framework::Tensor** Out, - const framework::Tensor** dOut, - framework::Tensor** dX) { + const phi::DenseTensor** X, + const phi::DenseTensor** Out, + const phi::DenseTensor** dOut, + phi::DenseTensor** dX) { auto out_grad_var = context.InputVar(framework::GradVarName("Out")); auto x_grad_var = context.OutputVar(framework::GradVarName("X")); const framework::Variable* out_var = nullptr; @@ -129,9 +129,9 @@ inline void ExtractActivationGradTensor( } } else { - *Out = context.Input("Out"); - *dOut = context.Input(framework::GradVarName("Out")); - *dX = context.Output(framework::GradVarName("X")); + *Out = context.Input("Out"); + *dOut = context.Input(framework::GradVarName("Out")); + *dX = context.Output(framework::GradVarName("X")); if (out_var) { *Out = &(out_var->Get()); @@ -156,7 +156,7 @@ inline void ExtractActivationGradTensor( if (CanBeUsedBySelectedRows.count(context.Type())) { *X = paddle::framework::GetLoDTensorOrSelectedRowsValueFromVar(*x_var); } else { - *X = context.Input("X"); + *X = context.Input("X"); } } else { VLOG(10) << " Inplace activation of Op : " << context.Type(); @@ -171,8 +171,8 @@ class ActivationKernel using T = typename Functor::ELEMENT_TYPE; void Compute(const framework::ExecutionContext& context) const override { - const framework::Tensor* X = nullptr; - framework::Tensor* Out = nullptr; + const phi::DenseTensor* X = nullptr; + phi::DenseTensor* Out = nullptr; ExtractActivationTensor(context, &X, &Out); Out->mutable_data(context.GetPlace()); @@ -205,8 +205,8 @@ class ActivationGradKernel public: using T = typename Functor::ELEMENT_TYPE; void Compute(const framework::ExecutionContext& context) const override { - const framework::Tensor *X, *Out, *dOut; - framework::Tensor* dX = nullptr; + const phi::DenseTensor *X, *Out, *dOut; + phi::DenseTensor* dX = nullptr; X = Out = dOut = nullptr; ExtractActivationGradTensor( context, &X, &Out, &dOut, &dX); @@ -391,11 +391,10 @@ template class ELUGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* X = context.Input("X"); - auto* Out = context.Input("Out"); - auto* dOut = - context.Input(framework::GradVarName("Out")); - auto* dX = context.Output(framework::GradVarName("X")); + auto* X = context.Input("X"); + auto* Out = context.Input("Out"); + auto* dOut = context.Input(framework::GradVarName("Out")); + auto* dX = context.Output(framework::GradVarName("X")); const float alpha = context.Attr("alpha"); dX->mutable_data(context.GetPlace()); @@ -426,12 +425,12 @@ template struct AbsGradGradFunctor : public BaseActivationFunctor { template void operator()(const Device& dev, - const framework::Tensor* X, - const framework::Tensor* Out, - const framework::Tensor* ddX, - framework::Tensor* ddOut, - framework::Tensor* dOut, - framework::Tensor* dX) const { + const phi::DenseTensor* X, + const phi::DenseTensor* Out, + const phi::DenseTensor* ddX, + phi::DenseTensor* ddOut, + phi::DenseTensor* dOut, 
+ phi::DenseTensor* dX) const { auto* d = dev.eigen_device(); auto ddx = framework::EigenVector::Flatten( GET_DATA_SAFELY(ddX, "Input", "DDX", "AbsGradGrad")); @@ -451,11 +450,11 @@ struct AbsGradGradFunctor : public BaseActivationFunctor { // others. Impliment extraction kernel separately here. inline void ExtractDoubleGradTensorWithInputDOut( const framework::ExecutionContext& ctx, - const framework::Tensor** X, - const framework::Tensor** ddX, - framework::Tensor** dX, - const framework::Tensor** dOut, - framework::Tensor** ddOut) { + const phi::DenseTensor** X, + const phi::DenseTensor** ddX, + phi::DenseTensor** dX, + const phi::DenseTensor** dOut, + phi::DenseTensor** ddOut) { // extract ddX(output), ddOut(input) auto ddx_var = ctx.InputVar("DDX"); auto ddo_var = ctx.OutputVar("DDOut"); @@ -464,9 +463,9 @@ inline void ExtractDoubleGradTensorWithInputDOut( platform::errors::NotFound( "Cannot get input Variable Out, variable name = %s", ctx.InputName("DDX"))); - *ddX = ctx.Input("DDX"); + *ddX = ctx.Input("DDX"); if (ddo_var) { - *ddOut = ctx.Output("DDOut"); + *ddOut = ctx.Output("DDOut"); } PADDLE_ENFORCE_NOT_NULL( ddX, @@ -482,15 +481,15 @@ inline void ExtractDoubleGradTensorWithInputDOut( "Cannot get input Variable Out, variable name = %s", ctx.InputName("X"))); auto dx_var = ctx.OutputVar("DX"); - *X = ctx.Input("X"); + *X = ctx.Input("X"); if (dx_var) { - *dX = ctx.Output("DX"); + *dX = ctx.Output("DX"); } // extract dOut(input) auto dout_var = ctx.InputVar("DOut"); if (dout_var) { - *dOut = ctx.Input("DOut"); + *dOut = ctx.Input("DOut"); } } diff --git a/paddle/fluid/operators/activation_op.kps b/paddle/fluid/operators/activation_op.kps index 76a05aa37a646..8cc5e925f7490 100644 --- a/paddle/fluid/operators/activation_op.kps +++ b/paddle/fluid/operators/activation_op.kps @@ -72,13 +72,13 @@ class ActivationCudaKernel public: using T = typename Functor::ELEMENT_TYPE; void Compute(const framework::ExecutionContext& ctx) const override { - const framework::Tensor* x = nullptr; - framework::Tensor* out = nullptr; + const phi::DenseTensor* x = nullptr; + phi::DenseTensor* out = nullptr; ExtractActivationTensor(ctx, &x, &out); out->mutable_data(ctx.GetPlace()); auto& dev_ctx = ctx.template device_context(); - std::vector ins = {x}; - std::vector outs = {out}; + std::vector ins = {x}; + std::vector outs = {out}; auto functor = Functor(); auto attrs = functor.GetAttrs(); for (auto& attr : attrs) { @@ -95,8 +95,8 @@ class ActivationGradCudaKernel public: using T = typename Functor::ELEMENT_TYPE; void Compute(const framework::ExecutionContext& ctx) const override { - const framework::Tensor *x, *out, *d_out; - framework::Tensor* d_x = nullptr; + const phi::DenseTensor *x, *out, *d_out; + phi::DenseTensor* d_x = nullptr; x = out = d_out = nullptr; ExtractActivationGradTensor( ctx, &x, &out, &d_out, &d_x); @@ -108,8 +108,8 @@ class ActivationGradCudaKernel *attr.second = ctx.Attr(attr.first); } - std::vector ins = {d_out}; - std::vector outs = {d_x}; + std::vector ins = {d_out}; + std::vector outs = {d_x}; if (static_cast(Functor::FwdDeps()) == static_cast(ActBwdOpFwdDeps::kDepOut)) { diff --git a/paddle/fluid/operators/activation_op_mlu.cc b/paddle/fluid/operators/activation_op_mlu.cc index 6cfe4738d777b..736b398996b45 100644 --- a/paddle/fluid/operators/activation_op_mlu.cc +++ b/paddle/fluid/operators/activation_op_mlu.cc @@ -21,14 +21,14 @@ limitations under the Licnse. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ActivationMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); float alpha = ctx.HasAttr("alpha") ? ctx.Attr("alpha") : 1.0f; output->mutable_data(ctx.GetPlace()); @@ -51,9 +51,9 @@ template class ActivationGradMLUKernelV1 : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); float alpha = ctx.HasAttr("alpha") ? ctx.Attr("alpha") : 1.0f; dx->mutable_data(ctx.GetPlace()); @@ -82,9 +82,9 @@ template class ActivationGradMLUKernelV2 : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); float alpha = ctx.HasAttr("alpha") ? ctx.Attr("alpha") : 1.0f; dx->mutable_data(ctx.GetPlace()); @@ -113,9 +113,9 @@ template class ActivationGradMLUKernelV3 : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); float alpha = ctx.HasAttr("alpha") ? 
ctx.Attr("alpha") : 1.0f; dx->mutable_data(ctx.GetPlace()); @@ -144,8 +144,8 @@ template class SqrtMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); out->mutable_data(place); @@ -167,9 +167,9 @@ template class SqrtGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); dx->mutable_data(place); @@ -190,8 +190,8 @@ template class LogMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); output->mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc input_desc(*input); @@ -212,8 +212,8 @@ template class ExpMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); output->mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc input_desc(*input); @@ -233,9 +233,9 @@ template class ExpGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc dout_desc(*dout); MLUCnnlTensorDesc dx_desc(*dx); @@ -260,8 +260,8 @@ template class HardSwishMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); output->mutable_data(ctx.GetPlace()); float threshold = ctx.Attr("threshold"); float scale = ctx.Attr("scale"); @@ -312,9 +312,9 @@ class HardSwishGradMLUKernel : public framework::OpKernel { offset, 3.0f, platform::errors::External("Not support offset [%f] in MLU", offset)); - auto* out = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* out = ctx.Input("X"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); @@ -342,8 +342,8 @@ template class HardSigmoidMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); float slope = ctx.Attr("slope"); float offset = ctx.Attr("offset"); output->mutable_data(ctx.GetPlace()); @@ -369,9 +369,9 @@ template class HardSigmoidGradMLUKernel : public 
framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* x = ctx.Input("X"); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* x = ctx.Input("X"); + auto* dx = ctx.Output(framework::GradVarName("X")); float slope = ctx.Attr("slope"); float offset = ctx.Attr("offset"); dx->mutable_data(ctx.GetPlace()); @@ -403,8 +403,8 @@ template class FloorMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); output->mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc input_desc(*input); @@ -422,8 +422,8 @@ template class ReciprocalMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); out->mutable_data(place); MLUCnnlTensorDesc x_desc(*x); @@ -437,9 +437,9 @@ template class ReciprocalGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); dx->mutable_data(place); Tensor square_out; diff --git a/paddle/fluid/operators/activation_op_npu.cc b/paddle/fluid/operators/activation_op_npu.cc index 141e5832cceb8..52a472a595a92 100644 --- a/paddle/fluid/operators/activation_op_npu.cc +++ b/paddle/fluid/operators/activation_op_npu.cc @@ -24,7 +24,7 @@ limitations under the Licnse. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class PowNPUKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/addmm_op.cc b/paddle/fluid/operators/addmm_op.cc index 833285615f169..a2dbd8e2aa269 100644 --- a/paddle/fluid/operators/addmm_op.cc +++ b/paddle/fluid/operators/addmm_op.cc @@ -31,7 +31,6 @@ namespace operators { constexpr int kMULMKLDNNINT8 = 1; using framework::OpKernelType; -using framework::Tensor; class AddMMOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/affine_channel_op.cc b/paddle/fluid/operators/affine_channel_op.cc index 8c6360bfd89cf..23bccf9d8319c 100644 --- a/paddle/fluid/operators/affine_channel_op.cc +++ b/paddle/fluid/operators/affine_channel_op.cc @@ -188,11 +188,11 @@ template class AffineChannelKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); + auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); - auto* y = ctx.Output("Out"); + auto* y = ctx.Output("Out"); y->mutable_data(ctx.GetPlace()); const framework::DataLayout layout = @@ -233,14 +233,14 @@ template class AffineChannelGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* scale = ctx.Input("Scale"); - auto* dy = ctx.Input(framework::GradVarName("Out")); + auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* dy = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto* dscale = - ctx.Output(framework::GradVarName("Scale")); - auto* dbias = ctx.Output(framework::GradVarName("Bias")); + ctx.Output(framework::GradVarName("Scale")); + auto* dbias = ctx.Output(framework::GradVarName("Bias")); const framework::DataLayout layout = framework::StringToDataLayout(ctx.Attr("data_layout")); diff --git a/paddle/fluid/operators/affine_channel_op.cu b/paddle/fluid/operators/affine_channel_op.cu index 8fcdb32388418..cbbbd96ad845e 100644 --- a/paddle/fluid/operators/affine_channel_op.cu +++ b/paddle/fluid/operators/affine_channel_op.cu @@ -52,11 +52,11 @@ template class AffineChannelCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); + auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); - auto* y = ctx.Output("Out"); + auto* y = ctx.Output("Out"); y->mutable_data(ctx.GetPlace()); const framework::DataLayout layout = @@ -137,15 +137,15 @@ template class AffineChannelGradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); - auto* dy = ctx.Input(framework::GradVarName("Out")); + auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* dy = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto* dscale = - ctx.Output(framework::GradVarName("Scale")); - auto* 
dbias = ctx.Output(framework::GradVarName("Bias")); + ctx.Output(framework::GradVarName("Scale")); + auto* dbias = ctx.Output(framework::GradVarName("Bias")); const framework::DataLayout layout = framework::StringToDataLayout(ctx.Attr("data_layout")); diff --git a/paddle/fluid/operators/affine_channel_op_xpu.cc b/paddle/fluid/operators/affine_channel_op_xpu.cc index f31ad6378912c..b3b64cb0b0684 100644 --- a/paddle/fluid/operators/affine_channel_op_xpu.cc +++ b/paddle/fluid/operators/affine_channel_op_xpu.cc @@ -29,11 +29,11 @@ template class AffineChannelXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); + auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); - auto* y = ctx.Output("Out"); + auto* y = ctx.Output("Out"); y->mutable_data(ctx.GetPlace()); const framework::DataLayout layout = @@ -90,14 +90,14 @@ template class AffineChannelGradXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* scale = ctx.Input("Scale"); - auto* dy = ctx.Input(framework::GradVarName("Out")); + auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* dy = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto* dscale = - ctx.Output(framework::GradVarName("Scale")); - auto* dbias = ctx.Output(framework::GradVarName("Bias")); + ctx.Output(framework::GradVarName("Scale")); + auto* dbias = ctx.Output(framework::GradVarName("Bias")); const framework::DataLayout layout = framework::StringToDataLayout(ctx.Attr("data_layout")); diff --git a/paddle/fluid/operators/affine_grid_op.cc b/paddle/fluid/operators/affine_grid_op.cc index 1c0b8800f7bf5..2411860aa9e74 100644 --- a/paddle/fluid/operators/affine_grid_op.cc +++ b/paddle/fluid/operators/affine_grid_op.cc @@ -28,7 +28,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class AffineGridOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc b/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc index 78bacc3016178..508c51de723c0 100644 --- a/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc +++ b/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc @@ -21,13 +21,13 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class AllocFloatStatusKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* float_status = ctx.Output("FloatStatus"); + auto* float_status = ctx.Output("FloatStatus"); float_status->mutable_data(ctx.GetPlace()); const auto& runner = diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_mlu.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_mlu.cc index 41ba11ac04609..5f5415ffd37d0 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_mlu.cc +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_mlu.cc @@ -19,7 +19,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class CheckFiniteAndUnscaleMLUKernel : public framework::OpKernel { @@ -28,10 +28,10 @@ class CheckFiniteAndUnscaleMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { auto& dev_ctx = ctx.template device_context(); - const auto xs = ctx.MultiInput("X"); - const auto* scale = ctx.Input("Scale"); - auto outs = ctx.MultiOutput("Out"); - auto* found_inf = ctx.Output("FoundInfinite"); + const auto xs = ctx.MultiInput("X"); + const auto* scale = ctx.Input("Scale"); + auto outs = ctx.MultiOutput("Out"); + auto* found_inf = ctx.Output("FoundInfinite"); found_inf->mutable_data(dev_ctx.GetPlace()); diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc index 98768afa9362a..3b6e2ba7184c0 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc @@ -22,7 +22,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; // NOTE(zhiqiu): The CheckFiniteAndUnscaleNPUKernel is different from CUDA. // On NPU, we do not really check the data of input tensors, @@ -34,11 +34,11 @@ template class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { - const auto xs = ctx.MultiInput("X"); - const auto* scale = ctx.Input("Scale"); - const auto* float_status = ctx.Input("FloatStatus"); - auto outs = ctx.MultiOutput("Out"); - auto* found_inf = ctx.Output("FoundInfinite"); + const auto xs = ctx.MultiInput("X"); + const auto* scale = ctx.Input("Scale"); + const auto* float_status = ctx.Input("FloatStatus"); + auto outs = ctx.MultiOutput("Out"); + auto* found_inf = ctx.Output("FoundInfinite"); found_inf->mutable_data(ctx.GetPlace()); @@ -52,7 +52,7 @@ class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel { FillNpuTensorWithConstant(&const_tensor, static_cast(1.0)); // Inverse(1.0/scale) - Tensor* tmp_inverse_out = const_cast(scale); + phi::DenseTensor* tmp_inverse_out = const_cast(scale); Tensor inverse_out(scale->type()); inverse_out.Resize(scale->dims()); inverse_out.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc index cc60476c2690b..02dadf385e102 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc @@ -31,7 +31,7 @@ limitations under the License. */ namespace f = paddle::framework; namespace p = paddle::platform; -using Tensor = paddle::framework::Tensor; +using Tensor = phi::DenseTensor; USE_OP_ITSELF(check_finite_and_unscale); USE_OP_DEVICE_KERNEL(check_finite_and_unscale, NPU); diff --git a/paddle/fluid/operators/amp/clear_float_status_op_npu.cc b/paddle/fluid/operators/amp/clear_float_status_op_npu.cc index 1f3669a4f13d7..b5750181139d4 100644 --- a/paddle/fluid/operators/amp/clear_float_status_op_npu.cc +++ b/paddle/fluid/operators/amp/clear_float_status_op_npu.cc @@ -21,14 +21,14 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ClearFloatStatusKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const auto* float_status = ctx.Input("FloatStatus"); - auto* float_status_out = ctx.Output("FloatStatusOut"); + const auto* float_status = ctx.Input("FloatStatus"); + auto* float_status_out = ctx.Output("FloatStatusOut"); // NOTE(zhiqiu): NPUClearFloatStatus modifies the input. PADDLE_ENFORCE_EQ(float_status_out, float_status, diff --git a/paddle/fluid/operators/amp/get_float_status_op_npu.cc b/paddle/fluid/operators/amp/get_float_status_op_npu.cc index c1e958ea4d237..8befb2df9b835 100644 --- a/paddle/fluid/operators/amp/get_float_status_op_npu.cc +++ b/paddle/fluid/operators/amp/get_float_status_op_npu.cc @@ -21,14 +21,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class GetFloatStatusKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const auto* float_status = ctx.Input("FloatStatus"); - auto* float_status_out = ctx.Output("FloatStatusOut"); + const auto* float_status = ctx.Input("FloatStatus"); + auto* float_status_out = ctx.Output("FloatStatusOut"); // GetClearFloatStatus modifies the input. PADDLE_ENFORCE_EQ(float_status_out, float_status, diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op.cc b/paddle/fluid/operators/amp/update_loss_scaling_op.cc index 03a5f734c2dc8..f8ccac27c19c9 100644 --- a/paddle/fluid/operators/amp/update_loss_scaling_op.cc +++ b/paddle/fluid/operators/amp/update_loss_scaling_op.cc @@ -41,7 +41,7 @@ class UpdateLossScalingOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { #ifndef PADDLE_WITH_XPU if (var_name == "FoundInfinite" || var_name == "StopUpdate") { diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc b/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc index 24784803f1732..dc1cd958f458c 100644 --- a/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc +++ b/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc @@ -25,21 +25,21 @@ DECLARE_int32(min_loss_scaling); namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template void Update(const platform::NPUDeviceContext& ctx, const std::vector found_inf_vec, - const Tensor* pre_loss_scaling_tensor, - const Tensor* good_in_tensor, - const Tensor* bad_in_tensor, + const phi::DenseTensor* pre_loss_scaling_tensor, + const phi::DenseTensor* good_in_tensor, + const phi::DenseTensor* bad_in_tensor, const int incr_every_n_steps, const int decr_every_n_nan_or_inf, const float incr_ratio, const float decr_ratio, - Tensor* updated_loss_scaling_tensor, - Tensor* good_out_tensor, - Tensor* bad_out_tensor) { + phi::DenseTensor* updated_loss_scaling_tensor, + phi::DenseTensor* good_out_tensor, + phi::DenseTensor* bad_out_tensor) { auto place = ctx.GetPlace(); auto stream = ctx.stream(); if (found_inf_vec[0]) { @@ -154,16 +154,16 @@ class UpdateLossScalingFunctor { public: void operator()(const platform::NPUDeviceContext& dev_ctx, const std::vector found_inf_vec, - const Tensor* 
pre_loss_scaling_tensor, - const Tensor* good_in_tensor, - const Tensor* bad_in_tensor, + const phi::DenseTensor* pre_loss_scaling_tensor, + const phi::DenseTensor* good_in_tensor, + const phi::DenseTensor* bad_in_tensor, const int incr_every_n_steps, const int decr_every_n_nan_or_inf, const float incr_ratio, const float decr_ratio, - Tensor* updated_loss_scaling_tensor, - Tensor* good_out_tensor, - Tensor* bad_out_tensor) const { + phi::DenseTensor* updated_loss_scaling_tensor, + phi::DenseTensor* good_out_tensor, + phi::DenseTensor* bad_out_tensor) const { Update(dev_ctx, found_inf_vec, pre_loss_scaling_tensor, @@ -184,14 +184,14 @@ class LazyZerosNPU { public: void operator()(const platform::NPUDeviceContext& dev_ctx, const std::vector found_inf_vec, - const std::vector& xs, - const std::vector& outs) const { + const std::vector& xs, + const std::vector& outs) const { if (!xs.size()) { return; } auto place = dev_ctx.GetPlace(); auto stream = dev_ctx.stream(); - Tensor* zero_tensor = nullptr; + phi::DenseTensor* zero_tensor = nullptr; void* zero_ptr = nullptr; if (found_inf_vec[0]) { int max_num = -1; @@ -234,9 +234,9 @@ class UpdateLossScalingNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - const auto xs = ctx.MultiInput("X"); - auto outs = ctx.MultiOutput("Out"); - const auto* found_inf = ctx.Input("FoundInfinite"); + const auto xs = ctx.MultiInput("X"); + auto outs = ctx.MultiOutput("Out"); + const auto* found_inf = ctx.Input("FoundInfinite"); PADDLE_ENFORCE_EQ(found_inf->numel(), 1, platform::errors::InvalidArgument( @@ -252,12 +252,13 @@ class UpdateLossScalingNPUKernel : public framework::OpKernel { return; } - const auto* pre_loss_scaling = ctx.Input("PrevLossScaling"); - const auto* good_in = ctx.Input("InGoodSteps"); - const auto* bad_in = ctx.Input("InBadSteps"); - auto* updated_loss_scaling = ctx.Output("LossScaling"); - auto* good_out = ctx.Output("OutGoodSteps"); - auto* bad_out = ctx.Output("OutBadSteps"); + const auto* pre_loss_scaling = + ctx.Input("PrevLossScaling"); + const auto* good_in = ctx.Input("InGoodSteps"); + const auto* bad_in = ctx.Input("InBadSteps"); + auto* updated_loss_scaling = ctx.Output("LossScaling"); + auto* good_out = ctx.Output("OutGoodSteps"); + auto* bad_out = ctx.Output("OutBadSteps"); updated_loss_scaling->mutable_data(dev_ctx.GetPlace()); good_out->mutable_data(dev_ctx.GetPlace()); diff --git a/paddle/fluid/operators/arg_max_op_mlu.cc b/paddle/fluid/operators/arg_max_op_mlu.cc index 44f74f016c065..6d61526bc0c96 100644 --- a/paddle/fluid/operators/arg_max_op_mlu.cc +++ b/paddle/fluid/operators/arg_max_op_mlu.cc @@ -22,8 +22,8 @@ template class ArgMaxMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto axis = static_cast(ctx.Attr("axis")); auto dtype = ctx.Attr("dtype"); const bool& flatten = ctx.Attr("flatten"); @@ -49,7 +49,7 @@ class ArgMaxMLUKernel : public framework::OpKernel { axis += x_dims.size(); } - framework::Tensor flatten_x(x->type()); + phi::DenseTensor flatten_x(x->type()); flatten_x.ShareDataWith(*x); if (flatten) { flatten_x.Resize(phi::make_ddim({x->numel()})); @@ -66,7 +66,7 @@ class ArgMaxMLUKernel : public framework::OpKernel { } size_t indices_size_inbytes = out_count * sizeof(int32_t); auto& dev_ctx = ctx.template 
device_context(); - framework::Tensor value_out = + phi::DenseTensor value_out = ctx.AllocateTmpTensor(out->dims(), dev_ctx); MLUCnnlTensorDesc value_out_desc(value_out); MLUCnnlTensorDesc input_desc( @@ -93,7 +93,7 @@ class ArgMaxMLUKernel : public framework::OpKernel { GetBasePtr(&value_out)); } else { out->template mutable_data(ctx.GetPlace()); - framework::Tensor out_int32 = + phi::DenseTensor out_int32 = ctx.AllocateTmpTensor(out->dims(), dev_ctx); MLUCnnl::Reduce(ctx, diff --git a/paddle/fluid/operators/arg_max_op_npu.cc b/paddle/fluid/operators/arg_max_op_npu.cc index e35b70754ae71..6e5048db47ead 100644 --- a/paddle/fluid/operators/arg_max_op_npu.cc +++ b/paddle/fluid/operators/arg_max_op_npu.cc @@ -18,7 +18,7 @@ limitations under the Licnse. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template @@ -29,8 +29,8 @@ struct VisitDataArgNPUMaxFunctor { : ctx(ctx) {} template void apply() const { - auto& x = *(ctx.Input("X")); - auto& out = *(ctx.Output("Out")); + auto& x = *(ctx.Input("X")); + auto& out = *(ctx.Output("Out")); out.template mutable_data(ctx.GetPlace()); auto axis = ctx.Attr("axis"); auto dtype = ctx.Attr("dtype"); diff --git a/paddle/fluid/operators/arg_min_op_npu.cc b/paddle/fluid/operators/arg_min_op_npu.cc index 0419bbdf9f170..fe917140b7b9f 100644 --- a/paddle/fluid/operators/arg_min_op_npu.cc +++ b/paddle/fluid/operators/arg_min_op_npu.cc @@ -17,17 +17,17 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ArgMinNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); int64_t axis = ctx.Attr("axis"); auto dtype = ctx.Attr("dtype"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); NpuOpRunner runner; diff --git a/paddle/fluid/operators/argsort_op_mlu.cc b/paddle/fluid/operators/argsort_op_mlu.cc index edbffb6e0cfae..e1791a8356438 100644 --- a/paddle/fluid/operators/argsort_op_mlu.cc +++ b/paddle/fluid/operators/argsort_op_mlu.cc @@ -44,7 +44,7 @@ class ArgsortMLUKernel : public framework::OpKernel { indices->mutable_data(place); // cnnl only support int32/int16 type of indices - framework::Tensor indices_int32(framework::TransToPhiDataType(VT::INT32)); + phi::DenseTensor indices_int32(framework::TransToPhiDataType(VT::INT32)); indices_int32.Resize(indices->dims()); indices_int32.mutable_data(place); @@ -79,9 +79,9 @@ template class ArgsortGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* indices = ctx.Input("Indices"); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* indices = ctx.Input("Indices"); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dout = ctx.Input(framework::GradVarName("Out")); int axis = ctx.Attr("axis"); dx->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/argsort_op_npu.cc b/paddle/fluid/operators/argsort_op_npu.cc index 7d9c4ffdaf6da..7aedb41c9fde3 100644 --- a/paddle/fluid/operators/argsort_op_npu.cc +++ b/paddle/fluid/operators/argsort_op_npu.cc @@ -18,15 +18,15 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template static void TranposeNPU(const framework::ExecutionContext& ctx, const aclrtStream& stream, std::vector* perm, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { out->mutable_data(ctx.GetPlace()); NpuOpRunner runner; runner.SetType("Transpose") @@ -38,8 +38,8 @@ static void TranposeNPU(const framework::ExecutionContext& ctx, static void CastToInt64(const framework::ExecutionContext& ctx, const aclrtStream& stream, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { out->mutable_data(ctx.GetPlace()); NpuOpRunner runner; runner.SetType("Cast") @@ -51,8 +51,8 @@ static void CastToInt64(const framework::ExecutionContext& ctx, static void CastToFP32(const framework::ExecutionContext& ctx, const aclrtStream& stream, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { out->mutable_data(ctx.GetPlace()); NpuOpRunner runner; runner.SetType("Cast") @@ -66,9 +66,9 @@ template class ArgsortNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - auto* indices = ctx.Output("Indices"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); + auto* indices = ctx.Output("Indices"); int axis = ctx.Attr("axis"); bool descending = ctx.Attr("descending"); @@ -176,9 +176,9 @@ template static void FullAssignNPU(const framework::ExecutionContext& ctx, const aclrtStream& stream, const framework::DDim in_dims, - const Tensor& input, - const Tensor& indices, - Tensor* t_out) { + const phi::DenseTensor& input, + const phi::DenseTensor& indices, + phi::DenseTensor* t_out) { const int64_t input_height = phi::product(phi::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t input_width = in_dims[in_dims.size() - 1]; @@ -226,9 +226,9 @@ template class ArgsortGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* indices = ctx.Input("Indices"); - auto* dX = ctx.Output(framework::GradVarName("X")); - auto* dO = ctx.Input(framework::GradVarName("Out")); + auto* indices = ctx.Input("Indices"); + auto* dX = ctx.Output(framework::GradVarName("X")); + auto* dO = ctx.Input(framework::GradVarName("Out")); int axis = ctx.Attr("axis"); auto in_dims = indices->dims(); diff --git a/paddle/fluid/operators/array_operator.h b/paddle/fluid/operators/array_operator.h index 19b90d360201e..990ef8d8556b3 100644 --- a/paddle/fluid/operators/array_operator.h +++ b/paddle/fluid/operators/array_operator.h @@ -55,7 +55,7 @@ class ArrayOp : public framework::OperatorBase { platform::is_npu_place(i_tensor.place()) || platform::is_custom_place(i_tensor.place())) { // FIXME: Avoid copy from GPU to CPU - framework::Tensor t; + phi::DenseTensor t; framework::TensorCopy(i_tensor, platform::CPUPlace(), dev_ctx, &t); dev_ctx.Wait(); offset = static_cast(*t.data()); diff --git a/paddle/fluid/operators/array_to_lod_tensor_op.cc b/paddle/fluid/operators/array_to_lod_tensor_op.cc index 89c817889f144..9236c0b6ae5c9 100644 --- a/paddle/fluid/operators/array_to_lod_tensor_op.cc +++ b/paddle/fluid/operators/array_to_lod_tensor_op.cc @@ -44,8 +44,8 @@ struct ArrayToLoDFunctorImpl { }; struct ArrayToLoDFunctor : public std::unary_function { - 
std::vector in; - mutable framework::Tensor *out; + std::vector in; + mutable phi::DenseTensor *out; template void operator()(Place place) const { diff --git a/paddle/fluid/operators/ascend_trigger_op.h b/paddle/fluid/operators/ascend_trigger_op.h index d1eaa00c2a3e0..943960e1bb1c5 100644 --- a/paddle/fluid/operators/ascend_trigger_op.h +++ b/paddle/fluid/operators/ascend_trigger_op.h @@ -33,8 +33,8 @@ class AscendTriggerCPUKernel : public framework::OpKernel { auto ascend_ptr = paddle::framework::AscendInstance::GetInstance(); auto graph_idx = ctx.Attr("graph_idx"); VLOG(4) << "AscendTrigger Kernel, begin to run graph: " << graph_idx; - auto inputs = ctx.MultiInput("FeedList"); - auto outputs = ctx.MultiOutput("FetchList"); + auto inputs = ctx.MultiInput("FeedList"); + auto outputs = ctx.MultiOutput("FetchList"); ascend_ptr->RunAscendSubgraph(graph_idx, inputs, &outputs); #else PADDLE_THROW(platform::errors::PreconditionNotMet( diff --git a/paddle/fluid/operators/assign_op.cc b/paddle/fluid/operators/assign_op.cc index ab6684ae33f7a..91bc5019f3f07 100644 --- a/paddle/fluid/operators/assign_op.cc +++ b/paddle/fluid/operators/assign_op.cc @@ -43,7 +43,7 @@ class AssignOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { return framework::OpKernelType(expected_kernel_type.data_type_, expected_kernel_type.place_, diff --git a/paddle/fluid/operators/assign_op_test.cc b/paddle/fluid/operators/assign_op_test.cc index 0b6245f17d38d..8586329e501c8 100644 --- a/paddle/fluid/operators/assign_op_test.cc +++ b/paddle/fluid/operators/assign_op_test.cc @@ -87,7 +87,7 @@ TEST(AssignOp, AssignSelectedRows) { int64_t height = 10; phi::SelectedRows input(rows, height); - paddle::framework::Tensor* input_tensor = input.mutable_value(); + phi::DenseTensor* input_tensor = input.mutable_value(); paddle::framework::DDim in_dims = phi::make_ddim({3, 4}); int* in_data = input_tensor->mutable_data(in_dims, cpu_place); @@ -104,7 +104,7 @@ TEST(AssignOp, AssignSelectedRows) { EXPECT_EQ(rows[i], out_rows[i]); } EXPECT_EQ(height, out_selected_row.height()); - const paddle::framework::Tensor& out_tensor = out_selected_row.value(); + const phi::DenseTensor& out_tensor = out_selected_row.value(); paddle::framework::DDim out_dims = out_tensor.dims(); EXPECT_EQ(in_dims, out_dims); auto* out_data = out_tensor.data(); diff --git a/paddle/fluid/operators/assign_pos_op.cu b/paddle/fluid/operators/assign_pos_op.cu index 3f36e8b13476d..f5704b6a08617 100644 --- a/paddle/fluid/operators/assign_pos_op.cu +++ b/paddle/fluid/operators/assign_pos_op.cu @@ -73,7 +73,7 @@ class AssignPosCUDAKernel : public framework::OpKernel { T* cum_data = const_cast(cum_count->data()); auto cum_size = cum_count->numel(); - framework::Tensor cpu_eff_num_len; + phi::DenseTensor cpu_eff_num_len; int64_t cpu_eff_num_len_data = 0; if (platform::is_cpu_place(eff_num_len->place())) { cpu_eff_num_len_data = eff_num_len->data()[0]; diff --git a/paddle/fluid/operators/assign_value_op.h b/paddle/fluid/operators/assign_value_op.h index 775f0788aea3f..1954c1ee1571d 100644 --- a/paddle/fluid/operators/assign_value_op.h +++ b/paddle/fluid/operators/assign_value_op.h @@ -24,17 +24,15 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; - template typename std::enable_if::value>::type CopyVectorToTensor( const char* value_name, - 
framework::Tensor* out, + phi::DenseTensor* out, const framework::ExecutionContext& ctx) { - // If attribute value dtype is vector, it will be converted to - // vector. - // at the same time, we can not use vector to hold the value, because - // the c++ use bit value to replace byte value. + // phi::DenseTensore dtype is vector, it will be converted to + // vector. + // at the same time, we can not use vector to hold the value, because + // the c++ use bit value to replace byte value. auto values = ctx.Attr>(value_name); framework::TensorFromVector(values, ctx.device_context(), out); @@ -51,7 +49,7 @@ typename std::enable_if::value>::type CopyVectorToTensor( template typename std::enable_if::value>::type CopyVectorToTensor( const char* value_name, - framework::Tensor* out, + phi::DenseTensor* out, const framework::ExecutionContext& ctx) { auto values = ctx.Attr>(value_name); framework::TensorFromVector(values, ctx.device_context(), out); @@ -62,7 +60,7 @@ class AssignValueKernel : public framework::OpKernel { public: virtual void Compute(const framework::ExecutionContext& ctx) const { auto shape = ctx.Attr>("shape"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); int dtype = ctx.Attr("dtype"); const char* value_name = nullptr; switch (dtype) { diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc index 203ccd8e6034d..49a847eecaeaa 100644 --- a/paddle/fluid/operators/attention_lstm_op.cc +++ b/paddle/fluid/operators/attention_lstm_op.cc @@ -340,21 +340,22 @@ class AttentionLSTMKernel : public framework::OpKernel { using DeviceContext = phi::CPUContext; auto* x = ctx.Input("X"); - auto* h0 = ctx.Input("H0"); - auto* c0 = ctx.Input("C0"); - auto* atten_w = ctx.Input("AttentionWeight"); - auto* atten_b = ctx.Input("AttentionBias"); - auto* atten_scalar = ctx.Input("AttentionScalar"); - auto* atten_scalar_bias = ctx.Input("AttentionScalarBias"); - auto* lstm_w = ctx.Input("LSTMWeight"); - auto* lstm_b = ctx.Input("LSTMBias"); + auto* h0 = ctx.Input("H0"); + auto* c0 = ctx.Input("C0"); + auto* atten_w = ctx.Input("AttentionWeight"); + auto* atten_b = ctx.Input("AttentionBias"); + auto* atten_scalar = ctx.Input("AttentionScalar"); + auto* atten_scalar_bias = + ctx.Input("AttentionScalarBias"); + auto* lstm_w = ctx.Input("LSTMWeight"); + auto* lstm_b = ctx.Input("LSTMBias"); auto* hidden_out = ctx.Output("Hidden"); auto* cell_out = ctx.Output("Cell"); - auto* atted_x = ctx.Output("AttentionedX"); - auto* fc_out = ctx.Output("AttentionFCOut"); - auto* lstm_x = ctx.Output("LSTMX"); - auto* lstm_out = ctx.Output("LSTMOUT"); + auto* atted_x = ctx.Output("AttentionedX"); + auto* fc_out = ctx.Output("AttentionFCOut"); + auto* lstm_x = ctx.Output("LSTMX"); + auto* lstm_out = ctx.Output("LSTMOUT"); // some shape should be reshape here since infershape can not get lod info auto x_lod = x->lod(); diff --git a/paddle/fluid/operators/attention_lstm_op.h b/paddle/fluid/operators/attention_lstm_op.h index 6ede3a7f3c96d..16142be6d1e35 100644 --- a/paddle/fluid/operators/attention_lstm_op.h +++ b/paddle/fluid/operators/attention_lstm_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class AttentionLSTMOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/batch_fc_op.cu b/paddle/fluid/operators/batch_fc_op.cu index 362489e51acc2..b8b67d344d2d6 100644 --- a/paddle/fluid/operators/batch_fc_op.cu 
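Aside on the comment rewritten in assign_value_op.h above: bool attribute values are staged through vector<int> because std::vector<bool> is a bit-packed specialization with no contiguous byte storage to copy from. A minimal standalone C++ illustration of that constraint (nothing below is Paddle code; the names are ours):

#include <cassert>
#include <type_traits>
#include <vector>

int main() {
  std::vector<bool> flags = {true, false, true};
  // operator[] yields a proxy object over packed bits, not a bool&,
  // and vector<bool> provides no data() pointing at contiguous bools.
  static_assert(
      !std::is_same<decltype(flags[0]), bool&>::value,
      "vector<bool> elements are proxies, so the buffer cannot be memcpy'd");
  // Widening to a byte-addressable element type restores a copyable buffer,
  // which is what the bool overload of CopyVectorToTensor above works around.
  std::vector<int> widened(flags.begin(), flags.end());
  assert(widened.size() == flags.size());
  assert(widened[0] == 1 && widened[1] == 0 && widened[2] == 1);
  return 0;
}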
+++ b/paddle/fluid/operators/batch_fc_op.cu @@ -22,7 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; const int CUDA_NUM_THREADS = 1024; static inline int GET_BLOCKS(const int N) { @@ -95,8 +94,8 @@ class BatchFCCUDAKernel : public framework::OpKernel { // b.dim = slot_pairs_num * out_dim // output.dim = slot_pairs_num * ins_num * out_dim auto* input = ctx.Input("Input"); - auto* w = ctx.Input("W"); - auto* bias = ctx.Input("Bias"); + auto* w = ctx.Input("W"); + auto* bias = ctx.Input("Bias"); auto* output = ctx.Output("Out"); auto input_dims = input->dims(); auto w_dims = w->dims(); @@ -154,13 +153,13 @@ template class BatchFCGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* w = ctx.Input("W"); - auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* input = ctx.Input("Input"); + auto* w = ctx.Input("W"); + auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("Input")); - auto* dw = ctx.Output(framework::GradVarName("W")); - auto* db = ctx.Output(framework::GradVarName("Bias")); + auto* dx = ctx.Output(framework::GradVarName("Input")); + auto* dw = ctx.Output(framework::GradVarName("W")); + auto* db = ctx.Output(framework::GradVarName("Bias")); auto input_dims = input->dims(); auto w_dims = w->dims(); diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 84f22ebff4084..4979ab0345200 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -178,21 +178,24 @@ framework::OpKernelType BatchNormOp::GetExpectedKernelType( } PADDLE_ENFORCE_EQ( bn_param_type, - framework::TransToProtoVarType(ctx.Input("Scale")->dtype()), + framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), platform::errors::InvalidArgument("Scale input should be of float type")); PADDLE_ENFORCE_EQ( bn_param_type, - framework::TransToProtoVarType(ctx.Input("Bias")->dtype()), + framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), platform::errors::InvalidArgument("Bias input should be of float type")); PADDLE_ENFORCE_EQ( bn_param_type, - framework::TransToProtoVarType(ctx.Input("Mean")->dtype()), + framework::TransToProtoVarType( + ctx.Input("Mean")->dtype()), platform::errors::InvalidArgument("Mean input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("Variance")->dtype()), - platform::errors::InvalidArgument( - "Variance input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Variance")->dtype()), + platform::errors::InvalidArgument( + "Variance input should be of float type")); // TODO(pzelazko-intel): enable MKLDNN layout when it's ready #ifdef PADDLE_WITH_MKLDNN diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu index a19b087245a89..a9d1968d9fe58 100644 --- a/paddle/fluid/operators/batch_norm_op.cu +++ b/paddle/fluid/operators/batch_norm_op.cu @@ -34,7 +34,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DataLayout = framework::DataLayout; template using CudnnDataType = platform::CudnnDataType; diff --git a/paddle/fluid/operators/batch_norm_op.h b/paddle/fluid/operators/batch_norm_op.h index 
1efabccb45e60..95008b19f377d 100644 --- a/paddle/fluid/operators/batch_norm_op.h +++ b/paddle/fluid/operators/batch_norm_op.h @@ -27,7 +27,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using DataLayout = framework::DataLayout; @@ -54,7 +54,7 @@ class BatchNormOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override; }; @@ -69,7 +69,7 @@ class BatchNormGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override; }; diff --git a/paddle/fluid/operators/batch_norm_op_mlu.cc b/paddle/fluid/operators/batch_norm_op_mlu.cc index 1aa445bda3717..ef97e07ec71d1 100644 --- a/paddle/fluid/operators/batch_norm_op_mlu.cc +++ b/paddle/fluid/operators/batch_norm_op_mlu.cc @@ -38,7 +38,7 @@ class MLUBatchNormOpKernel : public framework::OpKernel { const std::string data_layout_str = ctx.Attr("data_layout"); DataLayout data_layout = framework::StringToDataLayout(data_layout_str); - const auto *x = ctx.Input("X"); + const auto *x = ctx.Input("X"); const auto &x_dims = x->dims(); PADDLE_ENFORCE_GE( x_dims.size(), @@ -60,16 +60,16 @@ class MLUBatchNormOpKernel : public framework::OpKernel { : x_dims[x_dims.size() - 1]); const int sample_size = x->numel() / N / C; - const auto *running_mean = ctx.Input("Mean"); - const auto *running_var = ctx.Input("Variance"); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); + const auto *running_mean = ctx.Input("Mean"); + const auto *running_var = ctx.Input("Variance"); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); - auto *y = ctx.Output("Y"); - auto *mean_out = ctx.Output("MeanOut"); - auto *variance_out = ctx.Output("VarianceOut"); - auto *saved_mean = ctx.Output("SavedMean"); - auto *saved_variance = ctx.Output("SavedVariance"); + auto *y = ctx.Output("Y"); + auto *mean_out = ctx.Output("MeanOut"); + auto *variance_out = ctx.Output("VarianceOut"); + auto *saved_mean = ctx.Output("SavedMean"); + auto *saved_variance = ctx.Output("SavedVariance"); // alloc memory y->mutable_data(place); @@ -115,7 +115,7 @@ class MLUBatchNormOpKernel : public framework::OpKernel { } if (ctx.HasInput("MomentumTensor")) { - const auto *mom_tensor = ctx.Input("MomentumTensor"); + const auto *mom_tensor = ctx.Input("MomentumTensor"); Tensor mom_cpu; framework::TensorCopySync(*mom_tensor, platform::CPUPlace(), &mom_cpu); momentum = mom_cpu.data()[0]; @@ -161,22 +161,24 @@ class MLUBatchNormGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - const auto *x = ctx.Input("X"); - const auto *d_y = ctx.Input(framework::GradVarName("Y")); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); - const auto *saved_mean = ctx.Input("SavedMean"); + const auto *x = ctx.Input("X"); + const auto *d_y = ctx.Input(framework::GradVarName("Y")); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + const auto *saved_mean = ctx.Input("SavedMean"); // SavedVariance have been reverted in forward operator - const auto 
*saved_inv_variance = ctx.Input("SavedVariance"); + const auto *saved_inv_variance = + ctx.Input("SavedVariance"); const std::string data_layout_str = ctx.Attr("data_layout"); bool use_global_stats = ctx.Attr("use_global_stats"); const bool is_test = ctx.Attr("is_test"); const float epsilon = ctx.Attr("epsilon"); DataLayout data_layout = framework::StringToDataLayout(data_layout_str); - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_scale = ctx.Output(framework::GradVarName("Scale")); - auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_scale = + ctx.Output(framework::GradVarName("Scale")); + auto *d_bias = ctx.Output(framework::GradVarName("Bias")); auto &dev_ctx = ctx.template device_context(); auto d_x_tmp = @@ -270,8 +272,8 @@ class MLUBatchNormGradOpKernel : public framework::OpKernel { } if (use_global_stats) { - const auto *running_mean = ctx.Input("Mean"); - const auto *running_variance = ctx.Input("Variance"); + const auto *running_mean = ctx.Input("Mean"); + const auto *running_variance = ctx.Input("Variance"); MLUCnnl::FusedBatchNormGrad(ctx, false /*is_training*/, transformed_desc.get(), diff --git a/paddle/fluid/operators/batch_norm_op_npu.cc b/paddle/fluid/operators/batch_norm_op_npu.cc index b369a2011aff9..034c578ddde58 100644 --- a/paddle/fluid/operators/batch_norm_op_npu.cc +++ b/paddle/fluid/operators/batch_norm_op_npu.cc @@ -36,7 +36,7 @@ class NPUBatchNormOpKernel : public framework::OpKernel { const std::string data_layout_str = ctx.Attr("data_layout"); DataLayout data_layout = framework::StringToDataLayout(data_layout_str); - const auto *x = ctx.Input("X"); + const auto *x = ctx.Input("X"); const auto &x_dims = x->dims(); PADDLE_ENFORCE_EQ( (x_dims.size() == 4UL || x_dims.size() == 3UL), @@ -47,12 +47,12 @@ class NPUBatchNormOpKernel : public framework::OpKernel { x_dims.to_str(), x_dims.size())); - const auto *running_mean = ctx.Input("Mean"); - const auto *running_var = ctx.Input("Variance"); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); + const auto *running_mean = ctx.Input("Mean"); + const auto *running_var = ctx.Input("Variance"); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); - auto *y = ctx.Output("Y"); + auto *y = ctx.Output("Y"); y->mutable_data(ctx.GetPlace()); auto &dev_ctx = ctx.template device_context(); @@ -76,10 +76,10 @@ class NPUBatchNormOpKernel : public framework::OpKernel { {{"epsilon", epsilon}}); runner_infer.Run(stream); } else { - auto *mean_out = ctx.Output("MeanOut"); - auto *variance_out = ctx.Output("VarianceOut"); - auto *saved_mean = ctx.Output("SavedMean"); - auto *saved_variance = ctx.Output("SavedVariance"); + auto *mean_out = ctx.Output("MeanOut"); + auto *variance_out = ctx.Output("VarianceOut"); + auto *saved_mean = ctx.Output("SavedMean"); + auto *saved_variance = ctx.Output("SavedVariance"); mean_out->mutable_data(ctx.GetPlace()); variance_out->mutable_data(ctx.GetPlace()); saved_mean->mutable_data(ctx.GetPlace()); @@ -88,14 +88,14 @@ class NPUBatchNormOpKernel : public framework::OpKernel { // if MomentumTensor is set, use MomentumTensor value, momentum // is only used in this training branch if (ctx.HasInput("MomentumTensor")) { - const auto *mom_tensor = ctx.Input("MomentumTensor"); + const auto *mom_tensor = ctx.Input("MomentumTensor"); Tensor mom_cpu; paddle::framework::TensorCopySync( *mom_tensor, platform::CPUPlace(), &mom_cpu); momentum = mom_cpu.data()[0]; } - 
framework::Tensor sum, square_sum; + phi::DenseTensor sum, square_sum; sum.mutable_data(running_mean->dims(), ctx.GetPlace()); square_sum.mutable_data(running_mean->dims(), ctx.GetPlace()); @@ -137,22 +137,24 @@ template class NPUBatchNormGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - const auto *x = ctx.Input("X"); - const auto *d_y = ctx.Input(framework::GradVarName("Y")); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); - const auto *saved_mean = ctx.Input("SavedMean"); + const auto *x = ctx.Input("X"); + const auto *d_y = ctx.Input(framework::GradVarName("Y")); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + const auto *saved_mean = ctx.Input("SavedMean"); // SavedVariance have been reverted in forward operator - const auto *saved_inv_variance = ctx.Input("SavedVariance"); + const auto *saved_inv_variance = + ctx.Input("SavedVariance"); const std::string data_layout_str = ctx.Attr("data_layout"); bool use_global_stats = ctx.Attr("use_global_stats"); const bool is_test = ctx.Attr("is_test"); const float epsilon = ctx.Attr("epsilon"); DataLayout data_layout = framework::StringToDataLayout(data_layout_str); - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_scale = ctx.Output(framework::GradVarName("Scale")); - auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_scale = + ctx.Output(framework::GradVarName("Scale")); + auto *d_bias = ctx.Output(framework::GradVarName("Bias")); use_global_stats = is_test || use_global_stats; @@ -184,8 +186,8 @@ class NPUBatchNormGradOpKernel : public framework::OpKernel { d_scale->mutable_data(ctx.GetPlace()); d_bias->mutable_data(ctx.GetPlace()); if (use_global_stats) { - const auto *running_mean = ctx.Input("Mean"); - const auto *running_variance = ctx.Input("Variance"); + const auto *running_mean = ctx.Input("Mean"); + const auto *running_variance = ctx.Input("Variance"); const auto &runner_update = NpuOpRunner("BNTrainingUpdateGrad", {dy_tensor, x_tensor, *running_mean, *running_variance}, @@ -223,7 +225,7 @@ class NPUBatchNormGradOpKernel : public framework::OpKernel { dx_tensor.Resize(x_new_shape); dy_tensor.Resize(x_new_shape); } - const auto *running_var = ctx.Input("Variance"); + const auto *running_var = ctx.Input("Variance"); const auto &runner_infer = NpuOpRunner("BNInferGrad", {dy_tensor, *scale, *running_var}, diff --git a/paddle/fluid/operators/bce_loss_op.cc b/paddle/fluid/operators/bce_loss_op.cc index 6e3bea17863ab..3c775ced3f434 100644 --- a/paddle/fluid/operators/bce_loss_op.cc +++ b/paddle/fluid/operators/bce_loss_op.cc @@ -23,8 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class BCELossOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/bce_loss_op_mlu.cc b/paddle/fluid/operators/bce_loss_op_mlu.cc index c194da4d65bcf..99fd402424e7c 100644 --- a/paddle/fluid/operators/bce_loss_op_mlu.cc +++ b/paddle/fluid/operators/bce_loss_op_mlu.cc @@ -18,15 +18,15 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class BCELossMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* labels = ctx.Input("Label"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* labels = ctx.Input("Label"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); @@ -50,10 +50,10 @@ template class BCELossGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* labels = ctx.Input("Label"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* labels = ctx.Input("Label"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/bce_loss_op_npu.cc b/paddle/fluid/operators/bce_loss_op_npu.cc index 57dd53e5968c1..c6b2d12ac535e 100644 --- a/paddle/fluid/operators/bce_loss_op_npu.cc +++ b/paddle/fluid/operators/bce_loss_op_npu.cc @@ -18,15 +18,15 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class BCELossNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* labels = ctx.Input("Label"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* labels = ctx.Input("Label"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); @@ -47,10 +47,10 @@ template class BCELossGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* labels = ctx.Input("Label"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* labels = ctx.Input("Label"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/beam_search_op.h b/paddle/fluid/operators/beam_search_op.h index e9991e697903a..09adff97c1ce4 100644 --- a/paddle/fluid/operators/beam_search_op.h +++ b/paddle/fluid/operators/beam_search_op.h @@ -49,7 +49,7 @@ class BeamSearchOpKernel : public framework::OpKernel { auto selected_ids = context.Output("selected_ids"); auto selected_scores = context.Output("selected_scores"); - auto* parent_idx = context.Output("parent_idx"); + auto* parent_idx = context.Output("parent_idx"); PADDLE_ENFORCE_NOT_NULL( selected_ids, platform::errors::NotFound( diff --git a/paddle/fluid/operators/bilateral_slice_op.cc b/paddle/fluid/operators/bilateral_slice_op.cc index 19347abac5e79..db3ebba8edc5a 100644 --- a/paddle/fluid/operators/bilateral_slice_op.cc +++ b/paddle/fluid/operators/bilateral_slice_op.cc @@ -20,7 +20,6 @@ namespace paddle { namespace operators { -using framework::Tensor; using DataLayout = framework::DataLayout; class BilateralSliceOp : public framework::OperatorWithKernel { @@ -94,7 +93,7 @@ class BilateralSliceOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& 
tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { return framework::OpKernelType( expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/bilateral_slice_op.cu b/paddle/fluid/operators/bilateral_slice_op.cu index 81afe68dbd23c..590b0d8ab39d5 100644 --- a/paddle/fluid/operators/bilateral_slice_op.cu +++ b/paddle/fluid/operators/bilateral_slice_op.cu @@ -19,7 +19,6 @@ namespace paddle { namespace operators { -using framework::Tensor; using DataLayout = framework::DataLayout; template @@ -131,10 +130,10 @@ template class BilateralSliceOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* grid = ctx.Input("Grid"); - auto* guide = ctx.Input("Guide"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* grid = ctx.Input("Grid"); + auto* guide = ctx.Input("Guide"); + auto* output = ctx.Output("Out"); auto* output_data = output->mutable_data(ctx.GetPlace()); auto* grid_data = grid->data(); @@ -447,13 +446,17 @@ template class BilateralSliceGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* guide = ctx.Input("Guide"); - auto* grid = ctx.Input("Grid"); - auto* input_grad = ctx.Output(framework::GradVarName("X")); - auto* grid_grad = ctx.Output(framework::GradVarName("Grid")); - auto* guide_grad = ctx.Output(framework::GradVarName("Guide")); - auto* output_grad = ctx.Input(framework::GradVarName("Out")); + auto* input = ctx.Input("X"); + auto* guide = ctx.Input("Guide"); + auto* grid = ctx.Input("Grid"); + auto* input_grad = + ctx.Output(framework::GradVarName("X")); + auto* grid_grad = + ctx.Output(framework::GradVarName("Grid")); + auto* guide_grad = + ctx.Output(framework::GradVarName("Guide")); + auto* output_grad = + ctx.Input(framework::GradVarName("Out")); const T* input_data = input->data(); const T* guide_data = guide->data(); diff --git a/paddle/fluid/operators/bincount_op.cc b/paddle/fluid/operators/bincount_op.cc index d52de7ace64ab..5f5e19c585bae 100644 --- a/paddle/fluid/operators/bincount_op.cc +++ b/paddle/fluid/operators/bincount_op.cc @@ -25,7 +25,6 @@ namespace paddle { namespace operators { using framework::OpKernelType; -using framework::Tensor; class BincountOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/bmm_op.h b/paddle/fluid/operators/bmm_op.h index 110cd2d2810d8..5ca8df0182049 100644 --- a/paddle/fluid/operators/bmm_op.h +++ b/paddle/fluid/operators/bmm_op.h @@ -26,10 +26,10 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; static void ReshapeTensorIntoMatrixSequence( - framework::Tensor *x, const phi::funcs::MatDescriptor &descriptor) { + phi::DenseTensor *x, const phi::funcs::MatDescriptor &descriptor) { int64_t h, w; h = descriptor.height_; w = descriptor.width_; @@ -40,9 +40,9 @@ static void ReshapeTensorIntoMatrixSequence( x->Resize({descriptor.batch_size_, h, w}); } -static void ReshapeXYOutIntoMatrixSequence(framework::Tensor *x, - framework::Tensor *y, - framework::Tensor *out, +static void ReshapeXYOutIntoMatrixSequence(phi::DenseTensor *x, + phi::DenseTensor *y, + phi::DenseTensor *out, bool trans_x, bool trans_y) { auto x_dim = x->dims(); diff --git a/paddle/fluid/operators/bpr_loss_op.h 
b/paddle/fluid/operators/bpr_loss_op.h index 1c7f158c14b7c..2e1d62dddd2c3 100644 --- a/paddle/fluid/operators/bpr_loss_op.h +++ b/paddle/fluid/operators/bpr_loss_op.h @@ -21,7 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; /*Todo: *Find a way to adapt TolerableValue, using blas or eigen. */ @@ -39,19 +38,19 @@ template class BprLossOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* label = ctx.Input("Label"); - auto* y = ctx.Output("Y"); + auto* x = ctx.Input("X"); + auto* label = ctx.Input("Label"); + auto* y = ctx.Output("Y"); y->mutable_data(ctx.GetPlace()); int rank = x->dims().size(); - Tensor x_2d = framework::ReshapeToMatrix(*x, rank - 1); - Tensor labels_2d = framework::ReshapeToMatrix(*label, rank - 1); - Tensor y_2d = framework::ReshapeToMatrix(*y, rank - 1); + phi::DenseTensor x_2d = framework::ReshapeToMatrix(*x, rank - 1); + phi::DenseTensor labels_2d = framework::ReshapeToMatrix(*label, rank - 1); + phi::DenseTensor y_2d = framework::ReshapeToMatrix(*y, rank - 1); - const framework::Tensor* logits = &x_2d; - const framework::Tensor* labels = &labels_2d; - framework::Tensor* out = &y_2d; + const phi::DenseTensor* logits = &x_2d; + const phi::DenseTensor* labels = &labels_2d; + phi::DenseTensor* out = &y_2d; const int step_size = logits->dims()[0]; const int class_num = logits->dims()[1]; @@ -87,10 +86,10 @@ template class BprLossGradientOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dy = ctx.Input(framework::GradVarName("Y")); - auto* label = ctx.Input("Label"); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* dy = ctx.Input(framework::GradVarName("Y")); + auto* label = ctx.Input("Label"); + auto* dx = ctx.Output(framework::GradVarName("X")); const size_t step_size = static_cast(x->dims()[0]); const size_t num_classes = static_cast(x->dims()[1]); diff --git a/paddle/fluid/operators/broadcast_tensors_op.cc b/paddle/fluid/operators/broadcast_tensors_op.cc index 4f681bc6508d2..df91ef10b181a 100644 --- a/paddle/fluid/operators/broadcast_tensors_op.cc +++ b/paddle/fluid/operators/broadcast_tensors_op.cc @@ -21,7 +21,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { using framework::DDim; -using framework::Tensor; class BroadcastTensorsOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/cast_op.h b/paddle/fluid/operators/cast_op.h index 83cc2e2122539..f4121573577ad 100644 --- a/paddle/fluid/operators/cast_op.h +++ b/paddle/fluid/operators/cast_op.h @@ -30,11 +30,11 @@ struct CastOpTransformFunctor { template struct CastOpFunctor { - const framework::Tensor* in_; - framework::Tensor* out_; + const phi::DenseTensor* in_; + phi::DenseTensor* out_; const DeviceContext& ctx_; - CastOpFunctor(const framework::Tensor* in, - framework::Tensor* out, + CastOpFunctor(const phi::DenseTensor* in, + phi::DenseTensor* out, const DeviceContext& ctx) : in_(in), out_(out), ctx_(ctx) {} @@ -54,8 +54,8 @@ template class CastOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out = context.Output("Out"); + auto* in = context.Input("X"); + auto* out = context.Output("Out"); auto out_dtype = context.Attr("out_dtype"); diff --git a/paddle/fluid/operators/cast_op_mlu.cc b/paddle/fluid/operators/cast_op_mlu.cc index 2caa45702fc9d..7e85702eee4b1 100644 --- a/paddle/fluid/operators/cast_op_mlu.cc +++ b/paddle/fluid/operators/cast_op_mlu.cc @@ -19,14 +19,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class CastMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); auto src_type = static_cast(ctx.Attr("in_dtype")); auto dst_type = static_cast(ctx.Attr("out_dtype")); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/cast_op_npu.cc b/paddle/fluid/operators/cast_op_npu.cc index f9ec6f0685d75..9c430fc0ffe30 100644 --- a/paddle/fluid/operators/cast_op_npu.cc +++ b/paddle/fluid/operators/cast_op_npu.cc @@ -32,15 +32,15 @@ static std::map {framework::proto::VarType::FP64, ACL_DOUBLE}, }; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class CastNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); int dtype = ctx.Attr("out_dtype"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); if (framework::TransToProtoVarType(x->dtype()) == dtype) { diff --git a/paddle/fluid/operators/center_loss_op.cu b/paddle/fluid/operators/center_loss_op.cu index 2548b13559133..fed463d8f7cd7 100644 --- a/paddle/fluid/operators/center_loss_op.cu +++ b/paddle/fluid/operators/center_loss_op.cu @@ -87,10 +87,10 @@ class CenterLossCUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &ctx) const override { auto &device_context = ctx.template device_context(); auto stream = device_context.stream(); - auto *X = ctx.Input("X"); // deep feature - auto *labels = ctx.Input("Label"); - auto *centers = ctx.Input("Centers"); - auto *update_rate = ctx.Input("CenterUpdateRate"); + auto *X = ctx.Input("X"); // deep feature + auto *labels = ctx.Input("Label"); + auto *centers = ctx.Input("Centers"); + auto *update_rate = ctx.Input("CenterUpdateRate"); int cluster_num = ctx.Attr("cluster_num"); 
auto *lr_center = update_rate->data(); bool need_update = static_cast(ctx.Attr("need_update")); @@ -102,24 +102,24 @@ class CenterLossCUDAKernel : public framework::OpKernel { int batch_size = x_dims[0]; const int deep_feat_dim = x_dims[1]; - auto *centers_diff = ctx.Output("SampleCenterDiff"); + auto *centers_diff = ctx.Output("SampleCenterDiff"); auto centers_diff_data = centers_diff->mutable_data(ctx.GetPlace()); auto centers_data = centers->data(); auto centers_dim = centers->dims(); - auto *out_loss = ctx.Output("Loss"); + auto *out_loss = ctx.Output("Loss"); auto loss_data = out_loss->mutable_data(ctx.GetPlace()); - auto *centers_out = ctx.Output("CentersOut"); + auto *centers_out = ctx.Output("CentersOut"); auto *centers_out_data = centers_out->mutable_data(ctx.GetPlace()); auto ctx_place = ctx.GetPlace(); if (centers != centers_out) { framework::TensorCopy( - *static_cast(centers), + *static_cast(centers), ctx_place, *platform::DeviceContextPool::Instance().Get(ctx_place), - static_cast(centers_out)); + static_cast(centers_out)); } int64_t numel = X->numel(); diff --git a/paddle/fluid/operators/center_loss_op.h b/paddle/fluid/operators/center_loss_op.h index 49aec390599a5..989a27f552118 100644 --- a/paddle/fluid/operators/center_loss_op.h +++ b/paddle/fluid/operators/center_loss_op.h @@ -26,7 +26,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template @@ -45,10 +45,10 @@ template class CenterLossKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *X = ctx.Input("X"); // deep feature - auto *labels = ctx.Input("Label"); - auto *centers = ctx.Input("Centers"); - auto *update_rate = ctx.Input("CenterUpdateRate"); + auto *X = ctx.Input("X"); // deep feature + auto *labels = ctx.Input("Label"); + auto *centers = ctx.Input("Centers"); + auto *update_rate = ctx.Input("CenterUpdateRate"); int cluster_num = ctx.Attr("cluster_num"); auto *lr_center = update_rate->data(); T alpha = lr_center[0]; @@ -64,11 +64,11 @@ class CenterLossKernel : public framework::OpKernel { int batch_size = x_dims[0]; int deep_feat_dim = x_dims[1]; - auto centers_diff = ctx.Output("SampleCenterDiff"); + auto centers_diff = ctx.Output("SampleCenterDiff"); auto centers_diff_data = centers_diff->mutable_data(ctx.GetPlace()); - auto *out_loss = ctx.Output("Loss"); + auto *out_loss = ctx.Output("Loss"); - auto *centers_out = ctx.Output("CentersOut"); + auto *centers_out = ctx.Output("CentersOut"); auto *centers_out_data = centers_out->mutable_data(ctx.GetPlace()); if (centers_out_data != centers_data) { @@ -138,9 +138,9 @@ template class CenterLossGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *in0 = context.Input("SampleCenterDiff"); - auto *in1 = context.Input(framework::GradVarName("Loss")); - auto *x_g = context.Output(framework::GradVarName("X")); + auto *in0 = context.Input("SampleCenterDiff"); + auto *in1 = context.Input(framework::GradVarName("Loss")); + auto *x_g = context.Output(framework::GradVarName("X")); auto sub_result = EigenMatrix::From(*in0); auto out_grad = EigenMatrix::From(*in1); diff --git a/paddle/fluid/operators/chunk_eval_op.h b/paddle/fluid/operators/chunk_eval_op.h index 8784c49659669..823a759dddc74 100644 --- a/paddle/fluid/operators/chunk_eval_op.h +++ b/paddle/fluid/operators/chunk_eval_op.h @@ -23,7 +23,6 @@ limitations under the 
License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; template @@ -191,12 +190,13 @@ class ChunkEvalKernel : public framework::OpKernel { auto* inference = context.Input("Inference"); auto place = inference->place(); auto* label = context.Input("Label"); - auto* precision = context.Output("Precision"); - auto* recall = context.Output("Recall"); - auto* f1 = context.Output("F1-Score"); - auto* num_infer_chunks = context.Output("NumInferChunks"); - auto* num_label_chunks = context.Output("NumLabelChunks"); - auto* num_correct_chunks = context.Output("NumCorrectChunks"); + auto* precision = context.Output("Precision"); + auto* recall = context.Output("Recall"); + auto* f1 = context.Output("F1-Score"); + auto* num_infer_chunks = context.Output("NumInferChunks"); + auto* num_label_chunks = context.Output("NumLabelChunks"); + auto* num_correct_chunks = + context.Output("NumCorrectChunks"); const int64_t* inference_data = inference->data(); const int64_t* label_data = label->data(); @@ -219,7 +219,7 @@ class ChunkEvalKernel : public framework::OpKernel { if (use_padding) { auto dim1 = inference->dims()[1]; - auto* seq_length_t = context.Input("SeqLength"); + auto* seq_length_t = context.Input("SeqLength"); auto* seq_length_data = seq_length_t->data(); num_sequences = seq_length_t->dims()[0]; diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.h b/paddle/fluid/operators/cinn/cinn_launch_context.h index 0bbbcc8b03177..a868a182bfc5e 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.h +++ b/paddle/fluid/operators/cinn/cinn_launch_context.h @@ -52,7 +52,7 @@ class CinnCompiledObject; namespace operators::details { -using CinnTensor = ::cinn::hlir::framework::Tensor; +using CinnTensor = ::cinn::hlir::Tensor; using CinnScope = ::cinn::hlir::framework::Scope; using CinnCompiledObject = framework::paddle2cinn::CinnCompiledObject; diff --git a/paddle/fluid/operators/clip_by_norm_op.h b/paddle/fluid/operators/clip_by_norm_op.h index 6fde5106f10a4..841a12ac81bd6 100644 --- a/paddle/fluid/operators/clip_by_norm_op.h +++ b/paddle/fluid/operators/clip_by_norm_op.h @@ -23,7 +23,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; // using SelectedRows = phi::SelectedRows; template class NPUClipByNormKernel : public framework::OpKernel { @@ -39,8 +39,8 @@ class NPUClipByNormKernel : public framework::OpKernel { context.template device_context(); auto stream = dev_ctx.stream(); - auto* input = context.Input("X"); - auto* output = context.Output("Out"); + auto* input = context.Input("X"); + auto* output = context.Output("Out"); output->mutable_data(place); PADDLE_ENFORCE_NOT_NULL(input, diff --git a/paddle/fluid/operators/clip_by_norm_op_xpu.cc b/paddle/fluid/operators/clip_by_norm_op_xpu.cc index dcf3a7826f5fc..b99d12b8628e8 100644 --- a/paddle/fluid/operators/clip_by_norm_op_xpu.cc +++ b/paddle/fluid/operators/clip_by_norm_op_xpu.cc @@ -27,12 +27,12 @@ class XPUClipByNormKernel : public framework::OpKernel { auto max_norm = context.Attr("max_norm"); auto in_var = context.InputVar("X"); - Tensor* output = nullptr; - const Tensor* input = nullptr; + phi::DenseTensor* output = nullptr; + const phi::DenseTensor* input = nullptr; if (in_var->IsType()) { - input = context.Input("X"); + input = context.Input("X"); - output = context.Output("Out"); + output = context.Output("Out"); output->mutable_data(context.GetPlace()); } else { PADDLE_THROW(platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/clip_op_mlu.cc b/paddle/fluid/operators/clip_op_mlu.cc index 88cce62de6cac..daced778a95dc 100644 --- a/paddle/fluid/operators/clip_op_mlu.cc +++ b/paddle/fluid/operators/clip_op_mlu.cc @@ -22,15 +22,15 @@ template class ClipMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto min = static_cast(ctx.Attr("min")); auto max = static_cast(ctx.Attr("max")); if (ctx.HasInput("Min")) { Tensor min_cpu; - auto* min_tensor = ctx.Input("Min"); + auto* min_tensor = ctx.Input("Min"); auto* min_data = min_tensor->data(); if (platform::is_mlu_place(min_tensor->place())) { paddle::framework::TensorCopySync( @@ -42,7 +42,7 @@ class ClipMLUKernel : public framework::OpKernel { if (ctx.HasInput("Max")) { Tensor max_cpu; - auto* max_tensor = ctx.Input("Max"); + auto* max_tensor = ctx.Input("Max"); auto* max_data = max_tensor->data(); if (platform::is_mlu_place(max_tensor->place())) { paddle::framework::TensorCopySync( @@ -68,13 +68,15 @@ template class ClipGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); - auto* min_tensor = ctx.HasInput("Min") ? ctx.Input("Min") : nullptr; - auto* max_tensor = ctx.HasInput("Max") ? ctx.Input("Max") : nullptr; + auto* min_tensor = + ctx.HasInput("Min") ? ctx.Input("Min") : nullptr; + auto* max_tensor = + ctx.HasInput("Max") ? 
ctx.Input("Max") : nullptr; auto min_val = ctx.Attr("min"); if (min_tensor) { diff --git a/paddle/fluid/operators/clip_op_npu.cc b/paddle/fluid/operators/clip_op_npu.cc index 0a7b05f06814d..19ae23add0e10 100644 --- a/paddle/fluid/operators/clip_op_npu.cc +++ b/paddle/fluid/operators/clip_op_npu.cc @@ -18,18 +18,20 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ClipNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); - auto min_tensor = ctx.HasInput("Min") ? ctx.Input("Min") : nullptr; - auto max_tensor = ctx.HasInput("Max") ? ctx.Input("Max") : nullptr; + auto min_tensor = + ctx.HasInput("Min") ? ctx.Input("Min") : nullptr; + auto max_tensor = + ctx.HasInput("Max") ? ctx.Input("Max") : nullptr; Tensor min_tensor_temp(x->type()); Tensor max_tensor_temp(x->type()); @@ -60,13 +62,15 @@ template class ClipGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); - auto* min_tensor = ctx.HasInput("Min") ? ctx.Input("Min") : nullptr; - auto* max_tensor = ctx.HasInput("Max") ? ctx.Input("Max") : nullptr; + auto* min_tensor = + ctx.HasInput("Min") ? ctx.Input("Min") : nullptr; + auto* max_tensor = + ctx.HasInput("Max") ? 
ctx.Input("Max") : nullptr; auto min_val = ctx.Attr("min"); if (min_tensor) { diff --git a/paddle/fluid/operators/coalesce_tensor_op.cc b/paddle/fluid/operators/coalesce_tensor_op.cc index 3e77bd91baf29..beb02ad5a987b 100644 --- a/paddle/fluid/operators/coalesce_tensor_op.cc +++ b/paddle/fluid/operators/coalesce_tensor_op.cc @@ -414,7 +414,7 @@ class CoalesceTensorOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { return framework::OpKernelType(expected_kernel_type.data_type_, expected_kernel_type.place_, diff --git a/paddle/fluid/operators/collective/allreduce_op.h b/paddle/fluid/operators/collective/allreduce_op.h index 12507d76fe73a..a4f935a9c9586 100644 --- a/paddle/fluid/operators/collective/allreduce_op.h +++ b/paddle/fluid/operators/collective/allreduce_op.h @@ -39,8 +39,8 @@ class AllReduceOpKernel : public framework::OpKernel { "AllReduce op can run on gpu place only for now.")); #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) auto& dev_ctx = ctx.template device_context(); - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); int dtype = platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); diff --git a/paddle/fluid/operators/collective/barrier_op.cu.cc b/paddle/fluid/operators/collective/barrier_op.cu.cc index de15395eb4df5..622b25f2a49bb 100644 --- a/paddle/fluid/operators/collective/barrier_op.cu.cc +++ b/paddle/fluid/operators/collective/barrier_op.cu.cc @@ -27,8 +27,8 @@ class BarrierOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); ncclDataType_t dtype = diff --git a/paddle/fluid/operators/collective/broadcast_op.cu.cc b/paddle/fluid/operators/collective/broadcast_op.cu.cc index 4f21dc2992a39..9d1fedc16908d 100644 --- a/paddle/fluid/operators/collective/broadcast_op.cu.cc +++ b/paddle/fluid/operators/collective/broadcast_op.cu.cc @@ -39,8 +39,8 @@ class NCCLBroadcastOpKernel : public framework::OpKernel { int dev_id = ctx.GetPlace().device; int root_dev_id = ctx.Attr("root"); - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); PADDLE_ENFORCE_EQ( out->IsInitialized(), true, diff --git a/paddle/fluid/operators/collective/broadcast_op_xpu.cc b/paddle/fluid/operators/collective/broadcast_op_xpu.cc index 437a93da18843..54eccbead94be 100644 --- a/paddle/fluid/operators/collective/broadcast_op_xpu.cc +++ b/paddle/fluid/operators/collective/broadcast_op_xpu.cc @@ -44,8 +44,8 @@ class BKCLBroadcastOpKernel : public framework::OpKernel { int dev_id = ctx.GetPlace().device; int root_dev_id = ctx.Attr("root"); - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); PADDLE_ENFORCE_EQ( out->IsInitialized(), true, diff --git a/paddle/fluid/operators/collective/c_allgather_op.cu.cc b/paddle/fluid/operators/collective/c_allgather_op.cu.cc index 8356bbb65a8a7..963eda0723080 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cu.cc +++ 
b/paddle/fluid/operators/collective/c_allgather_op.cu.cc @@ -30,8 +30,8 @@ class CAllGatherOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); ncclDataType_t dtype = platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); diff --git a/paddle/fluid/operators/collective/c_allgather_op.h b/paddle/fluid/operators/collective/c_allgather_op.h index 364b813629bd3..198ec4009f4d3 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.h +++ b/paddle/fluid/operators/collective/c_allgather_op.h @@ -37,8 +37,8 @@ class CAllGatherOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_GLOO) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); framework::DDim out_dims = in->dims(); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/collective/c_allgather_op_mlu.cc b/paddle/fluid/operators/collective/c_allgather_op_mlu.cc index fc3ad8a006ec5..7bd30ecadc8c8 100644 --- a/paddle/fluid/operators/collective/c_allgather_op_mlu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op_mlu.cc @@ -28,8 +28,8 @@ class CAllGatherOpMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_CNCL) - auto x = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto x = ctx.Input("X"); + auto out = ctx.Output("Out"); cnclDataType_t dtype = platform::ToCNCLDataType(framework::TransToProtoVarType(x->dtype())); diff --git a/paddle/fluid/operators/collective/c_allgather_op_npu.cc b/paddle/fluid/operators/collective/c_allgather_op_npu.cc index f682872d5c662..b535441ea28ee 100644 --- a/paddle/fluid/operators/collective/c_allgather_op_npu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op_npu.cc @@ -29,8 +29,8 @@ class CAllGatherOpASCENDKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { #if defined(PADDLE_WITH_ASCEND_CL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); HcclDataType dtype = platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype())); diff --git a/paddle/fluid/operators/collective/c_allgather_op_xpu.cc b/paddle/fluid/operators/collective/c_allgather_op_xpu.cc index ca865f7522a23..107f5ccd1b563 100644 --- a/paddle/fluid/operators/collective/c_allgather_op_xpu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op_xpu.cc @@ -27,8 +27,8 @@ class CAllGatherOpXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_XPU_BKCL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); BKCLDataType dtype = platform::ToBKCLDataType(framework::TransToProtoVarType(in->dtype())); diff --git a/paddle/fluid/operators/collective/c_allreduce_op.h b/paddle/fluid/operators/collective/c_allreduce_op.h index 299dd59d5efa7..87c81fdd738d5 100644 --- a/paddle/fluid/operators/collective/c_allreduce_op.h +++ b/paddle/fluid/operators/collective/c_allreduce_op.h @@ -83,8 +83,8 @@ class CAllReduceOpCPUKernel : public 
framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_GLOO) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); int64_t send_numel = in->numel(); @@ -138,8 +138,8 @@ class CAllReduceOpCPUKernel : public framework::OpKernel { // return true if found_nan or return false; inline bool ContainsNan(const paddle::platform::NPUDeviceContext& dev_ctx, aclrtStream stream, - const paddle::framework::Tensor* in) { - using Tensor = paddle::framework::Tensor; + const phi::DenseTensor* in) { + using Tensor = phi::DenseTensor; Tensor out(in->type()); Tensor mean(in->type()); @@ -180,8 +180,8 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_ASCEND_CL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); HcclDataType dtype = platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype())); @@ -237,7 +237,7 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel { << ", use_calc_stream:" << ctx.Attr("use_calc_stream") << ", stream:" << stream; - framework::Tensor tmp; + phi::DenseTensor tmp; tmp.mutable_data({8}, ctx.GetPlace()); bool found_nan = false; @@ -263,7 +263,7 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel { T inf = static_cast(std::numeric_limits::infinity()); VLOG(4) << "fill input data constant inf"; auto dims = in->dims(); - auto mutable_in = const_cast(in); + auto mutable_in = const_cast(in); FillNpuTensorWithConstant(mutable_in, inf); mutable_in->Resize(dims); } @@ -296,8 +296,8 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_XPU_BKCL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); BKCLDataType dtype = @@ -365,8 +365,8 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); int rid = ctx.Attr("ring_id"); auto place = ctx.GetPlace(); @@ -465,8 +465,8 @@ class CAllReduceOpMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_CNCL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); cnclDataType_t dtype = diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc index e43c67d7bf369..2a40b1e45911e 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc @@ -73,10 +73,10 @@ class CBroadcastOpCUDAKernel : public framework::OpKernel { if (out != x) { framework::TensorCopy( - *static_cast(x), + *static_cast(x), place, *platform::DeviceContextPool::Instance().Get(place), - static_cast(out)); + static_cast(out)); } } else { PADDLE_ENFORCE_GPU_SUCCESS( diff --git 
a/paddle/fluid/operators/collective/c_broadcast_op.h b/paddle/fluid/operators/collective/c_broadcast_op.h index 2ccdd91317656..140a4383211f4 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.h +++ b/paddle/fluid/operators/collective/c_broadcast_op.h @@ -36,8 +36,8 @@ class CBroadcastOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_GLOO) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); auto root = ctx.Attr("root"); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/collective/c_broadcast_op_mlu.cc b/paddle/fluid/operators/collective/c_broadcast_op_mlu.cc index ac24451cf81ba..29bbd2afce1fc 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op_mlu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op_mlu.cc @@ -59,10 +59,10 @@ class CBroadcastOPMLUKernel : public framework::OpKernel { if (out != x) { framework::TensorCopy( - *static_cast(x), + *static_cast(x), place, *platform::DeviceContextPool::Instance().Get(place), - static_cast(out)); + static_cast(out)); } } else { PADDLE_ENFORCE_MLU_SUCCESS(cnclBcast(out->mutable_data(place), diff --git a/paddle/fluid/operators/collective/c_broadcast_op_npu.cc b/paddle/fluid/operators/collective/c_broadcast_op_npu.cc index da394182c1736..9bace16f15482 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op_npu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op_npu.cc @@ -65,10 +65,10 @@ class CBroadcastOpASCENDKernel : public framework::OpKernel { dev_ctx->Wait(); if (out != x) { - framework::TensorCopy(*static_cast(x), + framework::TensorCopy(*static_cast(x), place, *platform::DeviceContextPool::Instance().Get(place), - static_cast(out)); + static_cast(out)); } dev_ctx->Wait(); diff --git a/paddle/fluid/operators/collective/c_concat_op.cu.cc b/paddle/fluid/operators/collective/c_concat_op.cu.cc index 74bdd2b63ae57..e2ee9cefdbfb2 100644 --- a/paddle/fluid/operators/collective/c_concat_op.cu.cc +++ b/paddle/fluid/operators/collective/c_concat_op.cu.cc @@ -32,8 +32,8 @@ template class CConcatOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto x = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto x = ctx.Input("X"); + auto out = ctx.Output("Out"); ncclDataType_t dtype = platform::ToNCCLDataType(framework::TransToProtoVarType(x->dtype())); @@ -62,7 +62,7 @@ class CConcatOpCUDAKernel : public framework::OpKernel { nranks)); #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) - framework::Tensor temp_out; + phi::DenseTensor temp_out; framework::DDim temp_out_dims = x->dims(); temp_out_dims[0] *= nranks; temp_out.mutable_data(temp_out_dims, place); @@ -101,14 +101,14 @@ class CConcatOpCUDAKernel : public framework::OpKernel { stream)); } - std::vector inputs; + std::vector inputs; int axis = x->dims().size() - 1; auto out_dims = x->dims(); out_dims[out_dims.size() - 1] *= nranks; int rows_per_tensor = x->dims()[0]; int offset = 0; for (int i = 0; i < nranks; i++) { - framework::Tensor temp = temp_out.Slice(offset, offset + rows_per_tensor); + phi::DenseTensor temp = temp_out.Slice(offset, offset + rows_per_tensor); inputs.emplace_back(temp); offset += rows_per_tensor; } diff --git a/paddle/fluid/operators/collective/c_embedding_op_npu.cc b/paddle/fluid/operators/collective/c_embedding_op_npu.cc index 95ef754196fea..cb016f76110e4 100644 --- 
a/paddle/fluid/operators/collective/c_embedding_op_npu.cc +++ b/paddle/fluid/operators/collective/c_embedding_op_npu.cc @@ -51,21 +51,21 @@ void shard_index(const Tensor &table_t, auto stream = context.template device_context() .stream(); - framework::Tensor id_t_d; + phi::DenseTensor id_t_d; id_t_d.mutable_data(ids_t.dims(), context.GetPlace()); FillNPU(&id_t_d, static_cast(0.0), context); id_t_d.Resize(ids_t.dims()); - framework::Tensor id_t_u; + phi::DenseTensor id_t_u; id_t_u.mutable_data(ids_t.dims(), context.GetPlace()); FillNPU(&id_t_u, static_cast(height - 1), context); id_t_u.Resize(ids_t.dims()); - framework::Tensor id_matched_d; + phi::DenseTensor id_matched_d; id_matched_d.mutable_data(ids_t.dims(), context.GetPlace()); - framework::Tensor id_matched_u; + phi::DenseTensor id_matched_u; id_matched_u.mutable_data(ids_t.dims(), context.GetPlace()); - framework::Tensor ignore_tensor; + phi::DenseTensor ignore_tensor; ignore_tensor.mutable_data(ids_t.dims(), context.GetPlace()); FillNPU(&ignore_tensor, static_cast(height), context); ignore_tensor.Resize(ids_t.dims()); @@ -120,7 +120,7 @@ void NPUGetIdsEmbedding(const framework::ExecutionContext &context) { context.template device_context() .stream(); - framework::Tensor ids_t_local; + phi::DenseTensor ids_t_local; ids_t_local.mutable_data(ids_t->dims(), context.GetPlace()); shard_index(*table_t, *ids_t, start_idx, ids_t_local, context); @@ -185,7 +185,7 @@ void NPUUpdateEmbedding(const framework::ExecutionContext &context) { const int64_t start_idx = context.Attr("start_index"); auto ids_t = context.Input("Ids"); auto d_output_t = context.Input(framework::GradVarName("Out")); - auto table_t = context.Input("W"); + auto table_t = context.Input("W"); auto table_grad_t = context.Output(framework::GradVarName("W")); VLOG(10) << "ids_t:" << ids_t << ", d_output_t:" << d_output_t @@ -196,7 +196,7 @@ void NPUUpdateEmbedding(const framework::ExecutionContext &context) { .stream(); // convert ids_t to local valid - framework::Tensor ids_t_local; + phi::DenseTensor ids_t_local; ids_t_local.mutable_data(ids_t->dims(), context.GetPlace()); shard_index(*table_t, *ids_t, start_idx, ids_t_local, context); diff --git a/paddle/fluid/operators/collective/c_reduce_op.h b/paddle/fluid/operators/collective/c_reduce_op.h index dae4fa497f7fb..1b6149f3fd55e 100644 --- a/paddle/fluid/operators/collective/c_reduce_op.h +++ b/paddle/fluid/operators/collective/c_reduce_op.h @@ -78,8 +78,8 @@ class CReduceOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_GLOO) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); auto root_id = ctx.Attr("root_id"); auto place = ctx.GetPlace(); @@ -223,8 +223,8 @@ class CReduceOpXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_XPU_BKCL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); BKCLDataType dtype = @@ -294,8 +294,8 @@ class CReduceOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); auto place = ctx.GetPlace(); ncclDataType_t 
dtype = diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc index 354c31c213b63..9495ba44ca8a6 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc @@ -27,8 +27,8 @@ class CReduceScatterOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); int rid = ctx.Attr("ring_id"); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/collective/c_reducescatter_op_npu.cc b/paddle/fluid/operators/collective/c_reducescatter_op_npu.cc index d366e3e867c06..81831d9c69328 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op_npu.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op_npu.cc @@ -27,8 +27,8 @@ class CReduceScatterOpAscendKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_ASCEND_CL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); int ring_id = ctx.Attr("ring_id"); std::string group = diff --git a/paddle/fluid/operators/collective/c_scatter_op.cu.cc b/paddle/fluid/operators/collective/c_scatter_op.cu.cc index 42d9ed2342ca0..9d53856a74b00 100644 --- a/paddle/fluid/operators/collective/c_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_scatter_op.cu.cc @@ -68,7 +68,7 @@ class CScatterOpCUDAKernel : public framework::OpKernel { framework::DDim x_dims = x->dims(); framework::DDim out_dims(x_dims); - framework::Tensor temp; + phi::DenseTensor temp; auto out_ptr = temp.mutable_data(out_dims, place); if (root_id == comm->rank()) { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast( @@ -79,10 +79,10 @@ class CScatterOpCUDAKernel : public framework::OpKernel { comm->comm(), stream)); - framework::TensorCopy(*static_cast(x), + framework::TensorCopy(*static_cast(x), place, *platform::DeviceContextPool::Instance().Get(place), - static_cast(&temp)); + static_cast(&temp)); } else { PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclBcast( out_ptr, numel, dtype, root_id, comm->comm(), stream)); @@ -94,9 +94,9 @@ class CScatterOpCUDAKernel : public framework::OpKernel { temp = temp.Slice(start_index, end_index); temp.Resize(out_dims); out->mutable_data(out_dims, place); - framework::TensorCopySync(*static_cast(&temp), + framework::TensorCopySync(*static_cast(&temp), place, - static_cast(out)); + static_cast(out)); out->Resize(out_dims); #else PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/collective/c_scatter_op.h b/paddle/fluid/operators/collective/c_scatter_op.h index ff59f91d32dc4..8e603d87456a9 100644 --- a/paddle/fluid/operators/collective/c_scatter_op.h +++ b/paddle/fluid/operators/collective/c_scatter_op.h @@ -36,8 +36,8 @@ class CScatterOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_GLOO) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); auto root_id = ctx.Attr("root"); auto gloo = paddle::framework::GlooWrapper::GetInstance(); diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu 
b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu index ef7e298aaf6a3..455dcd6d7f9fd 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu @@ -24,7 +24,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; static constexpr int kNumCUDAThreads = 512; static constexpr int kNumMaxinumNumBlocks = 4096; @@ -97,10 +97,10 @@ class CSoftmaxWithCrossEntropyOpCUDAKernel : public framework::OpKernel { template struct CSoftmaxWithCrossEntropyFunctor { void operator()(const framework::ExecutionContext& ctx) { - const Tensor* logits = ctx.Input("Logits"); - const Tensor* labels = ctx.Input("Label"); - Tensor* softmax = ctx.Output("Softmax"); - Tensor* loss = ctx.Output("Loss"); + const phi::DenseTensor* logits = ctx.Input("Logits"); + const phi::DenseTensor* labels = ctx.Input("Label"); + phi::DenseTensor* softmax = ctx.Output("Softmax"); + phi::DenseTensor* loss = ctx.Output("Loss"); const int rid = ctx.Attr("ring_id"); const int nranks = ctx.Attr("nranks"); @@ -250,10 +250,10 @@ struct CSoftmaxWithCrossEntropyFunctor { template struct CSoftmaxWithCrossEntropyProcessGroupFunctor { void operator()(const framework::ExecutionContext& ctx) { - const Tensor* logits = ctx.Input("Logits"); - const Tensor* labels = ctx.Input("Label"); - Tensor* softmax = ctx.Output("Softmax"); - Tensor* loss = ctx.Output("Loss"); + const phi::DenseTensor* logits = ctx.Input("Logits"); + const phi::DenseTensor* labels = ctx.Input("Label"); + phi::DenseTensor* softmax = ctx.Output("Softmax"); + phi::DenseTensor* loss = ctx.Output("Loss"); const int rid = ctx.Attr("ring_id"); const int nranks = ctx.Attr("nranks"); @@ -384,12 +384,13 @@ template class CSoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - const Tensor* labels = context.Input("Label"); - const Tensor* loss_grad = - context.Input(framework::GradVarName("Loss")); - Tensor* logit_grad = - context.Output(framework::GradVarName("Logits")); - const Tensor* softmax = context.Input("Softmax"); + const phi::DenseTensor* labels = context.Input("Label"); + const phi::DenseTensor* loss_grad = + context.Input(framework::GradVarName("Loss")); + phi::DenseTensor* logit_grad = + context.Output(framework::GradVarName("Logits")); + const phi::DenseTensor* softmax = + context.Input("Softmax"); const int rank = context.Attr("rank"); auto& dev_ctx = context.template device_context(); diff --git a/paddle/fluid/operators/collective/c_split_op.cu b/paddle/fluid/operators/collective/c_split_op.cu index 5b34e4ba9d594..2089c23fa6ec5 100644 --- a/paddle/fluid/operators/collective/c_split_op.cu +++ b/paddle/fluid/operators/collective/c_split_op.cu @@ -56,8 +56,8 @@ template class CSplitOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto x = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto x = ctx.Input("X"); + auto out = ctx.Output("Out"); int nranks = ctx.Attr("nranks"); int rank = ctx.Attr("rank"); diff --git a/paddle/fluid/operators/collective/global_gather_op.cu.cc b/paddle/fluid/operators/collective/global_gather_op.cu.cc index 3d7ab09f45e7d..89714eb8d2744 100644 --- a/paddle/fluid/operators/collective/global_gather_op.cu.cc +++ b/paddle/fluid/operators/collective/global_gather_op.cu.cc @@ -48,7 
+48,7 @@ struct GlobalGatherFunctor { const int64_t* cpu_global_count_data; auto local_count_len = 0; - framework::Tensor cpu_local_count; + phi::DenseTensor cpu_local_count; if (platform::is_cpu_place(local_count->place())) { cpu_local_count_data = local_count->data(); local_count_len = local_count->numel(); @@ -59,7 +59,7 @@ struct GlobalGatherFunctor { local_count_len = cpu_local_count.numel(); } - framework::Tensor cpu_global_count; + phi::DenseTensor cpu_global_count; if (platform::is_cpu_place(global_count->place())) { cpu_global_count_data = global_count->data(); } else { @@ -169,7 +169,7 @@ struct GlobalGatherProcessGroupFunctor { const int64_t* cpu_global_count_data; auto local_count_len = 0; - framework::Tensor cpu_local_count; + phi::DenseTensor cpu_local_count; if (platform::is_cpu_place(local_count->place())) { cpu_local_count_data = local_count->data(); local_count_len = local_count->numel(); @@ -180,7 +180,7 @@ struct GlobalGatherProcessGroupFunctor { local_count_len = cpu_local_count.numel(); } - framework::Tensor cpu_global_count; + phi::DenseTensor cpu_global_count; if (platform::is_cpu_place(global_count->place())) { cpu_global_count_data = global_count->data(); } else { diff --git a/paddle/fluid/operators/collective/global_scatter_op.cu.cc b/paddle/fluid/operators/collective/global_scatter_op.cu.cc index 1337901f185af..d53afb919ccf0 100644 --- a/paddle/fluid/operators/collective/global_scatter_op.cu.cc +++ b/paddle/fluid/operators/collective/global_scatter_op.cu.cc @@ -46,7 +46,7 @@ struct GlobalScatterFunctor { auto out = ctx.Output("Out"); const int64_t* cpu_local_count_data; const int64_t* cpu_global_count_data; - framework::Tensor cpu_local_count; + phi::DenseTensor cpu_local_count; if (platform::is_cpu_place(local_count->place())) { cpu_local_count_data = local_count->data(); } else { @@ -55,7 +55,7 @@ struct GlobalScatterFunctor { cpu_local_count_data = cpu_local_count.data(); } auto global_count_len = 0; - framework::Tensor cpu_global_count; + phi::DenseTensor cpu_global_count; if (platform::is_cpu_place(global_count->place())) { cpu_global_count_data = global_count->data(); global_count_len = global_count->numel(); @@ -167,7 +167,7 @@ struct GlobalScatterProcessGroupFunctor { auto out = ctx.Output("Out"); const int64_t* cpu_local_count_data; const int64_t* cpu_global_count_data; - framework::Tensor cpu_local_count; + phi::DenseTensor cpu_local_count; if (platform::is_cpu_place(local_count->place())) { cpu_local_count_data = local_count->data(); } else { @@ -176,7 +176,7 @@ struct GlobalScatterProcessGroupFunctor { cpu_local_count_data = cpu_local_count.data(); } auto global_count_len = 0; - framework::Tensor cpu_global_count; + phi::DenseTensor cpu_global_count; if (platform::is_cpu_place(global_count->place())) { cpu_global_count_data = global_count->data(); global_count_len = global_count->numel(); diff --git a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc index 6bc18254737d3..eeda5c72d9cae 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc @@ -28,8 +28,8 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); int64_t numel = 
in->numel(); ncclDataType_t dtype = platform::ToNCCLDataType(framework::TransToProtoVarType(in->dtype())); diff --git a/paddle/fluid/operators/collective/partial_allgather_op_npu.cc b/paddle/fluid/operators/collective/partial_allgather_op_npu.cc index 6b573c94535f7..d032839b83323 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op_npu.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op_npu.cc @@ -26,8 +26,8 @@ class CallPartialGatherOpASCENDKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { #if defined(PADDLE_WITH_ASCEND_CL) - auto in = ctx.Input("X"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto out = ctx.Output("Out"); int64_t numel = in->numel(); HcclDataType dtype = platform::ToHCCLDataType(framework::TransToProtoVarType(in->dtype())); diff --git a/paddle/fluid/operators/collective/recv_v2_op.cu.cc b/paddle/fluid/operators/collective/recv_v2_op.cu.cc index ec18a172e1f8b..f68c2caf32bcb 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/recv_v2_op.cu.cc @@ -46,7 +46,7 @@ framework::DDim recv_shape_info(const platform::Place &place, platform::ToNCCLDataType(framework::TransToProtoVarType(shape_dytpe)); // step1: recv the shape size - framework::Tensor gpu_shape_size_tensor(shape_dytpe); + phi::DenseTensor gpu_shape_size_tensor(shape_dytpe); if (!group) { gpu_shape_size_tensor.Resize({1}); gpu_shape_size_tensor.mutable_data(place, shape_dytpe); @@ -56,11 +56,11 @@ framework::DDim recv_shape_info(const platform::Place &place, } // copy the shape size tensor to cpu - framework::Tensor *cpu_shape_size_tensor = new framework::Tensor(shape_dytpe); + phi::DenseTensor *cpu_shape_size_tensor = new phi::DenseTensor(shape_dytpe); cpu_shape_size_tensor->Resize({1}); cpu_shape_size_tensor->mutable_data(platform::CPUPlace(), shape_dytpe); if (group) { - std::vector shape_size_tensor; + std::vector shape_size_tensor; shape_size_tensor.emplace_back(*cpu_shape_size_tensor); auto shape_size_task = group->Recv(shape_size_tensor, peer); } else { @@ -72,7 +72,7 @@ framework::DDim recv_shape_info(const platform::Place &place, VLOG(3) << "recv the shape size: " << shape_size << " from peer"; // step2: recv the shape - framework::Tensor gpu_shape_tensor(shape_dytpe); + phi::DenseTensor gpu_shape_tensor(shape_dytpe); if (!group) { gpu_shape_tensor.Resize({shape_size}); gpu_shape_tensor.mutable_data(place, shape_dytpe); @@ -82,11 +82,11 @@ framework::DDim recv_shape_info(const platform::Place &place, } // copy the shape tensor to cpu - framework::Tensor *cpu_shape_tensor = new framework::Tensor(shape_dytpe); + phi::DenseTensor *cpu_shape_tensor = new phi::DenseTensor(shape_dytpe); cpu_shape_tensor->Resize({shape_size}); cpu_shape_tensor->mutable_data(platform::CPUPlace(), shape_dytpe); if (group) { - std::vector shape_tensor; + std::vector shape_tensor; shape_tensor.emplace_back(*cpu_shape_tensor); auto shape_task = group->Recv(shape_tensor, peer); } else { diff --git a/paddle/fluid/operators/collective/send_v2_op.cu.cc b/paddle/fluid/operators/collective/send_v2_op.cu.cc index 37b18703031de..9f63403dc43b2 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cu.cc @@ -26,7 +26,7 @@ namespace operators { #if (defined(PADDLE_WITH_RCCL) || defined(PADDLE_WITH_NCCL)) && \ NCCL_VERSION_CODE >= 2703 -void send_shape_info(const framework::Tensor& x, +void send_shape_info(const phi::DenseTensor& 
x, const platform::Place& place, const gpuStream_t& stream, platform::NCCLComm* comm, @@ -47,20 +47,19 @@ void send_shape_info(const framework::Tensor& x, int shape_size = dims.size(); // step1: send the shape size - framework::Tensor cpu_shape_size_tensor(shape_dytpe); + phi::DenseTensor cpu_shape_size_tensor(shape_dytpe); cpu_shape_size_tensor.Resize({1}); cpu_shape_size_tensor.mutable_data(platform::CPUPlace(), shape_dytpe); auto* cpu_data = cpu_shape_size_tensor.data(); cpu_data[0] = shape_size; if (group) { - std::vector shape_size_tensor; + std::vector shape_size_tensor; shape_size_tensor.template emplace_back(cpu_shape_size_tensor); auto shape_size_task = group->Send(shape_size_tensor, peer); } else { // copy the shape size tensor to gpu and send - framework::Tensor* gpu_shape_size_tensor = - new framework::Tensor(shape_dytpe); + phi::DenseTensor* gpu_shape_size_tensor = new phi::DenseTensor(shape_dytpe); gpu_shape_size_tensor->Resize({1}); gpu_shape_size_tensor->mutable_data(place, shape_dytpe); framework::TensorCopySync( @@ -76,7 +75,7 @@ void send_shape_info(const framework::Tensor& x, VLOG(3) << "send the shape size: " << shape_size << " to peer"; // step2: send the shape - framework::Tensor cpu_shape_tensor(shape_dytpe); + phi::DenseTensor cpu_shape_tensor(shape_dytpe); cpu_shape_tensor.Resize({shape_size}); cpu_shape_tensor.mutable_data(platform::CPUPlace(), shape_dytpe); auto* cpu_shape_data = cpu_shape_tensor.data(); @@ -85,12 +84,12 @@ void send_shape_info(const framework::Tensor& x, } if (group) { - std::vector shape_tensor; + std::vector shape_tensor; shape_tensor.template emplace_back(cpu_shape_tensor); auto shape_task = group->Send(shape_tensor, peer); } else { // copy the shape tensor to gpu and send - framework::Tensor* gpu_shape_tensor = new framework::Tensor(shape_dytpe); + phi::DenseTensor* gpu_shape_tensor = new phi::DenseTensor(shape_dytpe); gpu_shape_tensor->Resize({shape_size}); gpu_shape_tensor->mutable_data(place, shape_dytpe); framework::TensorCopySync(cpu_shape_tensor, place, gpu_shape_tensor); diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc index 7c3a8103e1dbb..a875f1fc8df9e 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -30,7 +30,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class ConcatOp : public framework::OperatorWithKernel { public: @@ -39,7 +39,7 @@ class ConcatOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - auto inputs = ctx.MultiInput("X"); + auto inputs = ctx.MultiInput("X"); auto input_data_type = framework::proto::VarType::Type(0); bool flag = 0; for (auto *input : inputs) { @@ -66,7 +66,7 @@ class ConcatOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "AxisTensor") { return expected_kernel_type; @@ -145,7 +145,7 @@ class ConcatOpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "AxisTensor") { return expected_kernel_type; diff --git a/paddle/fluid/operators/concat_op_mlu.cc b/paddle/fluid/operators/concat_op_mlu.cc index a4cc1c37db0cf..38e87ad45bf27 100644 --- a/paddle/fluid/operators/concat_op_mlu.cc +++ b/paddle/fluid/operators/concat_op_mlu.cc @@ -31,7 +31,7 @@ class ConcatMLUKernel : public framework::OpKernel { auto ins_size = ins.size(); bool need_resize_out_dims = false; if (ctx.HasInput("AxisTensor")) { - auto* axis_tensor = ctx.Input("AxisTensor"); + auto* axis_tensor = ctx.Input("AxisTensor"); axis = GetDataFromTensor(axis_tensor)[0]; need_resize_out_dims = true; } @@ -84,8 +84,7 @@ template class ConcatGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out_grad = - ctx.Input(framework::GradVarName("Out")); + auto* out_grad = ctx.Input(framework::GradVarName("Out")); auto ins = ctx.MultiInput("X"); auto out_var_names = ctx.OutputNames(framework::GradVarName("X")); auto outs = @@ -98,7 +97,7 @@ class ConcatGradMLUKernel : public framework::OpKernel { "The first input tensor is not initalized.")); if (ctx.HasInput("AxisTensor")) { - auto* axis_tensor = ctx.Input("AxisTensor"); + auto* axis_tensor = ctx.Input("AxisTensor"); axis = GetDataFromTensor(axis_tensor)[0]; } diff --git a/paddle/fluid/operators/concat_op_npu.cc b/paddle/fluid/operators/concat_op_npu.cc index 3dc0d28bd452d..6c2c48292adbd 100644 --- a/paddle/fluid/operators/concat_op_npu.cc +++ b/paddle/fluid/operators/concat_op_npu.cc @@ -39,7 +39,7 @@ class ConcatNPUKernel : public framework::OpKernel { auto place = ctx.GetPlace(); out->mutable_data(place); - std::vector inputs; + std::vector inputs; std::vector names; for (size_t i = 0; i < ins.size(); ++i) { if (ins[i] && ins[i]->numel() > 0) { @@ -66,8 +66,7 @@ template class ConcatGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out_grad = - ctx.Input(framework::GradVarName("Out")); + auto* out_grad = ctx.Input(framework::GradVarName("Out")); auto ins = ctx.MultiInput("X"); auto out_var_names = ctx.OutputNames(framework::GradVarName("X")); auto outs = diff --git a/paddle/fluid/operators/controlflow/fetch_op.cc b/paddle/fluid/operators/controlflow/fetch_op.cc index c1ed46867f1ac..ec1ad1475d644 100644 --- 
a/paddle/fluid/operators/controlflow/fetch_op.cc +++ b/paddle/fluid/operators/controlflow/fetch_op.cc @@ -30,7 +30,7 @@ static void DataCopy(const framework::LoDTensor &src_item, #ifdef PADDLE_WITH_MKLDNN // Conversion from MKL-DNN to Paddle if (src_item.layout() == framework::DataLayout::kMKLDNN) { - framework::Tensor out; + phi::DenseTensor out; // Convert to desired Paddle layout, apart from grads of filter // as params are not a subject to paddle's data_format VLOG(4) << "innerTransDataLayoutFromMKLDNN"; diff --git a/paddle/fluid/operators/controlflow/fetch_v2_op.cc b/paddle/fluid/operators/controlflow/fetch_v2_op.cc index 64489c294d123..8478db44853b8 100644 --- a/paddle/fluid/operators/controlflow/fetch_v2_op.cc +++ b/paddle/fluid/operators/controlflow/fetch_v2_op.cc @@ -38,7 +38,7 @@ static void DeepCopy(const framework::LoDTensor &src_item, #ifdef PADDLE_WITH_MKLDNN // Conversion from MKL-DNN to Paddle if (src_item.layout() == framework::DataLayout::kMKLDNN) { - framework::Tensor out; + phi::DenseTensor out; // Convert to desired Paddle layout, apart from grads of filter // as params are not a subject to paddle's data_format framework::innerTransDataLayoutFromMKLDNN( @@ -75,7 +75,7 @@ class FetchV2Op : public framework::OperatorWithKernel { protected: framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (!tensor.IsInitialized()) { return expected_kernel_type; diff --git a/paddle/fluid/operators/controlflow/logical_op_mlu.cc b/paddle/fluid/operators/controlflow/logical_op_mlu.cc index 8eb30607158ec..5e1630447b9de 100644 --- a/paddle/fluid/operators/controlflow/logical_op_mlu.cc +++ b/paddle/fluid/operators/controlflow/logical_op_mlu.cc @@ -18,15 +18,15 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class LogicalMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/controlflow/logical_op_npu.cc b/paddle/fluid/operators/controlflow/logical_op_npu.cc index c3d7df8d02743..7c2c11bbfb40e 100644 --- a/paddle/fluid/operators/controlflow/logical_op_npu.cc +++ b/paddle/fluid/operators/controlflow/logical_op_npu.cc @@ -15,14 +15,14 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class LogicalNotNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); @@ -39,9 +39,9 @@ template class LogicalOrNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); @@ -58,9 +58,9 @@ template class LogicalAndPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/controlflow/logical_op_xpu.h b/paddle/fluid/operators/controlflow/logical_op_xpu.h index 445d853364ddd..5e1a24116b080 100644 --- a/paddle/fluid/operators/controlflow/logical_op_xpu.h +++ b/paddle/fluid/operators/controlflow/logical_op_xpu.h @@ -42,16 +42,16 @@ template class BinaryLogicalOpXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* y = context.Input("Y"); - auto* out = context.Output("Out"); + auto* x = context.Input("X"); + auto* y = context.Input("Y"); + auto* out = context.Output("Out"); bool* out_ptr = out->mutable_data(context.GetPlace()); const T* x_ptr = x->data(); const T* y_ptr = y->data(); auto& dev_ctx = context.template device_context(); - framework::Tensor broadcast_x; - framework::Tensor broadcast_y; + phi::DenseTensor broadcast_x; + phi::DenseTensor broadcast_y; bool need_broad_cast = false; if (x->numel() != out->numel()) { // x need broadcast @@ -160,8 +160,8 @@ template class UnaryLogicalOpXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* out = context.Output("Out"); + auto* x = context.Input("X"); + auto* out = context.Output("Out"); if (x->numel() == 0) { return; } diff --git a/paddle/fluid/operators/conv_base_helper.h b/paddle/fluid/operators/conv_base_helper.h index 285dc8fddb7f3..705fc1f5618b5 100644 --- a/paddle/fluid/operators/conv_base_helper.h +++ b/paddle/fluid/operators/conv_base_helper.h @@ -28,7 +28,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DataLayout = platform::DataLayout; using framework::AlgorithmsCache; using framework::ConvSearchCache; @@ -68,7 +68,7 @@ struct ConvArgsBase { platform::TensorDescriptor idesc, odesc; platform::FilterDescriptor wdesc; platform::ConvolutionDescriptor cdesc; - const framework::Tensor *x, *w, *o; + const phi::DenseTensor *x, *w, *o; DataT cudnn_dtype; // strides @@ -84,9 +84,9 @@ struct ConvArgsBase { // data foramt DataLayout data_layout; - ConvArgsBase(const framework::Tensor* x, - const framework::Tensor* w, - const framework::Tensor* o, + ConvArgsBase(const phi::DenseTensor* x, + const phi::DenseTensor* w, + const phi::DenseTensor* o, const std::vector s, const std::vector p, const std::vector d, diff --git a/paddle/fluid/operators/conv_cudnn_helper.h b/paddle/fluid/operators/conv_cudnn_helper.h index ba4e5585f363f..8795b3fa14bcc 100644 --- a/paddle/fluid/operators/conv_cudnn_helper.h +++ b/paddle/fluid/operators/conv_cudnn_helper.h @@ -29,8 +29,8 @@ using ConvArgs = ConvArgsBase; template static void RemovePaddingSlice(const phi::GPUContext& context, - const Tensor* input, - Tensor* out, + const phi::DenseTensor* input, + phi::DenseTensor* out, const std::vector& starts, const std::vector& axes) { auto& place = *context.eigen_device(); diff --git a/paddle/fluid/operators/conv_miopen_helper.h b/paddle/fluid/operators/conv_miopen_helper.h index 648116647b04a..907ae50941602 100644 --- a/paddle/fluid/operators/conv_miopen_helper.h +++ b/paddle/fluid/operators/conv_miopen_helper.h @@ -24,8 +24,8 @@ using ConvArgs = ConvArgsBase; template static void RemovePaddingSlice(const phi::GPUContext& context, - const Tensor* input, - Tensor* out, + const phi::DenseTensor* input, + phi::DenseTensor* out, const std::vector& starts, const std::vector& axes) { auto& place = *context.eigen_device(); diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index d9c1332191ac7..f61329107125a 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -222,8 +222,8 @@ framework::OpKernelType ConvOp::GetExpectedKernelType( if (input_data_type != framework::proto::VarType::INT8 && input_data_type != framework::proto::VarType::UINT8 && input_data_type != framework::proto::VarType::BF16) { - auto filter_data_type = - framework::TransToProtoVarType(ctx.Input("Filter")->dtype()); + auto filter_data_type = framework::TransToProtoVarType( + ctx.Input("Filter")->dtype()); PADDLE_ENFORCE_EQ( input_data_type, filter_data_type, @@ -260,7 +260,7 @@ framework::OpKernelType ConvOp::GetExpectedKernelType( framework::OpKernelType ConvOp::GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const { #ifdef PADDLE_WITH_MKLDNN // Only input require reshaping, weights and @@ -532,7 +532,7 @@ framework::OpKernelType ConvOpGrad::GetExpectedKernelType( framework::OpKernelType ConvOpGrad::GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const { #ifdef PADDLE_WITH_MKLDNN // Only input require reshaping, weights and diff --git a/paddle/fluid/operators/conv_op.h b/paddle/fluid/operators/conv_op.h index 806265376fa1f..925603dad9ba3 100644 --- a/paddle/fluid/operators/conv_op.h +++ b/paddle/fluid/operators/conv_op.h @@ -29,7 +29,7 @@ limitations under 
the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; constexpr int kConvMKLDNNFP32 = 1; constexpr int kConvMKLDNNINT8 = 2; constexpr int kConvMKLDNNINT8WS8 = 3; @@ -207,7 +207,7 @@ class ConvOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override; }; @@ -222,7 +222,7 @@ class ConvOpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override; }; diff --git a/paddle/fluid/operators/conv_op_mlu.cc b/paddle/fluid/operators/conv_op_mlu.cc index 0e0ed82e8798a..cd0bd90637e34 100644 --- a/paddle/fluid/operators/conv_op_mlu.cc +++ b/paddle/fluid/operators/conv_op_mlu.cc @@ -18,16 +18,16 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DataLayout = framework::DataLayout; template class MLUConvOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); - auto* filter = ctx.Input("Filter"); - auto* output = ctx.Output("Output"); + const phi::DenseTensor* input = ctx.Input("Input"); + auto* filter = ctx.Input("Filter"); + auto* output = ctx.Output("Output"); output->mutable_data(ctx.GetPlace()); const std::vector strides = ctx.Attr>("strides"); std::vector paddings = ctx.Attr>("paddings"); @@ -129,11 +129,14 @@ template class MLUConvGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto input = ctx.Input("Input"); - auto filter = ctx.Input("Filter"); - auto output_grad = ctx.Input(framework::GradVarName("Output")); - auto input_grad = ctx.Output(framework::GradVarName("Input")); - auto filter_grad = ctx.Output(framework::GradVarName("Filter")); + auto input = ctx.Input("Input"); + auto filter = ctx.Input("Filter"); + auto output_grad = + ctx.Input(framework::GradVarName("Output")); + auto input_grad = + ctx.Output(framework::GradVarName("Input")); + auto filter_grad = + ctx.Output(framework::GradVarName("Filter")); const std::vector strides = ctx.Attr>("strides"); std::vector paddings = ctx.Attr>("paddings"); @@ -292,9 +295,9 @@ template class MLUDepthwiseConvOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); - auto* filter = ctx.Input("Filter"); - auto* output = ctx.Output("Output"); + const phi::DenseTensor* input = ctx.Input("Input"); + auto* filter = ctx.Input("Filter"); + auto* output = ctx.Output("Output"); output->mutable_data(ctx.GetPlace()); const std::vector strides = ctx.Attr>("strides"); std::vector paddings = ctx.Attr>("paddings"); @@ -398,11 +401,14 @@ template class MLUDepthwiseConvGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto input = ctx.Input("Input"); - auto filter = ctx.Input("Filter"); - auto output_grad = ctx.Input(framework::GradVarName("Output")); - auto input_grad = ctx.Output(framework::GradVarName("Input")); - auto filter_grad = ctx.Output(framework::GradVarName("Filter")); + auto input = 
ctx.Input("Input"); + auto filter = ctx.Input("Filter"); + auto output_grad = + ctx.Input(framework::GradVarName("Output")); + auto input_grad = + ctx.Output(framework::GradVarName("Input")); + auto filter_grad = + ctx.Output(framework::GradVarName("Filter")); const std::vector strides = ctx.Attr>("strides"); std::vector paddings = ctx.Attr>("paddings"); diff --git a/paddle/fluid/operators/conv_op_npu.cc b/paddle/fluid/operators/conv_op_npu.cc index dad2e7d238bfc..f4c7de95483b5 100644 --- a/paddle/fluid/operators/conv_op_npu.cc +++ b/paddle/fluid/operators/conv_op_npu.cc @@ -18,12 +18,12 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; static void CastToFP16(const framework::ExecutionContext& ctx, const aclrtStream& stream, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { out->mutable_data(ctx.GetPlace()); NpuOpRunner runner; runner.SetType("Cast") @@ -35,8 +35,8 @@ static void CastToFP16(const framework::ExecutionContext& ctx, static void CastToFP32(const framework::ExecutionContext& ctx, const aclrtStream& stream, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { out->mutable_data(ctx.GetPlace()); NpuOpRunner runner; runner.SetType("Cast") @@ -50,9 +50,9 @@ template class DepthwiseConvNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); - const Tensor* filter = ctx.Input("Filter"); - Tensor* output = ctx.Output("Output"); + const phi::DenseTensor* input = ctx.Input("Input"); + const phi::DenseTensor* filter = ctx.Input("Filter"); + phi::DenseTensor* output = ctx.Output("Output"); output->mutable_data(ctx.GetPlace()); const std::vector stride = ctx.Attr>("strides"); @@ -151,11 +151,14 @@ template class DepthwiseConvGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); - const Tensor* filter = ctx.Input("Filter"); - auto output_grad = ctx.Input(framework::GradVarName("Output")); - auto input_grad = ctx.Output(framework::GradVarName("Input")); - auto filter_grad = ctx.Output(framework::GradVarName("Filter")); + const phi::DenseTensor* input = ctx.Input("Input"); + const phi::DenseTensor* filter = ctx.Input("Filter"); + auto output_grad = + ctx.Input(framework::GradVarName("Output")); + auto input_grad = + ctx.Output(framework::GradVarName("Input")); + auto filter_grad = + ctx.Output(framework::GradVarName("Filter")); const std::vector stride = ctx.Attr>("strides"); std::vector padding = ctx.Attr>("paddings"); @@ -268,9 +271,9 @@ template class NPUConvOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); - auto* filter = ctx.Input("Filter"); - auto* output = ctx.Output("Output"); + const phi::DenseTensor* input = ctx.Input("Input"); + auto* filter = ctx.Input("Filter"); + auto* output = ctx.Output("Output"); output->mutable_data(ctx.GetPlace()); const std::vector strides = ctx.Attr>("strides"); std::vector paddings = ctx.Attr>("paddings"); @@ -336,11 +339,14 @@ template class NPUConvGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto input = ctx.Input("Input"); - auto filter = 
ctx.Input("Filter"); - auto output_grad = ctx.Input(framework::GradVarName("Output")); - auto input_grad = ctx.Output(framework::GradVarName("Input")); - auto filter_grad = ctx.Output(framework::GradVarName("Filter")); + auto input = ctx.Input("Input"); + auto filter = ctx.Input("Filter"); + auto output_grad = + ctx.Input(framework::GradVarName("Output")); + auto input_grad = + ctx.Output(framework::GradVarName("Input")); + auto filter_grad = + ctx.Output(framework::GradVarName("Filter")); const std::vector strides = ctx.Attr>("strides"); std::vector paddings = ctx.Attr>("paddings"); @@ -447,9 +453,9 @@ template class NPUConv3dKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); - const Tensor* filter = ctx.Input("Filter"); - Tensor* output = ctx.Output("Output"); + const phi::DenseTensor* input = ctx.Input("Input"); + const phi::DenseTensor* filter = ctx.Input("Filter"); + phi::DenseTensor* output = ctx.Output("Output"); const std::vector strides = ctx.Attr>("strides"); std::vector paddings = ctx.Attr>("paddings"); @@ -533,12 +539,14 @@ template class NPUConv3dGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); - const Tensor* filter = ctx.Input("Filter"); - const Tensor* output_grad = - ctx.Input(framework::GradVarName("Output")); - Tensor* input_grad = ctx.Output(framework::GradVarName("Input")); - Tensor* filter_grad = ctx.Output(framework::GradVarName("Filter")); + const phi::DenseTensor* input = ctx.Input("Input"); + const phi::DenseTensor* filter = ctx.Input("Filter"); + const phi::DenseTensor* output_grad = + ctx.Input(framework::GradVarName("Output")); + phi::DenseTensor* input_grad = + ctx.Output(framework::GradVarName("Input")); + phi::DenseTensor* filter_grad = + ctx.Output(framework::GradVarName("Filter")); const std::vector strides = ctx.Attr>("strides"); std::vector paddings = ctx.Attr>("paddings"); diff --git a/paddle/fluid/operators/conv_shift_op.cc b/paddle/fluid/operators/conv_shift_op.cc index d7dfa88e2d277..c6b33998eb61b 100644 --- a/paddle/fluid/operators/conv_shift_op.cc +++ b/paddle/fluid/operators/conv_shift_op.cc @@ -21,7 +21,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::Tensor; template @@ -156,9 +155,9 @@ template class ConvShiftKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *X = context.Input("X"); - auto *Y = context.Input("Y"); - auto *Out = context.Output("Out"); + auto *X = context.Input("X"); + auto *Y = context.Input("Y"); + auto *Out = context.Output("Out"); Out->mutable_data(context.GetPlace()); auto x = EigenMatrix::From(*X); @@ -187,11 +186,11 @@ class ConvShiftGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *X = context.Input("X"); - auto *Y = context.Input("Y"); - auto *dOut = context.Input(framework::GradVarName("Out")); - auto *dX = context.Output(framework::GradVarName("X")); - auto *dY = context.Output(framework::GradVarName("Y")); + auto *X = context.Input("X"); + auto *Y = context.Input("Y"); + auto *dOut = context.Input(framework::GradVarName("Out")); + auto *dX = context.Output(framework::GradVarName("X")); + auto *dY = context.Output(framework::GradVarName("Y")); auto x = EigenMatrix::From(*X); auto y = EigenMatrix::From(*Y); diff --git a/paddle/fluid/operators/conv_shift_op.cu b/paddle/fluid/operators/conv_shift_op.cu index 89b703d8d1a5d..689722d24eccb 100644 --- a/paddle/fluid/operators/conv_shift_op.cu +++ b/paddle/fluid/operators/conv_shift_op.cu @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - namespace { inline int DivUp(int x, int y) { return (x + y - 1) / y; } @@ -127,9 +125,9 @@ template class ConvShiftKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const Tensor *X = context.Input("X"); - const Tensor *Y = context.Input("Y"); - Tensor *Out = context.Output("Out"); + const phi::DenseTensor *X = context.Input("X"); + const phi::DenseTensor *Y = context.Input("Y"); + phi::DenseTensor *Out = context.Output("Out"); const T *x_data = X->data(); const T *y_data = Y->data(); T *out_data = Out->mutable_data(context.GetPlace()); @@ -156,15 +154,18 @@ template class ConvShiftGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const Tensor *X = context.Input("X"); - const Tensor *Y = context.Input("Y"); - const Tensor *dOut = context.Input(framework::GradVarName("Out")); + const phi::DenseTensor *X = context.Input("X"); + const phi::DenseTensor *Y = context.Input("Y"); + const phi::DenseTensor *dOut = + context.Input(framework::GradVarName("Out")); const T *x_data = X->data(); const T *y_data = Y->data(); const T *dout_data = dOut->data(); - Tensor *dX = context.Output(framework::GradVarName("X")); - Tensor *dY = context.Output(framework::GradVarName("Y")); + phi::DenseTensor *dX = + context.Output(framework::GradVarName("X")); + phi::DenseTensor *dY = + context.Output(framework::GradVarName("Y")); int batch_size = X->dims()[0]; int x_width = X->dims()[1]; diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc index d883d2da291b2..8c221ec542114 100644 --- a/paddle/fluid/operators/conv_transpose_op.cc +++ b/paddle/fluid/operators/conv_transpose_op.cc @@ -64,7 +64,7 @@ framework::OpKernelType ConvTransposeOp::GetExpectedKernelType( framework::OpKernelType ConvTransposeOp::GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& 
tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const { #ifdef PADDLE_WITH_MKLDNN // Only input require reshaping, weights and diff --git a/paddle/fluid/operators/conv_transpose_op.h b/paddle/fluid/operators/conv_transpose_op.h index ca82ca518a9e7..d47828e5bdc8f 100644 --- a/paddle/fluid/operators/conv_transpose_op.h +++ b/paddle/fluid/operators/conv_transpose_op.h @@ -43,7 +43,7 @@ class ConvTransposeOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override; }; diff --git a/paddle/fluid/operators/conv_transpose_op_mlu.cc b/paddle/fluid/operators/conv_transpose_op_mlu.cc index f757898886e1f..9adeec2d7079e 100644 --- a/paddle/fluid/operators/conv_transpose_op_mlu.cc +++ b/paddle/fluid/operators/conv_transpose_op_mlu.cc @@ -20,16 +20,16 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DataLayout = framework::DataLayout; template class Conv2DTransposeMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); - const Tensor* filter = ctx.Input("Filter"); - Tensor* output = ctx.Output("Output"); + const phi::DenseTensor* input = ctx.Input("Input"); + const phi::DenseTensor* filter = ctx.Input("Filter"); + phi::DenseTensor* output = ctx.Output("Output"); output->mutable_data(ctx.GetPlace()); std::vector output_padding = ctx.Attr>("output_padding"); @@ -131,12 +131,14 @@ template class Conv2DTransposeGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); - const Tensor* filter = ctx.Input("Filter"); - const Tensor* output_grad = - ctx.Input(framework::GradVarName("Output")); - Tensor* input_grad = ctx.Output(framework::GradVarName("Input")); - Tensor* filter_grad = ctx.Output(framework::GradVarName("Filter")); + const phi::DenseTensor* input = ctx.Input("Input"); + const phi::DenseTensor* filter = ctx.Input("Filter"); + const phi::DenseTensor* output_grad = + ctx.Input(framework::GradVarName("Output")); + phi::DenseTensor* input_grad = + ctx.Output(framework::GradVarName("Input")); + phi::DenseTensor* filter_grad = + ctx.Output(framework::GradVarName("Filter")); if ((!input_grad) && (!filter_grad)) return; diff --git a/paddle/fluid/operators/conv_transpose_op_npu.cc b/paddle/fluid/operators/conv_transpose_op_npu.cc index 94a6825ff6134..66a49b1bb89b2 100644 --- a/paddle/fluid/operators/conv_transpose_op_npu.cc +++ b/paddle/fluid/operators/conv_transpose_op_npu.cc @@ -20,16 +20,16 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template class Conv2DTransposeNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); - const Tensor* filter = ctx.Input("Filter"); - Tensor* output = ctx.Output("Output"); + const phi::DenseTensor* input = ctx.Input("Input"); + const phi::DenseTensor* filter = ctx.Input("Filter"); + phi::DenseTensor* output = ctx.Output("Output"); output->mutable_data(ctx.GetPlace()); std::vector output_padding = ctx.Attr>("output_padding"); @@ -107,12 +107,14 @@ template class Conv2DTransposeGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); - const Tensor* filter = ctx.Input("Filter"); - const Tensor* output_grad = - ctx.Input(framework::GradVarName("Output")); - Tensor* input_grad = ctx.Output(framework::GradVarName("Input")); - Tensor* filter_grad = ctx.Output(framework::GradVarName("Filter")); + const phi::DenseTensor* input = ctx.Input("Input"); + const phi::DenseTensor* filter = ctx.Input("Filter"); + const phi::DenseTensor* output_grad = + ctx.Input(framework::GradVarName("Output")); + phi::DenseTensor* input_grad = + ctx.Output(framework::GradVarName("Input")); + phi::DenseTensor* filter_grad = + ctx.Output(framework::GradVarName("Filter")); if ((!input_grad) && (!filter_grad)) return; @@ -203,9 +205,9 @@ template class Conv3DTransposeNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); - const Tensor* filter = ctx.Input("Filter"); - Tensor* output = ctx.Output("Output"); + const phi::DenseTensor* input = ctx.Input("Input"); + const phi::DenseTensor* filter = ctx.Input("Filter"); + phi::DenseTensor* output = ctx.Output("Output"); output->mutable_data(ctx.GetPlace()); std::vector output_padding = ctx.Attr>("output_padding"); diff --git a/paddle/fluid/operators/copy_cross_scope_op.cc b/paddle/fluid/operators/copy_cross_scope_op.cc index a826f1d1b897d..70a125af15156 100644 --- a/paddle/fluid/operators/copy_cross_scope_op.cc +++ b/paddle/fluid/operators/copy_cross_scope_op.cc @@ -31,7 +31,7 @@ class OpBase; } // namespace paddle using LoDTensor = paddle::framework::LoDTensor; -using Tensor = paddle::framework::Tensor; +using Tensor = phi::DenseTensor; namespace paddle { namespace operators { @@ -66,7 +66,7 @@ class CopyCrossScopeOp : public framework::OperatorBase { platform::errors::NotFound("No variable with name %s found.", id_name)); auto id_tensor = id_var->GetMutable(); auto it = scope.kids().begin(); - framework::Tensor cpu_id_tensor; + phi::DenseTensor cpu_id_tensor; paddle::framework::TensorCopySync( *id_tensor, platform::CPUPlace(), &cpu_id_tensor); auto id_value = cpu_id_tensor.data(); diff --git a/paddle/fluid/operators/correlation_op.cc b/paddle/fluid/operators/correlation_op.cc index 0e89889f40f29..cbd06ec042c48 100644 --- a/paddle/fluid/operators/correlation_op.cc +++ b/paddle/fluid/operators/correlation_op.cc @@ -22,7 +22,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; inline std::vector CorrelationOutputSize(int batch, int input_height, @@ -115,17 +115,17 @@ class CorrelationOp : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const override { auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input1"); - PADDLE_ENFORCE_EQ( - input_data_type, - framework::TransToProtoVarType(ctx.Input("Input2")->dtype()), - platform::errors::InvalidArgument( - "X and Y shoule have the same datatype")); + PADDLE_ENFORCE_EQ(input_data_type, + framework::TransToProtoVarType( + ctx.Input("Input2")->dtype()), + platform::errors::InvalidArgument( + "X and Y shoule have the same datatype")); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { return framework::OpKernelType( expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/correlation_op.cu b/paddle/fluid/operators/correlation_op.cu index 434506c033c4d..0155463dd5306 100644 --- a/paddle/fluid/operators/correlation_op.cu +++ b/paddle/fluid/operators/correlation_op.cu @@ -31,8 +31,6 @@ namespace operators { #endif #define FULL_MASK 0xffffffff -using framework::Tensor; - template __forceinline__ __device__ T warpReduceSum(T val) { for (int offset = 16; offset > 0; offset /= 2) { @@ -186,8 +184,8 @@ class CorrelationCUDAKernel : public framework::OpKernel { platform::errors::InvalidArgument( "Correlation only supports GPU now.")); - auto *input1 = ctx.Input("Input1"); - auto *input2 = ctx.Input("Input2"); + auto *input1 = ctx.Input("Input1"); + auto *input2 = ctx.Input("Input2"); int pad_size = ctx.Attr("pad_size"); int kernel_size = ctx.Attr("kernel_size"); int stride1 = ctx.Attr("stride1"); @@ -195,7 +193,7 @@ class CorrelationCUDAKernel : public framework::OpKernel { int max_displacement = ctx.Attr("max_displacement"); int corr_type_multiply = ctx.Attr("corr_type_multiply"); - auto *output = ctx.Output("Output"); + auto *output = ctx.Output("Output"); output->mutable_data(ctx.GetPlace()); auto &dev_ctx = ctx.template device_context(); @@ -209,11 +207,11 @@ class CorrelationCUDAKernel : public framework::OpKernel { int padded_input_height = H + 2 * pad_size; int padded_input_width = W + 2 * pad_size; - Tensor rinput1 = ctx.AllocateTmpTensor( + phi::DenseTensor rinput1 = ctx.AllocateTmpTensor( {N, padded_input_height, padded_input_width, C}, dev_ctx); rinput1.mutable_data(ctx.GetPlace()); - Tensor rinput2 = ctx.AllocateTmpTensor( + phi::DenseTensor rinput2 = ctx.AllocateTmpTensor( {N, padded_input_height, padded_input_width, C}, dev_ctx); rinput2.mutable_data(ctx.GetPlace()); @@ -453,10 +451,10 @@ class CorrelationCUDAGradKernel : public framework::OpKernel { true, platform::errors::InvalidArgument( "Correlation only supports GPU now.")); - const auto *input1 = ctx.Input("Input1"); - const auto *input2 = ctx.Input("Input2"); + const auto *input1 = ctx.Input("Input1"); + const auto *input2 = ctx.Input("Input2"); const auto *grad_output = - ctx.Input(framework::GradVarName("Output")); + ctx.Input(framework::GradVarName("Output")); const int pad_size = ctx.Attr("pad_size"); const int kernel_size = ctx.Attr("kernel_size"); const int stride1 = ctx.Attr("stride1"); @@ -464,9 +462,11 @@ class 
CorrelationCUDAGradKernel : public framework::OpKernel { const int max_displacement = ctx.Attr("max_displacement"); const int corr_type_multiply = ctx.Attr("corr_type_multiply"); - auto *grad_input1 = ctx.Output(framework::GradVarName("Input1")); + auto *grad_input1 = + ctx.Output(framework::GradVarName("Input1")); grad_input1->mutable_data(ctx.GetPlace()); - auto *grad_input2 = ctx.Output(framework::GradVarName("Input2")); + auto *grad_input2 = + ctx.Output(framework::GradVarName("Input2")); grad_input2->mutable_data(ctx.GetPlace()); auto &dev_ctx = ctx.template device_context(); @@ -479,11 +479,11 @@ class CorrelationCUDAGradKernel : public framework::OpKernel { int padded_input_height = H + 2 * pad_size; int padded_input_width = W + 2 * pad_size; - Tensor rinput1 = ctx.AllocateTmpTensor( + phi::DenseTensor rinput1 = ctx.AllocateTmpTensor( {N, padded_input_height, padded_input_width, C}, dev_ctx); rinput1.mutable_data(ctx.GetPlace()); - Tensor rinput2 = ctx.AllocateTmpTensor( + phi::DenseTensor rinput2 = ctx.AllocateTmpTensor( {N, padded_input_height, padded_input_width, C}, dev_ctx); rinput2.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/cos_sim_op.cc b/paddle/fluid/operators/cos_sim_op.cc index e3228104de38b..902a5eda2d2f6 100644 --- a/paddle/fluid/operators/cos_sim_op.cc +++ b/paddle/fluid/operators/cos_sim_op.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class CosSimOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/cos_sim_op.h b/paddle/fluid/operators/cos_sim_op.h index 7eb62453840a9..b9db1bcb5df22 100644 --- a/paddle/fluid/operators/cos_sim_op.h +++ b/paddle/fluid/operators/cos_sim_op.h @@ -21,7 +21,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class CosSimKernel : public framework::OpKernel { @@ -29,10 +29,10 @@ class CosSimKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { // get Tensor auto* in_x = context.Input("X"); - auto* in_y = context.Input("Y"); + auto* in_y = context.Input("Y"); auto* out_z = context.Output("Out"); - auto* out_x_norm = context.Output("XNorm"); - auto* out_y_norm = context.Output("YNorm"); + auto* out_x_norm = context.Output("XNorm"); + auto* out_y_norm = context.Output("YNorm"); int rows_x = in_x->dims()[0]; int rows_y = in_y->dims()[0]; @@ -75,14 +75,17 @@ class CosSimGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { // get Tensor - auto* in_x = context.Input("X"); - auto* in_y = context.Input("Y"); - auto* in_z = context.Input("Out"); - auto* in_x_norm = context.Input("XNorm"); - auto* in_y_norm = context.Input("YNorm"); - auto* out_grad_x = context.Output(framework::GradVarName("X")); - auto* out_grad_y = context.Output(framework::GradVarName("Y")); - auto* in_grad_z = context.Input(framework::GradVarName("Out")); + auto* in_x = context.Input("X"); + auto* in_y = context.Input("Y"); + auto* in_z = context.Input("Out"); + auto* in_x_norm = context.Input("XNorm"); + auto* in_y_norm = context.Input("YNorm"); + auto* out_grad_x = + context.Output(framework::GradVarName("X")); + auto* out_grad_y = + context.Output(framework::GradVarName("Y")); + auto* in_grad_z = + context.Input(framework::GradVarName("Out")); // compute gradident int rows_x = in_x->dims()[0]; diff --git a/paddle/fluid/operators/crf_decoding_op.h b/paddle/fluid/operators/crf_decoding_op.h index ce3844de6a7f0..3723c5c5dd3ea 100644 --- a/paddle/fluid/operators/crf_decoding_op.h +++ b/paddle/fluid/operators/crf_decoding_op.h @@ -25,16 +25,15 @@ namespace operators { using framework::LoD; using framework::LoDTensor; -using framework::Tensor; template class CRFDecodingOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* emission_weights = ctx.Input("Emission"); - auto* transition_weights = ctx.Input("Transition"); + auto* transition_weights = ctx.Input("Transition"); auto* label = ctx.Input("Label"); - auto* decoded_path = ctx.Output("ViterbiPath"); + auto* decoded_path = ctx.Output("ViterbiPath"); int64_t* path = decoded_path->mutable_data(platform::CPUPlace()); phi::funcs::SetConstant()( @@ -42,12 +41,12 @@ class CRFDecodingOpKernel : public framework::OpKernel { bool has_length = ctx.HasInput("Length"); if (has_length) { - auto* length = ctx.Input("Length"); + auto* length = ctx.Input("Length"); const size_t seq_num = length->numel(); const int64_t* length_data = length->data(); auto in_dims = emission_weights->dims(); - Tensor emission_weights_tmp = *emission_weights; + phi::DenseTensor emission_weights_tmp = *emission_weights; emission_weights_tmp.Resize({in_dims[0] * in_dims[1], in_dims[2]}); decoded_path->Resize({in_dims[0] * in_dims[1], 1}); @@ -55,7 +54,8 @@ class CRFDecodingOpKernel : public framework::OpKernel { if (length_data[i] == 0) continue; int64_t start_pos = i * in_dims[1]; int64_t end_pos = start_pos + static_cast(length_data[i]); - Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos); + phi::DenseTensor decoded_path_one_seq = + decoded_path->Slice(start_pos, end_pos); 
Decode(emission_weights_tmp.Slice(start_pos, end_pos), *transition_weights, &decoded_path_one_seq); @@ -97,7 +97,8 @@ class CRFDecodingOpKernel : public framework::OpKernel { if (lod[level][i] == lod[level][i + 1]) continue; int64_t start_pos = static_cast(lod[level][i]); int64_t end_pos = static_cast(lod[level][i + 1]); - Tensor decoded_path_one_seq = decoded_path->Slice(start_pos, end_pos); + phi::DenseTensor decoded_path_one_seq = + decoded_path->Slice(start_pos, end_pos); Decode(emission_weights->Slice(start_pos, end_pos), *transition_weights, &decoded_path_one_seq); @@ -119,9 +120,9 @@ class CRFDecodingOpKernel : public framework::OpKernel { } private: - void Decode(const Tensor& emission_weights, - const Tensor& transition_weights, - Tensor* decoded_path) const { + void Decode(const phi::DenseTensor& emission_weights, + const phi::DenseTensor& transition_weights, + phi::DenseTensor* decoded_path) const { auto emission_dims = emission_weights.dims(); const size_t seq_len = emission_dims[0]; const size_t tag_num = emission_dims[1]; @@ -132,9 +133,9 @@ class CRFDecodingOpKernel : public framework::OpKernel { // alpha is a memo table. An element alpha(k, v) records the score of the // best sequence of tags from position 1 to position k with v being the end // tag. - Tensor alpha; + phi::DenseTensor alpha; T* alpha_value = alpha.mutable_data(emission_dims, platform::CPUPlace()); - Tensor track; + phi::DenseTensor track; int* track_value = track.mutable_data(emission_dims, platform::CPUPlace()); auto ker = diff --git a/paddle/fluid/operators/crop_op.cc b/paddle/fluid/operators/crop_op.cc index f7c72c11ddfac..462764230f484 100644 --- a/paddle/fluid/operators/crop_op.cc +++ b/paddle/fluid/operators/crop_op.cc @@ -21,8 +21,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::Tensor; - class CropOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/crop_op.h b/paddle/fluid/operators/crop_op.h index fe17ac773a259..c193eabba372c 100644 --- a/paddle/fluid/operators/crop_op.h +++ b/paddle/fluid/operators/crop_op.h @@ -29,18 +29,17 @@ template using EigenTensor = framework::EigenTensor; -using framework::Tensor; static std::vector GetOffsets(const framework::ExecutionContext& ctx) { std::vector res; - int rank = ctx.Input("X")->dims().size(); + int rank = ctx.Input("X")->dims().size(); if (ctx.HasInput("Offsets")) { PADDLE_ENFORCE_EQ(ctx.Attr>("offsets").empty(), true, platform::errors::InvalidArgument( "Input 'Offsets' and attribute 'offsets' " "should not be used at the same time for CropOp.")); - const auto* offsets_tensor = ctx.Input("Offsets"); + const auto* offsets_tensor = ctx.Input("Offsets"); PADDLE_ENFORCE_EQ(offsets_tensor->dims().size(), 1, platform::errors::InvalidArgument( @@ -57,7 +56,7 @@ static std::vector GetOffsets(const framework::ExecutionContext& ctx) { offsets_tensor->dims()[0], rank)); const int* offsets_data; - framework::Tensor cpu_tmp_tensor; + phi::DenseTensor cpu_tmp_tensor; if (platform::is_cpu_place(offsets_tensor->place())) { offsets_data = offsets_tensor->data(); } else { @@ -83,8 +82,8 @@ static std::vector GetOffsets(const framework::ExecutionContext& ctx) { template void CropFunction(const framework::ExecutionContext& context) { - auto* x = context.Input("X"); - auto* out = context.Output("Out"); + auto* x = context.Input("X"); + auto* out = context.Output("Out"); auto out_dims = out->dims(); if (out_dims[0] == -1) { out_dims[0] = x->dims()[0]; @@ -115,7 +114,7 @@ template class CropKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - int rank = context.Input("X")->dims().size(); + int rank = context.Input("X")->dims().size(); PADDLE_ENFORCE_GE( rank, 1, @@ -155,10 +154,11 @@ class CropKernel : public framework::OpKernel { template void CropGradFunction(const framework::ExecutionContext& context) { - auto* d_x = context.Output(framework::GradVarName("X")); - auto* x = context.Input("X"); + auto* d_x = context.Output(framework::GradVarName("X")); + auto* x = context.Input("X"); if (d_x != nullptr) { - auto* d_out = context.Input(framework::GradVarName("Out")); + auto* d_out = + context.Input(framework::GradVarName("Out")); d_x->mutable_data(x->dims(), context.GetPlace()); auto offsets = GetOffsets(context); Eigen::array, D> paddings; @@ -180,7 +180,9 @@ class CropGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { size_t rank = - context.Input(framework::GradVarName("Out"))->dims().size(); + context.Input(framework::GradVarName("Out")) + ->dims() + .size(); PADDLE_ENFORCE_GE( rank, 1, diff --git a/paddle/fluid/operators/crop_op_npu.cc b/paddle/fluid/operators/crop_op_npu.cc index bd50dea15f80e..8980e5f73dee7 100644 --- a/paddle/fluid/operators/crop_op_npu.cc +++ b/paddle/fluid/operators/crop_op_npu.cc @@ -18,17 +18,17 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class CropNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); std::vector offset_list; if (ctx.HasInput("Offsets")) { - auto* offsets_tensor = ctx.Input("Offsets"); + auto* offsets_tensor = ctx.Input("Offsets"); paddle::framework::TensorToVector( *offsets_tensor, ctx.device_context(), &offset_list); if (offset_list.empty()) { @@ -56,11 +56,11 @@ class CropNPUKernel : public framework::OpKernel { int axis_int = 0; framework::NPUAttributeMap attr_input = {{"offsets", offset_list}, {"axis", axis_int}}; - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); if (ctx.HasInput("Y")) { - auto* shape = ctx.Input("Y"); + auto* shape = ctx.Input("Y"); PADDLE_ENFORCE_EQ(shape->dims().size(), x->dims().size(), platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/crop_tensor_op.cc b/paddle/fluid/operators/crop_tensor_op.cc index c75a5eaf86dac..44986baef8120 100644 --- a/paddle/fluid/operators/crop_tensor_op.cc +++ b/paddle/fluid/operators/crop_tensor_op.cc @@ -20,8 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class CropTensorOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -117,7 +115,7 @@ class CropTensorOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "ShapeTensor" || var_name == "OffsetsTensor" || var_name == "Shape" || var_name == "Offsets") { @@ -276,7 +274,7 @@ class CropTensorOpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "ShapeTensor" || var_name == "OffsetsTensor" || var_name == "Shape" || var_name == "Offsets") { diff --git a/paddle/fluid/operators/cross_entropy_op.h b/paddle/fluid/operators/cross_entropy_op.h index 4445e0a79a640..2949dc8d1fb2a 100644 --- a/paddle/fluid/operators/cross_entropy_op.h +++ b/paddle/fluid/operators/cross_entropy_op.h @@ -23,15 +23,15 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class CrossEntropyOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* labels = ctx.Input("Label"); - auto* y = ctx.Output("Y"); + auto* x = ctx.Input("X"); + auto* labels = ctx.Input("Label"); + auto* y = ctx.Output("Y"); y->mutable_data(ctx.GetPlace()); int rank = x->dims().size(); @@ -126,10 +126,10 @@ template class CrossEntropyGradientOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dy = ctx.Input(framework::GradVarName("Y")); - auto* label = ctx.Input("Label"); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* dy = ctx.Input(framework::GradVarName("Y")); + auto* label = ctx.Input("Label"); + auto* dx = ctx.Output(framework::GradVarName("X")); T* dx_data = dx->mutable_data(ctx.GetPlace()); // Following computation only depends on the last dimension size. So it's @@ -244,10 +244,10 @@ template class CrossEntropyOpKernel2 : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* label = ctx.Input("Label"); - auto* y = ctx.Output("Y"); - auto* match_x = ctx.Output("MatchX"); + auto* x = ctx.Input("X"); + auto* label = ctx.Input("Label"); + auto* y = ctx.Output("Y"); + auto* match_x = ctx.Output("MatchX"); auto& x_dims = x->dims(); auto feature_size = x_dims[x_dims.size() - 1]; @@ -271,10 +271,10 @@ template class CrossEntropyGradientOpKernel2 : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Input(framework::GradVarName("Y")); - auto* match_x = ctx.Input("MatchX"); - auto* label = ctx.Input("Label"); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Input(framework::GradVarName("Y")); + auto* match_x = ctx.Input("MatchX"); + auto* label = ctx.Input("Label"); auto* p_dx = dx->mutable_data(ctx.GetPlace()); auto* p_dy = dy->data(); diff --git a/paddle/fluid/operators/ctc_align_op.h b/paddle/fluid/operators/ctc_align_op.h index 6eeb890d38f03..e6f2e9900b051 100644 --- a/paddle/fluid/operators/ctc_align_op.h +++ b/paddle/fluid/operators/ctc_align_op.h @@ -24,7 +24,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template diff --git a/paddle/fluid/operators/cuda_graph_with_in_out.h b/paddle/fluid/operators/cuda_graph_with_in_out.h index e5e98aa2445c1..a667c40234dca 100644 --- a/paddle/fluid/operators/cuda_graph_with_in_out.h +++ b/paddle/fluid/operators/cuda_graph_with_in_out.h @@ -28,7 +28,7 @@ class CUDAGraphWithInOuts { template CUDAGraphWithInOuts(Callable &&callable, platform::CUDAPlace place, - const std::vector &in_ptrs, + const std::vector &in_ptrs, cudaStreamCaptureMode mode, int64_t pool_id) { in_indices_.resize(in_ptrs.size()); @@ -61,7 +61,7 @@ class CUDAGraphWithInOuts { } } - void Run(const std::vector &ins) { + void Run(const std::vector &ins) { PADDLE_ENFORCE_EQ( ins.size(), in_indices_.size(), @@ -75,8 +75,8 @@ class CUDAGraphWithInOuts { graph_->Replay(); } - std::vector GetOutputs() { - std::vector outs(out_indices_.size()); + std::vector GetOutputs() { + std::vector outs(out_indices_.size()); for (size_t i = 0; i < out_indices_.size(); ++i) { if (out_indices_[i] >= 0) { outs[i] = &outs_[out_indices_[i]]; @@ -89,8 +89,8 @@ class CUDAGraphWithInOuts { private: std::unique_ptr graph_; - std::vector ins_; - std::vector outs_; + std::vector ins_; + std::vector outs_; std::vector in_indices_; std::vector out_indices_; }; @@ -103,17 +103,17 @@ static std::unique_ptr CaptureCUDAGraph( const std::vector &output_names, cudaStreamCaptureMode mode, int64_t pool_id) { - std::vector inputs; + std::vector inputs; for (const auto &name : input_names) { - auto input_tensors = ctx.MultiInput(name); + auto input_tensors = ctx.MultiInput(name); inputs.insert(inputs.end(), input_tensors.begin(), input_tensors.end()); } - auto func = [&](const std::vector &inputs) { + auto func = [&](const std::vector &inputs) { callable(ctx); - std::vector outputs; + std::vector outputs; for (const auto &name : output_names) { - auto output_tensors = ctx.MultiOutput(name); + auto output_tensors = ctx.MultiOutput(name); outputs.insert( outputs.end(), output_tensors.begin(), output_tensors.end()); } @@ -128,9 +128,9 @@ static void ExecuteCUDAGraph(const framework::ExecutionContext &ctx, const std::vector &input_names, const std::vector &output_names, CUDAGraphWithInOuts *graph) { - std::vector inputs; + std::vector inputs; for (const auto &name : input_names) { - auto input_tensors = ctx.MultiInput(name); + auto input_tensors = ctx.MultiInput(name); inputs.insert(inputs.end(), input_tensors.begin(), input_tensors.end()); } @@ -139,7 +139,7 @@ static void ExecuteCUDAGraph(const framework::ExecutionContext &ctx, size_t idx = 0; for (const auto &name : output_names) { - auto output_tensors = ctx.MultiOutput(name); + auto output_tensors = ctx.MultiOutput(name); for (auto *out_t : output_tensors) { if (outputs[idx] != nullptr) { *out_t = *outputs[idx]; diff --git a/paddle/fluid/operators/cudnn_lstm_cache.h b/paddle/fluid/operators/cudnn_lstm_cache.h index 317d78639fcf3..32f1b46dbbd39 100644 --- a/paddle/fluid/operators/cudnn_lstm_cache.h +++ b/paddle/fluid/operators/cudnn_lstm_cache.h @@ -52,7 +52,7 @@ class ScopedRNNBase { const std::vector& sequence_length, size_t* workspace_size, size_t* reserve_size, - framework::Tensor* dropout_state) { + phi::DenseTensor* dropout_state) { int numDirections = is_bidirec_ ? 
2 : 1; cudnnDataType_t cudnn_type = platform::CudnnDataType::type; diff --git a/paddle/fluid/operators/cudnn_lstm_op.cu.cc b/paddle/fluid/operators/cudnn_lstm_op.cu.cc index d53333d217603..3435f790a3651 100644 --- a/paddle/fluid/operators/cudnn_lstm_op.cu.cc +++ b/paddle/fluid/operators/cudnn_lstm_op.cu.cc @@ -27,7 +27,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template bool is_continuous(const Type &weight_list) { @@ -112,7 +112,7 @@ void LSTMInferece(const bool &has_seq_length, T *out_data, T *last_h_data, T *last_c_data, - framework::Tensor *workspace_data, + phi::DenseTensor *workspace_data, const size_t &workspace_size) { if (!has_seq_length) { // for inference @@ -205,15 +205,15 @@ template class CudnnLSTMGPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - const Tensor *x = ctx.Input("Input"); - const Tensor *init_h = ctx.Input("InitH"); - const Tensor *init_c = ctx.Input("InitC"); + const Tensor *x = ctx.Input("Input"); + const Tensor *init_h = ctx.Input("InitH"); + const Tensor *init_c = ctx.Input("InitC"); - Tensor *out = ctx.Output("Out"); - Tensor *last_h = ctx.Output("LastH"); - Tensor *last_c = ctx.Output("LastC"); - Tensor *reserve = ctx.Output("Reserve"); - Tensor *state_out = ctx.Output("StateOut"); + Tensor *out = ctx.Output("Out"); + Tensor *last_h = ctx.Output("LastH"); + Tensor *last_c = ctx.Output("LastC"); + Tensor *reserve = ctx.Output("Reserve"); + Tensor *state_out = ctx.Output("StateOut"); const T *x_data = x->data(); const T *init_h_data = init_h->data(); @@ -243,7 +243,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel { bool has_seq_length = ctx.HasInput("SequenceLength"); std::vector SequenceLength; if (has_seq_length) { - auto *sequence_length = ctx.Input("SequenceLength"); + auto *sequence_length = ctx.Input("SequenceLength"); SequenceLength = operators::GetDataFromTensor(sequence_length); } @@ -266,12 +266,12 @@ class CudnnLSTMGPUKernel : public framework::OpKernel { reinterpret_cast(ctx.device_context()) .stream(); if (is_test && ctx.HasInput("W")) { - auto *W = ctx.Input("W"); + auto *W = ctx.Input("W"); w_initialized = W->IsInitialized() ? 
true : false; weight_numel = W->numel(); } if (!w_initialized) { - auto weight_list = ctx.MultiInput("WeightList"); + auto weight_list = ctx.MultiInput("WeightList"); bool continuous = is_continuous>(weight_list); weight_numel = size_sum(weight_list); @@ -301,7 +301,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel { w_data = const_cast(weight_list[0]->data()); } } else { - auto *W = ctx.Input("W"); + auto *W = ctx.Input("W"); w_data = const_cast(W->data()); } @@ -322,7 +322,7 @@ class CudnnLSTMGPUKernel : public framework::OpKernel { &reserve_size, state_out); - framework::Tensor workspace_data_; + phi::DenseTensor workspace_data_; workspace_data_.mutable_data( {static_cast(workspace_size)}, ctx.GetPlace()); @@ -442,23 +442,28 @@ template class CudnnLSTMGPUGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *input = ctx.Input("Input"); - auto *init_h = ctx.Input("InitH"); - auto *init_c = ctx.Input("InitC"); - auto *reserve = ctx.Input("Reserve"); - auto *state_out = ctx.Input("StateOut"); - auto weight_list = ctx.MultiInput("WeightList"); - - auto *out = ctx.Input("Out"); - auto *out_grad = ctx.Input(framework::GradVarName("Out")); - auto *last_h_grad = ctx.Input(framework::GradVarName("LastH")); - auto *last_c_grad = ctx.Input(framework::GradVarName("LastC")); - - auto *in_grad = ctx.Output(framework::GradVarName("Input")); - auto *init_h_grad = ctx.Output(framework::GradVarName("InitH")); - auto *init_c_grad = ctx.Output(framework::GradVarName("InitC")); - auto weight_grad_list = ctx.MultiOutput( - framework::GradVarName("WeightList")); + auto *input = ctx.Input("Input"); + auto *init_h = ctx.Input("InitH"); + auto *init_c = ctx.Input("InitC"); + auto *reserve = ctx.Input("Reserve"); + auto *state_out = ctx.Input("StateOut"); + auto weight_list = ctx.MultiInput("WeightList"); + + auto *out = ctx.Input("Out"); + auto *out_grad = ctx.Input(framework::GradVarName("Out")); + auto *last_h_grad = + ctx.Input(framework::GradVarName("LastH")); + auto *last_c_grad = + ctx.Input(framework::GradVarName("LastC")); + + auto *in_grad = + ctx.Output(framework::GradVarName("Input")); + auto *init_h_grad = + ctx.Output(framework::GradVarName("InitH")); + auto *init_c_grad = + ctx.Output(framework::GradVarName("InitC")); + auto weight_grad_list = + ctx.MultiOutput(framework::GradVarName("WeightList")); auto &dev_ctx = ctx.template device_context(); auto handle = dev_ctx.cudnn_handle(); @@ -528,7 +533,7 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel { bool has_seq_length = ctx.HasInput("SequenceLength"); std::vector SequenceLength; if (has_seq_length) { - auto *sequence_length = ctx.Input("SequenceLength"); + auto *sequence_length = ctx.Input("SequenceLength"); SequenceLength = operators::GetDataFromTensor(sequence_length); } @@ -557,7 +562,7 @@ class CudnnLSTMGPUGradKernel : public framework::OpKernel { &reserve_size, const_cast(state_out)); - framework::Tensor workspace_data_; + phi::DenseTensor workspace_data_; workspace_data_.mutable_data( {static_cast(workspace_size)}, ctx.GetPlace()); const uint8_t *reserve_data = reserve->data(); diff --git a/paddle/fluid/operators/cudnn_rnn_cache.h b/paddle/fluid/operators/cudnn_rnn_cache.h index 69448000ac39e..6cd7160e0ae26 100644 --- a/paddle/fluid/operators/cudnn_rnn_cache.h +++ b/paddle/fluid/operators/cudnn_rnn_cache.h @@ -53,7 +53,7 @@ struct CudnnRNNCache { cudnnFilterDescriptor_t dw_desc_; size_t workspace_size_; - framework::Tensor workspace_data_; + 
phi::DenseTensor workspace_data_; size_t seq_length_; @@ -78,7 +78,7 @@ struct CudnnRNNCache { int seed, int weight_numel, size_t *reserve_size_, - framework::Tensor *dropout_state_, + phi::DenseTensor *dropout_state_, bool initialized, cudnnDataType_t cudnn_type) { seq_length_ = seq_len; diff --git a/paddle/fluid/operators/cumsum_op_mlu.cc b/paddle/fluid/operators/cumsum_op_mlu.cc index bc14075cc23f6..83d9a10af1730 100644 --- a/paddle/fluid/operators/cumsum_op_mlu.cc +++ b/paddle/fluid/operators/cumsum_op_mlu.cc @@ -18,14 +18,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class CumSumMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); int axis = ctx.Attr("axis"); bool exclusive = ctx.Attr("exclusive"); bool reverse = ctx.Attr("reverse"); @@ -33,7 +33,7 @@ class CumSumMLUKernel : public framework::OpKernel { out->mutable_data(ctx.GetPlace()); - Tensor* input_ptr = const_cast(x); + phi::DenseTensor* input_ptr = const_cast(x); Tensor flat_x(x->type()); if (flatten) { PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/cumsum_op_npu.cc b/paddle/fluid/operators/cumsum_op_npu.cc index 9d434d24e55a8..672a59cf22f59 100644 --- a/paddle/fluid/operators/cumsum_op_npu.cc +++ b/paddle/fluid/operators/cumsum_op_npu.cc @@ -19,10 +19,10 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; -static void CumsumImp(const Tensor& input, - Tensor* output, +static void CumsumImp(const phi::DenseTensor& input, + phi::DenseTensor* output, const framework::NPUAttributeMap& attr_input, const framework::ExecutionContext& ctx) { auto stream = @@ -65,8 +65,8 @@ template class CumSumNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); int axis = ctx.Attr("axis"); bool exclusive = ctx.Attr("exclusive"); bool reverse = ctx.Attr("reverse"); diff --git a/paddle/fluid/operators/cvm_op.cc b/paddle/fluid/operators/cvm_op.cc index d776ccfa4db35..153b181b4fd6a 100644 --- a/paddle/fluid/operators/cvm_op.cc +++ b/paddle/fluid/operators/cvm_op.cc @@ -21,7 +21,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class CVMOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/cvm_op.cu b/paddle/fluid/operators/cvm_op.cu index d08d9e14ef06e..f8ab86ff54e36 100644 --- a/paddle/fluid/operators/cvm_op.cu +++ b/paddle/fluid/operators/cvm_op.cu @@ -22,7 +22,7 @@ namespace paddle { namespace operators { using platform::PADDLE_CUDA_NUM_THREADS; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template @@ -131,7 +131,7 @@ class CVMGradCUDAKernel : public framework::OpKernel { auto* dx = context.Output(framework::GradVarName("X")); T* dx_data = dx->mutable_data(context.GetPlace()); - const Tensor* cvm = context.Input("CVM"); + const phi::DenseTensor* cvm = context.Input("CVM"); const T* cvm_data = cvm->data(); const auto* dOut = diff --git a/paddle/fluid/operators/cvm_op.h b/paddle/fluid/operators/cvm_op.h index 3258737d29a6a..4206c8f458425 100644 --- a/paddle/fluid/operators/cvm_op.h +++ b/paddle/fluid/operators/cvm_op.h @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template @@ -105,7 +105,7 @@ class CVMGradOpKernel : public framework::OpKernel { auto* dx = context.Output(framework::GradVarName("X")); T* dx_data = dx->mutable_data(context.GetPlace()); - const Tensor* cvm = context.Input("CVM"); + const phi::DenseTensor* cvm = context.Input("CVM"); const T* cvm_data = cvm->data(); const auto* dOut = diff --git a/paddle/fluid/operators/data_norm_op.cc b/paddle/fluid/operators/data_norm_op.cc index 4fc279e03a36f..ea6b034f0b481 100644 --- a/paddle/fluid/operators/data_norm_op.cc +++ b/paddle/fluid/operators/data_norm_op.cc @@ -26,7 +26,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using DataLayout = framework::DataLayout; @@ -289,7 +289,7 @@ class DataNormKernel : public framework::OpKernel { const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); - const auto *x = ctx.Input("X"); + const auto *x = ctx.Input("X"); const auto &x_dims = x->dims(); PADDLE_ENFORCE_EQ( x_dims.size(), @@ -299,19 +299,19 @@ class DataNormKernel : public framework::OpKernel { const int C = (data_layout == DataLayout::kNCHW ? 
x_dims[1] : x_dims[x_dims.size() - 1]); - auto *y = ctx.Output("Y"); - auto *mean_out = ctx.Output("Means"); - auto *scales = ctx.Output("Scales"); + auto *y = ctx.Output("Y"); + auto *mean_out = ctx.Output("Means"); + auto *scales = ctx.Output("Scales"); // alloc memory T *y_data = y->mutable_data(ctx.GetPlace()); ConstEigenVectorArrayMap b_size_arr( - ctx.Input("BatchSize")->data(), C); + ctx.Input("BatchSize")->data(), C); ConstEigenVectorArrayMap b_sum_arr( - ctx.Input("BatchSum")->data(), C); + ctx.Input("BatchSum")->data(), C); ConstEigenVectorArrayMap b_square_sum_arr( - ctx.Input("BatchSquareSum")->data(), C); + ctx.Input("BatchSquareSum")->data(), C); EigenVectorArrayMap means_arr(mean_out->mutable_data(ctx.GetPlace()), C); EigenVectorArrayMap scales_arr(scales->mutable_data(ctx.GetPlace()), @@ -360,8 +360,8 @@ class DataNormKernel : public framework::OpKernel { scales_arr; } else if (ctx.Attr("enable_scale_and_shift") && slot_dim <= 0) { - const auto *scale_w = ctx.Input("scale_w"); - const auto *bias = ctx.Input("bias"); + const auto *scale_w = ctx.Input("scale_w"); + const auto *bias = ctx.Input("bias"); ConstEigenVectorArrayMap scale_w_arr(scale_w->data(), C); ConstEigenVectorArrayMap bias_arr(bias->data(), C); @@ -377,8 +377,8 @@ class DataNormKernel : public framework::OpKernel { } else { const int item_size = x->numel() / N; - const auto *scale_w = ctx.Input("scale_w"); - const auto *bias = ctx.Input("bias"); + const auto *scale_w = ctx.Input("scale_w"); + const auto *bias = ctx.Input("bias"); const T *scale_w_data = scale_w->data(); const T *bias_data = bias->data(); // location of show number in one embedding @@ -528,10 +528,10 @@ template class DataNormGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - const auto *x = ctx.Input("X"); - const auto *d_y = ctx.Input(framework::GradVarName("Y")); - const auto *scales = ctx.Input("Scales"); - const auto *means = ctx.Input("Means"); + const auto *x = ctx.Input("X"); + const auto *d_y = ctx.Input(framework::GradVarName("Y")); + const auto *scales = ctx.Input("Scales"); + const auto *means = ctx.Input("Means"); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = @@ -551,14 +551,15 @@ class DataNormGradKernel : public framework::OpKernel { // init output Tensor *d_x = nullptr; if (ctx.HasOutput(framework::GradVarName("X"))) { - d_x = ctx.Output(framework::GradVarName("X")); + d_x = ctx.Output(framework::GradVarName("X")); } auto *d_batch_size = - ctx.Output(framework::GradVarName("BatchSize")); - auto *d_batch_sum = ctx.Output(framework::GradVarName("BatchSum")); + ctx.Output(framework::GradVarName("BatchSize")); + auto *d_batch_sum = + ctx.Output(framework::GradVarName("BatchSum")); auto *d_batch_square_sum = - ctx.Output(framework::GradVarName("BatchSquareSum")); + ctx.Output(framework::GradVarName("BatchSquareSum")); const T *mean_data = means->data(); const T *inv_var_data = scales->data(); @@ -596,10 +597,11 @@ class DataNormGradKernel : public framework::OpKernel { d_x_arr.col(nc) = d_y_arr.col(nc) * scales_arr; } } else { - const auto *scale_w = ctx.Input("scale_w"); + const auto *scale_w = ctx.Input("scale_w"); auto *d_scale = - ctx.Output(framework::GradVarName("scale_w")); - auto *d_bias = ctx.Output(framework::GradVarName("bias")); + ctx.Output(framework::GradVarName("scale_w")); + auto *d_bias = + ctx.Output(framework::GradVarName("bias")); ConstEigenVectorArrayMap scale_arr(scale_w->data(), C); T 
*d_bias_data = nullptr; T *d_scale_data = nullptr; diff --git a/paddle/fluid/operators/data_norm_op.cu b/paddle/fluid/operators/data_norm_op.cu index e3f510e755b9c..b0819d81a1dab 100644 --- a/paddle/fluid/operators/data_norm_op.cu +++ b/paddle/fluid/operators/data_norm_op.cu @@ -26,7 +26,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using DataLayout = framework::DataLayout; using platform::PADDLE_CUDA_NUM_THREADS; @@ -107,7 +107,7 @@ template class DataNormKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - const auto *x = ctx.Input("X"); + const auto *x = ctx.Input("X"); const auto &x_dims = x->dims(); // Align with CPU version, but should we add this restriction? PADDLE_ENFORCE_EQ( @@ -116,18 +116,20 @@ class DataNormKernel : public framework::OpKernel { platform::errors::PreconditionNotMet("The Input dim size should be 2")); const int N = x_dims[0]; const int C = x_dims[1]; - const T *batch_size_in = ctx.Input("BatchSize")->data(); - const T *batch_sum_in = ctx.Input("BatchSum")->data(); + const T *batch_size_in = + ctx.Input("BatchSize")->data(); + const T *batch_sum_in = ctx.Input("BatchSum")->data(); const T *batch_square_sum_in = - ctx.Input("BatchSquareSum")->data(); + ctx.Input("BatchSquareSum")->data(); auto *x_data = x->data(); // alloc memory - T *y_data = ctx.Output("Y")->mutable_data(ctx.GetPlace()); + T *y_data = + ctx.Output("Y")->mutable_data(ctx.GetPlace()); T *mean_out_data = - ctx.Output("Means")->mutable_data(ctx.GetPlace()); + ctx.Output("Means")->mutable_data(ctx.GetPlace()); T *scale_out_data = - ctx.Output("Scales")->mutable_data(ctx.GetPlace()); + ctx.Output("Scales")->mutable_data(ctx.GetPlace()); auto stream = ctx.template device_context().stream(); @@ -147,10 +149,10 @@ template class DataNormGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - const auto *x = ctx.Input("X"); - const auto *d_y = ctx.Input(framework::GradVarName("Y")); - const auto *scales = ctx.Input("Scales"); - const auto *means = ctx.Input("Means"); + const auto *x = ctx.Input("X"); + const auto *d_y = ctx.Input(framework::GradVarName("Y")); + const auto *scales = ctx.Input("Scales"); + const auto *means = ctx.Input("Means"); const float epsilon = ctx.Attr("epsilon"); const float dr = ctx.Attr("summary_decay_rate"); const bool need_sync_stats = ctx.Attr("sync_stats"); @@ -167,14 +169,16 @@ class DataNormGradKernel : public framework::OpKernel { // init output Tensor *d_x = nullptr; if (ctx.HasOutput(framework::GradVarName("X"))) { - d_x = ctx.Output(framework::GradVarName("X")); + d_x = ctx.Output(framework::GradVarName("X")); } - T *d_batch_size = ctx.Output(framework::GradVarName("BatchSize")) - ->mutable_data(ctx.GetPlace()); - T *d_batch_sum = ctx.Output(framework::GradVarName("BatchSum")) - ->mutable_data(ctx.GetPlace()); + T *d_batch_size = + ctx.Output(framework::GradVarName("BatchSize")) + ->mutable_data(ctx.GetPlace()); + T *d_batch_sum = + ctx.Output(framework::GradVarName("BatchSum")) + ->mutable_data(ctx.GetPlace()); T *d_batch_square_sum = - ctx.Output(framework::GradVarName("BatchSquareSum")) + ctx.Output(framework::GradVarName("BatchSquareSum")) ->mutable_data(ctx.GetPlace()); auto stream = ctx.template device_context().stream(); @@ -234,12 +238,12 @@ class DataNormGradKernel : public framework::OpKernel 
{ #endif } - T *batch_size_data = - ctx.Output("BatchSize")->mutable_data(ctx.GetPlace()); - T *batch_sum_data = - ctx.Output("BatchSum")->mutable_data(ctx.GetPlace()); - T *batch_square_sum_data = - ctx.Output("BatchSquareSum")->mutable_data(ctx.GetPlace()); + T *batch_size_data = ctx.Output("BatchSize") + ->mutable_data(ctx.GetPlace()); + T *batch_sum_data = ctx.Output("BatchSum") + ->mutable_data(ctx.GetPlace()); + T *batch_square_sum_data = ctx.Output("BatchSquareSum") + ->mutable_data(ctx.GetPlace()); KernelUpdateParam<<>>( C, d_batch_size, diff --git a/paddle/fluid/operators/decode_jpeg_op.cc b/paddle/fluid/operators/decode_jpeg_op.cc index 6e12b25028b04..973552adb5e30 100644 --- a/paddle/fluid/operators/decode_jpeg_op.cc +++ b/paddle/fluid/operators/decode_jpeg_op.cc @@ -40,7 +40,7 @@ class DecodeJpegOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const { if (var_name == "X") { return expected_kernel_type; diff --git a/paddle/fluid/operators/deformable_conv_op_mlu.cc b/paddle/fluid/operators/deformable_conv_op_mlu.cc index 0e3e45148fe91..08969ba98fcd2 100644 --- a/paddle/fluid/operators/deformable_conv_op_mlu.cc +++ b/paddle/fluid/operators/deformable_conv_op_mlu.cc @@ -18,17 +18,17 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class DeformableConvMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* offset = ctx.Input("Offset"); - auto* mask = ctx.Input("Mask"); - auto* filter = ctx.Input("Filter"); - auto* output = ctx.Output("Output"); + auto* input = ctx.Input("Input"); + auto* offset = ctx.Input("Offset"); + auto* mask = ctx.Input("Mask"); + auto* filter = ctx.Input("Filter"); + auto* output = ctx.Output("Output"); output->mutable_data(ctx.GetPlace()); const int groups = ctx.Attr("groups"); @@ -125,17 +125,21 @@ template class DeformableConvGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* output_grad = - ctx.Input(framework::GradVarName("Output")); - auto* input_grad = ctx.Output(framework::GradVarName("Input")); - auto* filter_grad = ctx.Output(framework::GradVarName("Filter")); - auto* offset_grad = ctx.Output(framework::GradVarName("Offset")); - auto* mask_grad = ctx.Output(framework::GradVarName("Mask")); - - const Tensor* input = ctx.Input("Input"); - auto* offset = ctx.Input("Offset"); - auto* mask = ctx.Input("Mask"); - auto* filter = ctx.Input("Filter"); + const phi::DenseTensor* output_grad = + ctx.Input(framework::GradVarName("Output")); + auto* input_grad = + ctx.Output(framework::GradVarName("Input")); + auto* filter_grad = + ctx.Output(framework::GradVarName("Filter")); + auto* offset_grad = + ctx.Output(framework::GradVarName("Offset")); + auto* mask_grad = + ctx.Output(framework::GradVarName("Mask")); + + const phi::DenseTensor* input = ctx.Input("Input"); + auto* offset = ctx.Input("Offset"); + auto* mask = ctx.Input("Mask"); + auto* filter = ctx.Input("Filter"); int groups = ctx.Attr("groups"); int deformable_groups = ctx.Attr("deformable_groups"); diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.cu b/paddle/fluid/operators/deformable_psroi_pooling_op.cu 
index 2fcdebd5e826a..acfbda237c2c8 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.cu +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cu @@ -39,7 +39,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using paddle::platform::PADDLE_CUDA_NUM_THREADS; @@ -184,12 +184,12 @@ template class DeformablePSROIPoolCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); + const phi::DenseTensor* input = ctx.Input("Input"); const LoDTensor* rois = ctx.Input("ROIs"); - const Tensor* trans = ctx.Input("Trans"); - Tensor* out = ctx.Output("Output"); + const phi::DenseTensor* trans = ctx.Input("Trans"); + phi::DenseTensor* out = ctx.Output("Output"); out->mutable_data(ctx.GetPlace()); - Tensor* top_count = ctx.Output("TopCount"); + phi::DenseTensor* top_count = ctx.Output("TopCount"); top_count->mutable_data(ctx.GetPlace()); auto no_trans = ctx.Attr("no_trans"); @@ -236,7 +236,7 @@ class DeformablePSROIPoolCUDAKernel : public framework::OpKernel { const T* bottom_rois = rois->data(); const T* bottom_trans = no_trans ? NULL : trans->data(); - framework::Tensor roi_batch_id_list; + phi::DenseTensor roi_batch_id_list; roi_batch_id_list.Resize({num_rois}); auto cplace = platform::CPUPlace(); int* roi_batch_id_data = roi_batch_id_list.mutable_data(cplace); @@ -489,14 +489,16 @@ template class DeformablePSROIPoolGradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* input = ctx.Input("Input"); + const phi::DenseTensor* input = ctx.Input("Input"); const LoDTensor* rois = ctx.Input("ROIs"); - const Tensor* trans = ctx.Input("Trans"); - const Tensor* top_count = ctx.Input("TopCount"); - const Tensor* output_grad = - ctx.Input(framework::GradVarName("Output")); - Tensor* input_grad = ctx.Output(framework::GradVarName("Input")); - Tensor* trans_grad = ctx.Output(framework::GradVarName("Trans")); + const phi::DenseTensor* trans = ctx.Input("Trans"); + const phi::DenseTensor* top_count = ctx.Input("TopCount"); + const phi::DenseTensor* output_grad = + ctx.Input(framework::GradVarName("Output")); + phi::DenseTensor* input_grad = + ctx.Output(framework::GradVarName("Input")); + phi::DenseTensor* trans_grad = + ctx.Output(framework::GradVarName("Trans")); phi::funcs::SetConstant set_zero; auto& dev_ctx = ctx.cuda_device_context(); @@ -550,7 +552,7 @@ class DeformablePSROIPoolGradCUDAKernel : public framework::OpKernel { } const T* top_count_data = top_count->data(); - framework::Tensor roi_batch_id_list; + phi::DenseTensor roi_batch_id_list; roi_batch_id_list.Resize({num_rois}); auto cplace = platform::CPUPlace(); int* roi_batch_id_data = roi_batch_id_list.mutable_data(cplace); diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.h b/paddle/fluid/operators/deformable_psroi_pooling_op.h index 937afa362996c..d6961524e6f0c 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.h +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.h @@ -33,7 +33,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template @@ -173,12 +173,12 @@ template class DeformablePSROIPoolCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - 
auto* input = ctx.Input("Input"); + auto* input = ctx.Input("Input"); auto* rois = ctx.Input("ROIs"); - auto* trans = ctx.Input("Trans"); - auto* out = ctx.Output("Output"); + auto* trans = ctx.Input("Trans"); + auto* out = ctx.Output("Output"); out->mutable_data(ctx.GetPlace()); - auto* top_count = ctx.Output("TopCount"); + auto* top_count = ctx.Output("TopCount"); top_count->mutable_data(ctx.GetPlace()); phi::funcs::SetConstant set_zero; @@ -196,7 +196,7 @@ class DeformablePSROIPoolCPUKernel : public framework::OpKernel { "is:%d.", num_rois, out->dims()[0])); - framework::Tensor roi_batch_id_list; + phi::DenseTensor roi_batch_id_list; roi_batch_id_list.Resize({num_rois}); int* roi_batch_id_data = roi_batch_id_list.mutable_data(ctx.GetPlace()); @@ -475,19 +475,22 @@ template class DeformablePSROIPoolGradCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); + auto* input = ctx.Input("Input"); auto* rois = ctx.Input("ROIs"); - auto* trans = ctx.Input("Trans"); - auto* top_count = ctx.Input("TopCount"); - auto* output_grad = ctx.Input(framework::GradVarName("Output")); - auto* input_grad = ctx.Output(framework::GradVarName("Input")); + auto* trans = ctx.Input("Trans"); + auto* top_count = ctx.Input("TopCount"); + auto* output_grad = + ctx.Input(framework::GradVarName("Output")); + auto* input_grad = + ctx.Output(framework::GradVarName("Input")); phi::funcs::SetConstant set_zero; auto& dev_ctx = ctx.template device_context(); if (input_grad) { input_grad->mutable_data(ctx.GetPlace()); set_zero(dev_ctx, input_grad, static_cast(.0)); } - auto* trans_grad = ctx.Output(framework::GradVarName("Trans")); + auto* trans_grad = + ctx.Output(framework::GradVarName("Trans")); if (trans_grad) { trans_grad->mutable_data(ctx.GetPlace()); set_zero(dev_ctx, trans_grad, static_cast(.0)); diff --git a/paddle/fluid/operators/dequantize_abs_max_op.cc b/paddle/fluid/operators/dequantize_abs_max_op.cc index ff4bb5f53341b..99c4fad0fa2ab 100644 --- a/paddle/fluid/operators/dequantize_abs_max_op.cc +++ b/paddle/fluid/operators/dequantize_abs_max_op.cc @@ -35,10 +35,10 @@ namespace operators { template struct DequantizeFunctor { void operator()(const phi::CPUContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor* scale, + const phi::DenseTensor* in, + const phi::DenseTensor* scale, float max_range, - framework::Tensor* out) { + phi::DenseTensor* out) { const float* scale_factor = scale->data(); const T* input_data = in->data(); float* output_data = out->mutable_data(dev_ctx.GetPlace()); diff --git a/paddle/fluid/operators/dequantize_abs_max_op.cu b/paddle/fluid/operators/dequantize_abs_max_op.cu index 57d2c02adb095..70c0aca78baec 100644 --- a/paddle/fluid/operators/dequantize_abs_max_op.cu +++ b/paddle/fluid/operators/dequantize_abs_max_op.cu @@ -29,10 +29,10 @@ __global__ void KeDequantize( template struct DequantizeFunctor { void operator()(const phi::GPUContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor* scale, + const phi::DenseTensor* in, + const phi::DenseTensor* scale, float max_range, - framework::Tensor* out) { + phi::DenseTensor* out) { const T* in_data = in->data(); const float* scale_factor = scale->data(); float* out_data = out->mutable_data(dev_ctx.GetPlace()); diff --git a/paddle/fluid/operators/dequantize_abs_max_op.h b/paddle/fluid/operators/dequantize_abs_max_op.h index fb1fa313da42a..4d9a893c66c3c 100644 --- a/paddle/fluid/operators/dequantize_abs_max_op.h 
+++ b/paddle/fluid/operators/dequantize_abs_max_op.h @@ -30,20 +30,20 @@ namespace operators { template struct DequantizeFunctor { void operator()(const DeviceContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor* scale, + const phi::DenseTensor* in, + const phi::DenseTensor* scale, float max_range, - framework::Tensor* out); + phi::DenseTensor* out); }; template class DequantizeMaxAbsKernel : public framework::OpKernel { public: virtual void Compute(const framework::ExecutionContext& ctx) const { - auto* in = ctx.Input("X"); - auto* scale = ctx.Input("Scale"); + auto* in = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); float max_range = ctx.Attr("max_range"); diff --git a/paddle/fluid/operators/dequantize_log_op.cc b/paddle/fluid/operators/dequantize_log_op.cc index b3c1770493c9c..62359a2ce2124 100644 --- a/paddle/fluid/operators/dequantize_log_op.cc +++ b/paddle/fluid/operators/dequantize_log_op.cc @@ -34,9 +34,9 @@ namespace operators { template struct DequantizeFunctor { void operator()(const phi::CPUContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor* dict, - framework::Tensor* out) { + const phi::DenseTensor* in, + const phi::DenseTensor* dict, + phi::DenseTensor* out) { const float* dict_data = dict->data(); const T* input_data = in->data(); float* output_data = out->mutable_data(dev_ctx.GetPlace()); diff --git a/paddle/fluid/operators/dequantize_log_op.cu b/paddle/fluid/operators/dequantize_log_op.cu index 2c47d9b17aa06..360871f9e7251 100644 --- a/paddle/fluid/operators/dequantize_log_op.cu +++ b/paddle/fluid/operators/dequantize_log_op.cu @@ -38,9 +38,9 @@ __global__ void KeDequantize(const T* in, template struct DequantizeFunctor { void operator()(const phi::GPUContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor* dict, - framework::Tensor* out) { + const phi::DenseTensor* in, + const phi::DenseTensor* dict, + phi::DenseTensor* out) { const T* in_data = in->data(); const float* dict_data = dict->data(); float* out_data = out->mutable_data(dev_ctx.GetPlace()); diff --git a/paddle/fluid/operators/dequantize_log_op.h b/paddle/fluid/operators/dequantize_log_op.h index 01613be898e7b..d15f0392e82fc 100644 --- a/paddle/fluid/operators/dequantize_log_op.h +++ b/paddle/fluid/operators/dequantize_log_op.h @@ -29,18 +29,18 @@ namespace operators { template struct DequantizeFunctor { void operator()(const DeviceContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor* dict, - framework::Tensor* out); + const phi::DenseTensor* in, + const phi::DenseTensor* dict, + phi::DenseTensor* out); }; template class DequantizeLogKernel : public framework::OpKernel { public: virtual void Compute(const framework::ExecutionContext& ctx) const { - auto* in = ctx.Input("X"); - auto* dict = ctx.Input("Dict"); - auto* out = ctx.Output("Out"); + auto* in = ctx.Input("X"); + auto* dict = ctx.Input("Dict"); + auto* out = ctx.Output("Out"); auto& dev_ctx = ctx.template device_context(); out->mutable_data(dev_ctx.GetPlace()); diff --git a/paddle/fluid/operators/dequantize_op.h b/paddle/fluid/operators/dequantize_op.h index ea7a08c8f3684..f319828a6be4b 100644 --- a/paddle/fluid/operators/dequantize_op.h +++ b/paddle/fluid/operators/dequantize_op.h @@ -23,7 +23,6 @@ namespace paddle { namespace operators { using framework::OpKernelType; -using framework::Tensor; class DeQuantOp : public framework::OperatorWithKernel { public: diff --git 
a/paddle/fluid/operators/detection/anchor_generator_op.cu b/paddle/fluid/operators/detection/anchor_generator_op.cu index 30250eb8cc048..eeb4d731b7b3b 100644 --- a/paddle/fluid/operators/detection/anchor_generator_op.cu +++ b/paddle/fluid/operators/detection/anchor_generator_op.cu @@ -75,9 +75,9 @@ template class AnchorGeneratorOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* anchors = ctx.Output("Anchors"); - auto* vars = ctx.Output("Variances"); + auto* input = ctx.Input("Input"); + auto* anchors = ctx.Output("Anchors"); + auto* vars = ctx.Output("Variances"); auto anchor_sizes = ctx.Attr>("anchor_sizes"); auto aspect_ratios = ctx.Attr>("aspect_ratios"); @@ -101,13 +101,13 @@ class AnchorGeneratorOpCUDAKernel : public framework::OpKernel { anchors->mutable_data(ctx.GetPlace()); vars->mutable_data(ctx.GetPlace()); - framework::Tensor ar; + phi::DenseTensor ar; framework::TensorFromVector(aspect_ratios, ctx.device_context(), &ar); - framework::Tensor as; + phi::DenseTensor as; framework::TensorFromVector(anchor_sizes, ctx.device_context(), &as); - framework::Tensor sd; + phi::DenseTensor sd; framework::TensorFromVector(stride, ctx.device_context(), &sd); GenAnchors<<>>(anchors->data(), @@ -121,7 +121,7 @@ class AnchorGeneratorOpCUDAKernel : public framework::OpKernel { width, offset); - framework::Tensor v; + phi::DenseTensor v; framework::TensorFromVector(variances, ctx.device_context(), &v); grid = (box_num * 4 + block - 1) / block; SetVariance<<>>( diff --git a/paddle/fluid/operators/detection/anchor_generator_op.h b/paddle/fluid/operators/detection/anchor_generator_op.h index 767229bfee001..aaebcef3c901f 100644 --- a/paddle/fluid/operators/detection/anchor_generator_op.h +++ b/paddle/fluid/operators/detection/anchor_generator_op.h @@ -47,9 +47,9 @@ template class AnchorGeneratorOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* anchors = ctx.Output("Anchors"); - auto* vars = ctx.Output("Variances"); + auto* input = ctx.Input("Input"); + auto* anchors = ctx.Output("Anchors"); + auto* vars = ctx.Output("Variances"); auto anchor_sizes = ctx.Attr>("anchor_sizes"); auto aspect_ratios = ctx.Attr>("aspect_ratios"); @@ -106,7 +106,7 @@ class AnchorGeneratorOpKernel : public framework::OpKernel { } } - framework::Tensor var_t; + phi::DenseTensor var_t; var_t.mutable_data( phi::make_ddim({1, static_cast(variances.size())}), ctx.GetPlace()); diff --git a/paddle/fluid/operators/detection/bbox_util.cu.h b/paddle/fluid/operators/detection/bbox_util.cu.h index e4accef0fa9b3..a831cbf7062b8 100644 --- a/paddle/fluid/operators/detection/bbox_util.cu.h +++ b/paddle/fluid/operators/detection/bbox_util.cu.h @@ -30,7 +30,7 @@ namespace cub = hipcub; namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) diff --git a/paddle/fluid/operators/detection/bbox_util.h b/paddle/fluid/operators/detection/bbox_util.h index bfe3adc2c1f20..4046f6b2830d8 100644 --- a/paddle/fluid/operators/detection/bbox_util.h +++ b/paddle/fluid/operators/detection/bbox_util.h @@ -56,11 +56,11 @@ inline HOSTDEVICE T RoIArea(const T* box, bool pixel_offset = true) { */ template inline void BoxToDelta(const int box_num, - const framework::Tensor& ex_boxes, - const 
framework::Tensor& gt_boxes, + const phi::DenseTensor& ex_boxes, + const phi::DenseTensor& gt_boxes, const float* weights, const bool normalized, - framework::Tensor* box_delta) { + phi::DenseTensor* box_delta) { auto ex_boxes_et = framework::EigenTensor::From(ex_boxes); auto gt_boxes_et = framework::EigenTensor::From(gt_boxes); auto trg = framework::EigenTensor::From(*box_delta); @@ -101,9 +101,9 @@ void Gather( } template -void BboxOverlaps(const framework::Tensor& r_boxes, - const framework::Tensor& c_boxes, - framework::Tensor* overlaps) { +void BboxOverlaps(const phi::DenseTensor& r_boxes, + const phi::DenseTensor& c_boxes, + phi::DenseTensor* overlaps) { auto r_boxes_et = framework::EigenTensor::From(r_boxes); auto c_boxes_et = framework::EigenTensor::From(c_boxes); auto overlaps_et = framework::EigenTensor::From(*overlaps); @@ -136,7 +136,7 @@ void BboxOverlaps(const framework::Tensor& r_boxes, // Calculate max IoU between each box and ground-truth and // each row represents one box template -void MaxIoU(const framework::Tensor& iou, framework::Tensor* max_iou) { +void MaxIoU(const phi::DenseTensor& iou, phi::DenseTensor* max_iou) { const T* iou_data = iou.data(); int row = iou.dims()[0]; int col = iou.dims()[1]; @@ -148,9 +148,9 @@ void MaxIoU(const framework::Tensor& iou, framework::Tensor* max_iou) { } } -static void AppendProposals(framework::Tensor* dst, +static void AppendProposals(phi::DenseTensor* dst, int64_t offset, - const framework::Tensor& src) { + const phi::DenseTensor& src) { auto* out_data = dst->data(); auto* to_add_data = src.data(); size_t size_of_t = framework::DataTypeSize(src.dtype()); @@ -163,9 +163,9 @@ static void AppendProposals(framework::Tensor* dst, template void ClipTiledBoxes(const platform::DeviceContext& ctx, - const framework::Tensor& im_info, - const framework::Tensor& input_boxes, - framework::Tensor* out, + const phi::DenseTensor& im_info, + const phi::DenseTensor& input_boxes, + phi::DenseTensor* out, bool is_scale = true, bool pixel_offset = true) { T* out_data = out->mutable_data(ctx.GetPlace()); @@ -197,11 +197,11 @@ void ClipTiledBoxes(const platform::DeviceContext& ctx, // Filter the box with small area template void FilterBoxes(const platform::DeviceContext& ctx, - const framework::Tensor* boxes, + const phi::DenseTensor* boxes, float min_size, - const framework::Tensor& im_info, + const phi::DenseTensor& im_info, bool is_scale, - framework::Tensor* keep, + phi::DenseTensor* keep, bool pixel_offset = true) { const T* im_info_data = im_info.data(); const T* boxes_data = boxes->data(); @@ -238,10 +238,10 @@ void FilterBoxes(const platform::DeviceContext& ctx, template static void BoxCoder(const platform::DeviceContext& ctx, - framework::Tensor* all_anchors, - framework::Tensor* bbox_deltas, - framework::Tensor* variances, - framework::Tensor* proposals, + phi::DenseTensor* all_anchors, + phi::DenseTensor* bbox_deltas, + phi::DenseTensor* variances, + phi::DenseTensor* proposals, const bool pixel_offset = true) { T* proposals_data = proposals->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/detection/bipartite_match_op.cc b/paddle/fluid/operators/detection/bipartite_match_op.cc index ef824d2d8cdcd..45c21c0f570fb 100644 --- a/paddle/fluid/operators/detection/bipartite_match_op.cc +++ b/paddle/fluid/operators/detection/bipartite_match_op.cc @@ -18,7 +18,7 @@ limitations under the License. 
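[Editor's note] bbox_util.h above now passes phi::DenseTensor into its Eigen- and pointer-based helpers. As one concrete example, here is a sketch of MaxIoU consistent with the data-pointer prologue visible in the hunk; the diff only shows the signature and the row/col setup, so the reduction loop itself is an assumed completion.

// Row-wise max over an IoU matrix: each row corresponds to one box, and the
// output holds that box's best overlap with any ground-truth box.
#include <algorithm>

#include "paddle/phi/core/dense_tensor.h"

template <typename T>
void MaxIoU(const phi::DenseTensor& iou, phi::DenseTensor* max_iou) {
  const T* iou_data = iou.data<T>();
  int row = iou.dims()[0];
  int col = iou.dims()[1];
  T* max_iou_data = max_iou->data<T>();
  for (int i = 0; i < row; ++i) {
    const T* v = iou_data + i * col;
    max_iou_data[i] = *std::max_element(v, v + col);
  }
}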
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; class BipartiteMatchOp : public framework::OperatorWithKernel { @@ -72,7 +72,7 @@ class BipartiteMatchKernel : public framework::OpKernel { public: // The match_indices must be initialized to -1 at first. // The match_dist must be initialized to 0 at first. - void BipartiteMatch(const Tensor& dist, + void BipartiteMatch(const phi::DenseTensor& dist, int* match_indices, T* match_dist) const { PADDLE_ENFORCE_EQ( @@ -157,7 +157,7 @@ class BipartiteMatchKernel : public framework::OpKernel { } } - void ArgMaxMatch(const Tensor& dist, + void ArgMaxMatch(const phi::DenseTensor& dist, int* match_indices, T* match_dist, T overlap_threshold) const { @@ -197,8 +197,9 @@ class BipartiteMatchKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* dist_mat = context.Input("DistMat"); - auto* match_indices = context.Output("ColToRowMatchIndices"); - auto* match_dist = context.Output("ColToRowMatchDist"); + auto* match_indices = + context.Output("ColToRowMatchIndices"); + auto* match_dist = context.Output("ColToRowMatchDist"); auto& dev_ctx = context.device_context(); diff --git a/paddle/fluid/operators/detection/box_clip_op.cu b/paddle/fluid/operators/detection/box_clip_op.cu index 87dc4a30abb31..d50759c71fe3d 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cu +++ b/paddle/fluid/operators/detection/box_clip_op.cu @@ -22,7 +22,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTenso = framework::LoDTensor; static constexpr int ImInfoSize = 3; @@ -50,7 +50,7 @@ class GPUBoxClipKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { auto *input = context.Input("Input"); - auto *im_info = context.Input("ImInfo"); + auto *im_info = context.Input("ImInfo"); auto *output = context.Output("Output"); const int64_t num = input->dims()[0]; const int64_t bbox_width = input->numel() / num; diff --git a/paddle/fluid/operators/detection/box_clip_op.h b/paddle/fluid/operators/detection/box_clip_op.h index 5c816ee3eb5e2..e85ef88ccdc91 100644 --- a/paddle/fluid/operators/detection/box_clip_op.h +++ b/paddle/fluid/operators/detection/box_clip_op.h @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template diff --git a/paddle/fluid/operators/detection/box_coder_op_npu.cc b/paddle/fluid/operators/detection/box_coder_op_npu.cc index 8181f10f2bc5b..4a98920f64b19 100644 --- a/paddle/fluid/operators/detection/box_coder_op_npu.cc +++ b/paddle/fluid/operators/detection/box_coder_op_npu.cc @@ -18,7 +18,7 @@ limitations under the License. 
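[Editor's note] Several of the files above (bipartite_match, box_clip, box_coder NPU, and most of the detection ops that follow) keep a file-local `using Tensor = phi::DenseTensor;` while individual signatures are migrated, so existing unqualified `Tensor` spellings keep compiling. Below is a standalone, simplified illustration of why the alias makes the change source-compatible; the mock namespaces and class are stand-ins, not Paddle's real definitions.

// Mock types: a minimal phi::DenseTensor stand-in plus the transitional
// alias inside the operators namespace, as the patch keeps it.
#include <iostream>

namespace phi {
class DenseTensor {
 public:
  int numel() const { return 0; }
};
}  // namespace phi

namespace paddle {
namespace operators {

using Tensor = phi::DenseTensor;  // transitional alias kept by the patch

// New-style signature, spelled with phi::DenseTensor directly.
void Consume(const phi::DenseTensor& t) { std::cout << t.numel() << "\n"; }

// Old-style code that still refers to the unqualified alias.
Tensor MakeOne() { return Tensor(); }

}  // namespace operators
}  // namespace paddle

int main() {
  paddle::operators::Tensor t = paddle::operators::MakeOne();
  paddle::operators::Consume(t);  // both spellings name the same type
  return 0;
}

Where a file no longer needs the alias at all, the patch simply deletes the old `using framework::Tensor;` declaration, as the dequantize_op.h hunk earlier and the nms_op.cc hunk later in this patch do.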
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template struct BoxCoderFunction { @@ -28,29 +28,29 @@ struct BoxCoderFunction { stream = ctx.template device_context() .stream(); } - Tensor Adds(const Tensor& x, float scalar) { + Tensor Adds(const phi::DenseTensor& x, float scalar) { Tensor y; y.mutable_data(x.dims(), place); const auto& runner = NpuOpRunner("Adds", {x}, {y}, {{"value", scalar}}); runner.Run(stream); return y; } - Tensor Muls(const Tensor& x, float scalar) { + Tensor Muls(const phi::DenseTensor& x, float scalar) { Tensor y; y.mutable_data(x.dims(), place); const auto& runner = NpuOpRunner("Muls", {x}, {y}, {{"value", scalar}}); runner.Run(stream); return y; } - Tensor Mul(const Tensor& x, const Tensor& y) { + Tensor Mul(const phi::DenseTensor& x, const phi::DenseTensor& y) { Tensor z; z.mutable_data(x.dims(), place); const auto& runner = NpuOpRunner("Mul", {x, y}, {z}, {}); runner.Run(stream); return z; } - Tensor SubWithBroadCast(const Tensor& x, - const Tensor& y, + Tensor SubWithBroadCast(const phi::DenseTensor& x, + const phi::DenseTensor& y, const framework::DDim& shape) { Tensor z; z.mutable_data(shape, place); @@ -58,59 +58,59 @@ struct BoxCoderFunction { runner.Run(stream); return z; } - void DivWithBroadCastVoid(const Tensor& x, - const Tensor& y, + void DivWithBroadCastVoid(const phi::DenseTensor& x, + const phi::DenseTensor& y, const framework::DDim& shape, - Tensor* z) { + phi::DenseTensor* z) { z->mutable_data(shape, place); const auto& runner = NpuOpRunner("Div", {x, y}, {*z}, {}); runner.Run(stream); } - Tensor DivWithBroadCast(const Tensor& x, - const Tensor& y, + Tensor DivWithBroadCast(const phi::DenseTensor& x, + const phi::DenseTensor& y, const framework::DDim& shape) { Tensor z; DivWithBroadCastVoid(x, y, shape, &z); return z; } - void MulWithBroadCastVoid(const Tensor& x, - const Tensor& y, + void MulWithBroadCastVoid(const phi::DenseTensor& x, + const phi::DenseTensor& y, const framework::DDim& shape, - Tensor* z) { + phi::DenseTensor* z) { z->mutable_data(shape, place); const auto& runner = NpuOpRunner("Mul", {x, y}, {*z}, {}); runner.Run(stream); } - Tensor MulWithBroadCast(const Tensor& x, - const Tensor& y, + Tensor MulWithBroadCast(const phi::DenseTensor& x, + const phi::DenseTensor& y, const framework::DDim& shape) { Tensor z; MulWithBroadCastVoid(x, y, shape, &z); return z; } - void AddWithBroadCastVoid(const Tensor& x, - const Tensor& y, + void AddWithBroadCastVoid(const phi::DenseTensor& x, + const phi::DenseTensor& y, const framework::DDim& shape, - Tensor* z) { + phi::DenseTensor* z) { z->mutable_data(shape, place); const auto& runner = NpuOpRunner("AddV2", {x, y}, {*z}, {}); runner.Run(stream); } - Tensor AddWithBroadCast(const Tensor& x, - const Tensor& y, + Tensor AddWithBroadCast(const phi::DenseTensor& x, + const phi::DenseTensor& y, const framework::DDim& shape) { Tensor z; AddWithBroadCastVoid(x, y, shape, &z); return z; } - Tensor Abs(const Tensor& x) { + Tensor Abs(const phi::DenseTensor& x) { Tensor y; y.mutable_data(x.dims(), place); const auto& runner = NpuOpRunner("Abs", {x}, {y}, {}); runner.Run(stream); return y; } - Tensor Log(const Tensor& x) { + Tensor Log(const phi::DenseTensor& x) { Tensor t_x_m1 = Adds(x, -1); Tensor y; y.mutable_data(x.dims(), place); @@ -118,14 +118,14 @@ struct BoxCoderFunction { runner.Run(stream); return y; } - Tensor Exp(const Tensor& x) { + Tensor Exp(const phi::DenseTensor& x) { Tensor y; y.mutable_data(x.dims(), place); const 
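[Editor's note] The BoxCoderFunction hunks above repeat one pattern per NPU op: allocate the result with mutable_data on the kernel's place, build an NpuOpRunner, and run it on the context's stream. A condensed sketch of that pattern follows, with the constructor and the Muls body taken from the diff; the struct name is illustrative, and the include path and NPU device-context spelling are assumptions that may differ by Paddle version.

// Sketch of the wrapper style used by BoxCoderFunction above; assumes a
// Paddle NPU build where NpuOpRunner and NPUDeviceContext are available.
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"  // path assumed
#include "paddle/phi/core/dense_tensor.h"

namespace paddle {
namespace operators {

template <typename T>
struct NpuEltwiseHelper {  // illustrative name, modeled on BoxCoderFunction
  explicit NpuEltwiseHelper(const framework::ExecutionContext& ctx) {
    place = ctx.GetPlace();
    stream =
        ctx.template device_context<platform::NPUDeviceContext>().stream();
  }

  // Scalar multiply: allocate the output, then dispatch the NPU "Muls" op.
  phi::DenseTensor Muls(const phi::DenseTensor& x, float scalar) {
    phi::DenseTensor y;
    y.mutable_data<T>(x.dims(), place);
    const auto& runner = NpuOpRunner("Muls", {x}, {y}, {{"value", scalar}});
    runner.Run(stream);
    return y;
  }

  platform::Place place;
  aclrtStream stream;
};

}  // namespace operators
}  // namespace paddle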
auto& runner = NpuOpRunner("Exp", {x}, {y}, {}); runner.Run(stream); return y; } - Tensor Dot(const Tensor& x, const Tensor& y) { + Tensor Dot(const phi::DenseTensor& x, const phi::DenseTensor& y) { auto dim_x = x.dims(); auto dim_y = y.dims(); PADDLE_ENFORCE_EQ( @@ -158,7 +158,7 @@ struct BoxCoderFunction { void ConcatVoid(const std::vector& inputs, const framework::DDim& shape_out, int axis, - Tensor* output) { + phi::DenseTensor* output) { output->mutable_data(shape_out, place); std::vector names; for (size_t i = 0; i < inputs.size(); i++) { @@ -179,7 +179,7 @@ struct BoxCoderFunction { ConcatVoid(inputs, shape_out, axis, &output); return output; } - Tensor Slice(const Tensor& x, + Tensor Slice(const phi::DenseTensor& x, const std::vector& offsets, const std::vector& size, const framework::DDim& shape) { @@ -201,7 +201,7 @@ template void Vector2Tensor(const framework::ExecutionContext& ctx, const std::vector& vec, const framework::DDim& ddim, - Tensor* tsr) { + phi::DenseTensor* tsr) { framework::TensorFromVector(vec, ctx.device_context(), tsr); ctx.template device_context().Wait(); tsr->Resize(ddim); @@ -209,12 +209,12 @@ void Vector2Tensor(const framework::ExecutionContext& ctx, template void BoxCoderEnc(const framework::ExecutionContext& ctx, - const Tensor* tb, - const Tensor* pb, - const Tensor* pbv, + const phi::DenseTensor* tb, + const phi::DenseTensor* pb, + const phi::DenseTensor* pbv, const bool norm, const std::vector& variance, - Tensor* out) { + phi::DenseTensor* out) { auto M = pb->dims()[0]; auto N = tb->dims()[0]; auto shape_0 = phi::make_ddim({4, 2}); @@ -273,13 +273,13 @@ void BoxCoderEnc(const framework::ExecutionContext& ctx, template void BoxCoderDec(const framework::ExecutionContext& ctx, - const Tensor* tb, - const Tensor* pb, - const Tensor* pbv, + const phi::DenseTensor* tb, + const phi::DenseTensor* pb, + const phi::DenseTensor* pbv, const bool norm, const std::vector& variance, int axis, - Tensor* out) { + phi::DenseTensor* out) { auto shape_0 = phi::make_ddim({4, 2}); Tensor m_diff; Tensor m_aver; @@ -378,10 +378,10 @@ template class BoxCoderNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* prior_box = ctx.Input("PriorBox"); - auto* prior_box_var = ctx.Input("PriorBoxVar"); + auto* prior_box = ctx.Input("PriorBox"); + auto* prior_box_var = ctx.Input("PriorBoxVar"); auto* target_box = ctx.Input("TargetBox"); - auto* output_box = ctx.Output("OutputBox"); + auto* output_box = ctx.Output("OutputBox"); std::vector variance = ctx.Attr>("variance"); const int axis = ctx.Attr("axis"); diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu index f87a636bdfb02..e37c0299110a3 100644 --- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu +++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.cu @@ -100,12 +100,12 @@ class BoxDecoderAndAssignCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* prior_box = context.Input("PriorBox"); - auto* prior_box_var = context.Input("PriorBoxVar"); + auto* prior_box_var = context.Input("PriorBoxVar"); auto* target_box = context.Input("TargetBox"); auto* box_score = context.Input("BoxScore"); - auto* output_box = context.Output("DecodeBox"); + auto* output_box = context.Output("DecodeBox"); auto* output_assign_box = - context.Output("OutputAssignBox"); + 
context.Output("OutputAssignBox"); auto roi_num = target_box->dims()[0]; auto class_num = box_score->dims()[1]; diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h index 85ee3b76448ad..1377fecd3d4d8 100644 --- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h +++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h @@ -25,12 +25,12 @@ class BoxDecoderAndAssignKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* prior_box = context.Input("PriorBox"); - auto* prior_box_var = context.Input("PriorBoxVar"); + auto* prior_box_var = context.Input("PriorBoxVar"); auto* target_box = context.Input("TargetBox"); auto* box_score = context.Input("BoxScore"); - auto* output_box = context.Output("DecodeBox"); + auto* output_box = context.Output("DecodeBox"); auto* output_assign_box = - context.Output("OutputAssignBox"); + context.Output("OutputAssignBox"); int roi_num = target_box->dims()[0]; int class_num = box_score->dims()[1]; auto* target_box_data = target_box->data(); diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc index 48902f517967b..95b9d006bffe6 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc @@ -16,7 +16,7 @@ limitations under the License.*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; class CollectFpnProposalsOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu index 0fbc54d3135d6..936dc7a50b45b 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu @@ -33,7 +33,7 @@ namespace cub = hipcub; namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; static constexpr int kNumCUDAThreads = 64; @@ -89,12 +89,12 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel { int lod_size; auto place = dev_ctx.GetPlace(); - auto multi_rois_num = ctx.MultiInput("MultiLevelRoIsNum"); + auto multi_rois_num = ctx.MultiInput("MultiLevelRoIsNum"); for (size_t i = 0; i < roi_ins.size(); ++i) { auto roi_in = roi_ins[i]; auto score_in = score_ins[i]; if (multi_rois_num.size() > 0) { - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync( *multi_rois_num[i], platform::CPUPlace(), &temp); const int* length_in = temp.data(); @@ -250,7 +250,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel { } if (ctx.HasOutput("RoisNum")) { - auto* rois_num = ctx.Output("RoisNum"); + auto* rois_num = ctx.Output("RoisNum"); int* rois_num_data = rois_num->mutable_data({lod_size}, place); memory::Copy(place, rois_num_data, diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h index c9b9acfcb2005..6ac6fc3e09a69 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h @@ -67,7 +67,7 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel { 
auto multi_layer_scores = context.MultiInput("MultiLevelScores"); auto multi_rois_num = - context.MultiInput("MultiLevelRoIsNum"); + context.MultiInput("MultiLevelRoIsNum"); int num_size = multi_rois_num.size(); auto* fpn_rois = context.Output("FpnRois"); @@ -182,7 +182,7 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel { } num_per_batch.emplace_back(post_nms_topN - pre_idx); if (context.HasOutput("RoisNum")) { - auto* rois_num = context.Output("RoisNum"); + auto* rois_num = context.Output("RoisNum"); int* rois_num_data = rois_num->mutable_data({batch_size}, context.GetPlace()); for (int i = 0; i < batch_size; i++) { diff --git a/paddle/fluid/operators/detection/density_prior_box_op.cu b/paddle/fluid/operators/detection/density_prior_box_op.cu index aa60d054546cd..9dbdc35d07f02 100644 --- a/paddle/fluid/operators/detection/density_prior_box_op.cu +++ b/paddle/fluid/operators/detection/density_prior_box_op.cu @@ -87,10 +87,10 @@ template class DensityPriorBoxOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* image = ctx.Input("Image"); - auto* boxes = ctx.Output("Boxes"); - auto* vars = ctx.Output("Variances"); + auto* input = ctx.Input("Input"); + auto* image = ctx.Input("Image"); + auto* boxes = ctx.Output("Boxes"); + auto* vars = ctx.Output("Variances"); auto variances = ctx.Attr>("variances"); auto is_clip = ctx.Attr("clip"); @@ -124,7 +124,7 @@ class DensityPriorBoxOpCUDAKernel : public framework::OpKernel { } int step_average = static_cast((step_width + step_height) * 0.5); - framework::Tensor h_temp; + phi::DenseTensor h_temp; T* tdata = h_temp.mutable_data({num_priors * 4}, platform::CPUPlace()); int idx = 0; for (size_t s = 0; s < fixed_sizes.size(); ++s) { @@ -152,7 +152,7 @@ class DensityPriorBoxOpCUDAKernel : public framework::OpKernel { boxes->mutable_data(ctx.GetPlace()); vars->mutable_data(ctx.GetPlace()); - framework::Tensor d_temp; + phi::DenseTensor d_temp; framework::TensorCopy(h_temp, ctx.GetPlace(), &d_temp); // At least use 32 threads, at most 512 threads. 
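[Editor's note] The density-prior-box CUDA kernel above goes the other way: it fills a small lookup table on the host (h_temp) and then moves it to the kernel's place with framework::TensorCopy. A minimal sketch of that host-build-then-copy step follows; the helper name and the placeholder fill are illustrative, only the allocation and copy calls mirror the hunk.

// Builds a small table on the CPU and copies it to the device the kernel
// runs on; TensorCopy picks the device context associated with the place.
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/dense_tensor.h"

namespace paddle {
namespace operators {

template <typename T>
phi::DenseTensor BuildOnHostThenCopy(const framework::ExecutionContext& ctx,
                                     int64_t numel) {
  phi::DenseTensor h_temp;
  T* tdata = h_temp.mutable_data<T>({numel}, platform::CPUPlace());
  for (int64_t i = 0; i < numel; ++i) {
    tdata[i] = static_cast<T>(0);  // placeholder; the real kernel writes
                                   // per-prior widths and heights here
  }
  phi::DenseTensor d_temp;
  framework::TensorCopy(h_temp, ctx.GetPlace(), &d_temp);
  return d_temp;
}

}  // namespace operators
}  // namespace paddle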
diff --git a/paddle/fluid/operators/detection/density_prior_box_op.h b/paddle/fluid/operators/detection/density_prior_box_op.h index 0912ce9016031..f3e3cb0ffb6ca 100644 --- a/paddle/fluid/operators/detection/density_prior_box_op.h +++ b/paddle/fluid/operators/detection/density_prior_box_op.h @@ -22,10 +22,10 @@ template class DensityPriorBoxOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* image = ctx.Input("Image"); - auto* boxes = ctx.Output("Boxes"); - auto* vars = ctx.Output("Variances"); + auto* input = ctx.Input("Input"); + auto* image = ctx.Input("Image"); + auto* boxes = ctx.Output("Boxes"); + auto* vars = ctx.Output("Variances"); auto variances = ctx.Attr>("variances"); auto clip = ctx.Attr("clip"); @@ -121,7 +121,7 @@ class DensityPriorBoxOpKernel : public framework::OpKernel { return std::min(std::max(v, 0.), 1.); }); } - framework::Tensor var_t; + phi::DenseTensor var_t; var_t.mutable_data( phi::make_ddim({1, static_cast(variances.size())}), ctx.GetPlace()); diff --git a/paddle/fluid/operators/detection/density_prior_box_op_npu.cc b/paddle/fluid/operators/detection/density_prior_box_op_npu.cc index 2b28cd926f513..a6f9170712d96 100644 --- a/paddle/fluid/operators/detection/density_prior_box_op_npu.cc +++ b/paddle/fluid/operators/detection/density_prior_box_op_npu.cc @@ -15,7 +15,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using fp16 = paddle::platform::float16; template @@ -31,55 +31,67 @@ struct DensityPriorBoxFunction { FillNpuTensorWithConstant(&t0, static_cast(0)); FillNpuTensorWithConstant(&t1, static_cast(1)); } - void Arange(int n, Tensor* x) { + void Arange(int n, phi::DenseTensor* x) { // x should be init first FillNpuTensorWithConstant(&tn, static_cast(n)); const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {*x}, {}); runner.Run(stream); } - void Add(const Tensor* x, const Tensor* y, Tensor* z) { + void Add(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // z should be init first const auto& runner = NpuOpRunner("AddV2", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Cast(const Tensor* x, Tensor* y) { + void Cast(const phi::DenseTensor* x, phi::DenseTensor* y) { auto dst_dtype = ConvertToNpuDtype(framework::TransToProtoVarType(y->type())); const auto& runner = NpuOpRunner( "Cast", {*x}, {*y}, {{"dst_type", static_cast(dst_dtype)}}); runner.Run(stream); } - void Sub(const Tensor* x, const Tensor* y, Tensor* z) { + void Sub(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // z should be init first const auto& runner = NpuOpRunner("Sub", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Mul(const Tensor* x, const Tensor* y, Tensor* z) { + void Mul(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first const auto& runner = NpuOpRunner("Mul", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Adds(const Tensor* x, float scalar, Tensor* y) { + void Adds(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) { // y should be init first const auto& runner = NpuOpRunner("Adds", {*x}, {*y}, {{"value", scalar}}); runner.Run(stream); } - void Muls(const Tensor* x, float scalar, Tensor* y) { + void Muls(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) { // y should be init first const auto& runner = NpuOpRunner("Muls", {*x}, 
{*y}, {{"value", scalar}}); runner.Run(stream); } - void Maximum(const Tensor* x, const Tensor* y, Tensor* z) { + void Maximum(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first const auto& runner = NpuOpRunner("Maximum", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Minimum(const Tensor* x, const Tensor* y, Tensor* z) { + void Minimum(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Concat(const std::vector& inputs, int axis, Tensor* output) { + void Concat(const std::vector& inputs, + int axis, + phi::DenseTensor* output) { // output should be init first std::vector names; for (size_t i = 0; i < inputs.size(); i++) { @@ -93,7 +105,9 @@ struct DensityPriorBoxFunction { runner.AddInputNames(names); runner.Run(stream); } - void Tile(const Tensor* x, Tensor* y, const std::vector& multiples) { + void Tile(const phi::DenseTensor* x, + phi::DenseTensor* y, + const std::vector& multiples) { // y should be init first if (x->dims() == y->dims()) { framework::TensorCopy( @@ -107,7 +121,7 @@ struct DensityPriorBoxFunction { NpuOpRunner("TileD", {*x}, {*y}, {{"multiples", multiples}}); runner.Run(stream); } - void FloatVec2Tsr(const std::vector& vec, Tensor* tsr_dst) { + void FloatVec2Tsr(const std::vector& vec, phi::DenseTensor* tsr_dst) { // framework::TensorFromVector(vec, ctx.device_context(), tsr_dst); ctx.template device_context().Wait(); @@ -123,7 +137,7 @@ struct DensityPriorBoxFunction { }; template <> -void DensityPriorBoxFunction::Arange(int n, Tensor* x) { +void DensityPriorBoxFunction::Arange(int n, phi::DenseTensor* x) { Tensor x_fp32(experimental::DataType::FLOAT32); x_fp32.mutable_data(x->dims(), place); FillNpuTensorWithConstant(&tn, static_cast(n)); @@ -134,7 +148,7 @@ void DensityPriorBoxFunction::Arange(int n, Tensor* x) { template <> void DensityPriorBoxFunction::FloatVec2Tsr(const std::vector& vec, - Tensor* tsr_dst) { + phi::DenseTensor* tsr_dst) { Tensor tsr_fp32(experimental::DataType::FLOAT32); tsr_fp32.mutable_data(tsr_dst->dims(), place); framework::TensorFromVector(vec, ctx.device_context(), &tsr_fp32); @@ -146,10 +160,10 @@ template class DensityPriorBoxOpNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* image = ctx.Input("Image"); - auto* boxes = ctx.Output("Boxes"); - auto* vars = ctx.Output("Variances"); + auto* input = ctx.Input("Input"); + auto* image = ctx.Input("Image"); + auto* boxes = ctx.Output("Boxes"); + auto* vars = ctx.Output("Variances"); auto variances = ctx.Attr>("variances"); auto clip = ctx.Attr("clip"); diff --git a/paddle/fluid/operators/detection/generate_mask_labels_op.cc b/paddle/fluid/operators/detection/generate_mask_labels_op.cc index 5473a57902b87..feb12ab90f211 100644 --- a/paddle/fluid/operators/detection/generate_mask_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_mask_labels_op.cc @@ -25,12 +25,12 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; const int kBoxDim = 4; template -void AppendMask(LoDTensor* out, int64_t offset, Tensor* to_add) { +void AppendMask(LoDTensor* out, int64_t offset, phi::DenseTensor* to_add) { auto* out_data = out->data(); auto* to_add_data = to_add->data(); memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T)); @@ -123,11 +123,11 @@ class GenerateMaskLabelsOp : public framework::OperatorWithKernel { */ template static inline void ExpandMaskTarget(const phi::CPUContext& ctx, - const Tensor& masks, - const Tensor& mask_class_labels, + const phi::DenseTensor& masks, + const phi::DenseTensor& mask_class_labels, const int resolution, const int num_classes, - Tensor* mask_targets) { + phi::DenseTensor* mask_targets) { const uint8_t* masks_data = masks.data(); int64_t num_mask = masks.dims()[0]; const int* mask_class_labels_data = mask_class_labels.data(); @@ -151,12 +151,12 @@ static inline void ExpandMaskTarget(const phi::CPUContext& ctx, template std::vector SampleMaskForOneImage(const phi::CPUContext& ctx, - const Tensor& im_info, - const Tensor& gt_classes, - const Tensor& is_crowd, - const Tensor& gt_segms, - const Tensor& rois, - const Tensor& label_int32, + const phi::DenseTensor& im_info, + const phi::DenseTensor& gt_classes, + const phi::DenseTensor& is_crowd, + const phi::DenseTensor& gt_segms, + const phi::DenseTensor& rois, + const phi::DenseTensor& label_int32, const int num_classes, const int resolution, const framework::LoD& segm_length) { diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc index 7376e0993a506..64a6120bbcad2 100644 --- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc @@ -25,12 +25,12 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; const int kBoxDim = 4; template -void AppendRois(LoDTensor* out, int64_t offset, Tensor* to_add) { +void AppendRois(LoDTensor* out, int64_t offset, phi::DenseTensor* to_add) { auto* out_data = out->data(); auto* to_add_data = to_add->data(); memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T)); @@ -41,9 +41,9 @@ void AppendRois(LoDTensor* out, int64_t offset, Tensor* to_add) { // and the corresponding RoI will be removed. 
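[Editor's note] generate_mask_labels and generate_proposal_labels above both append per-image results into a pre-sized LoDTensor with a raw memcpy at a running offset. The sketch below restores the stripped template arguments; the <T> placements are assumptions, the body is as shown in the hunks. As used here, offset counts elements (not bytes), so callers advance it by to_add->numel() between calls.

// Append `to_add` into `out` starting at element offset `offset`.
// `out` must already be allocated large enough to hold all appended chunks.
#include <cstring>

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/phi/core/dense_tensor.h"

namespace paddle {
namespace operators {

template <typename T>
void AppendRois(framework::LoDTensor* out, int64_t offset,
                phi::DenseTensor* to_add) {
  auto* out_data = out->data<T>();
  auto* to_add_data = to_add->data<T>();
  std::memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T));
}

}  // namespace operators
}  // namespace paddle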
template void FilterRoIs(const platform::DeviceContext& ctx, - const Tensor& rpn_rois, - const Tensor& max_overlap, - Tensor* keep) { + const phi::DenseTensor& rpn_rois, + const phi::DenseTensor& max_overlap, + phi::DenseTensor* keep) { const T* rpn_rois_dt = rpn_rois.data(); const T* max_overlap_dt = max_overlap.data(); int rois_num = max_overlap.numel(); @@ -169,9 +169,9 @@ class GenerateProposalLabelsOp : public framework::OperatorWithKernel { template void Concat(const phi::CPUContext& context, - const Tensor& in_tensor_a, - const Tensor& in_tensor_b, - Tensor* out_tensor) { + const phi::DenseTensor& in_tensor_a, + const phi::DenseTensor& in_tensor_b, + phi::DenseTensor* out_tensor) { int axis = 0; std::vector inputs; inputs.emplace_back(in_tensor_a); @@ -182,8 +182,8 @@ void Concat(const phi::CPUContext& context, template std::vector> SampleFgBgGt(const phi::CPUContext& context, - Tensor* iou, - const Tensor& is_crowd, + phi::DenseTensor* iou, + const phi::DenseTensor& is_crowd, const int batch_size_per_im, const float fg_fraction, const float fg_thresh, @@ -192,7 +192,7 @@ std::vector> SampleFgBgGt(const phi::CPUContext& context, std::minstd_rand engine, const bool use_random, const bool is_cascade_rcnn, - const Tensor& rpn_rois) { + const phi::DenseTensor& rpn_rois) { std::vector fg_inds; std::vector bg_inds; std::vector mapped_gt_inds; @@ -286,17 +286,17 @@ std::vector> SampleFgBgGt(const phi::CPUContext& context, template void GatherBoxesLabels(const phi::CPUContext& context, - const Tensor& boxes, - const Tensor& max_overlap, - const Tensor& gt_boxes, - const Tensor& gt_classes, + const phi::DenseTensor& boxes, + const phi::DenseTensor& max_overlap, + const phi::DenseTensor& gt_boxes, + const phi::DenseTensor& gt_classes, const std::vector& fg_inds, const std::vector& bg_inds, const std::vector& gt_inds, - Tensor* sampled_boxes, - Tensor* sampled_labels, - Tensor* sampled_gts, - Tensor* sampled_max_overlap) { + phi::DenseTensor* sampled_boxes, + phi::DenseTensor* sampled_labels, + phi::DenseTensor* sampled_gts, + phi::DenseTensor* sampled_max_overlap) { int fg_num = fg_inds.size(); int bg_num = bg_inds.size(); Tensor fg_inds_t, bg_inds_t, gt_box_inds_t, gt_label_inds_t; @@ -335,11 +335,11 @@ void GatherBoxesLabels(const phi::CPUContext& context, template std::vector SampleRoisForOneImage( const phi::CPUContext& context, - const Tensor& rpn_rois_in, - const Tensor& gt_classes, - const Tensor& is_crowd, - const Tensor& gt_boxes, - const Tensor& im_info, + const phi::DenseTensor& rpn_rois_in, + const phi::DenseTensor& gt_classes, + const phi::DenseTensor& is_crowd, + const phi::DenseTensor& gt_boxes, + const phi::DenseTensor& im_info, const int batch_size_per_im, const float fg_fraction, const float fg_thresh, @@ -351,7 +351,7 @@ std::vector SampleRoisForOneImage( bool use_random, bool is_cascade_rcnn, bool is_cls_agnostic, - const Tensor& max_overlap) { + const phi::DenseTensor& max_overlap) { // 1.1 map to original image auto im_scale = im_info.data()[2]; Tensor rpn_rois; @@ -618,7 +618,7 @@ class GenerateProposalLabelsKernel : public framework::OpKernel { Tensor im_info_slice = im_info->Slice(i, i + 1); Tensor max_overlap_slice; if (is_cascade_rcnn) { - auto* max_overlap = context.Input("MaxOverlap"); + auto* max_overlap = context.Input("MaxOverlap"); max_overlap_slice = max_overlap->Slice(rpn_rois_lod[i], rpn_rois_lod[i + 1]); } else { diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cc b/paddle/fluid/operators/detection/generate_proposals_op.cc index 
0118cc1f76b3f..84f542b61d120 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_op.cc @@ -27,7 +27,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; class GenerateProposalsOp : public framework::OperatorWithKernel { @@ -77,17 +77,18 @@ template class GenerateProposalsKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *scores = context.Input("Scores"); - auto *bbox_deltas = context.Input("BboxDeltas"); - auto *im_info = context.Input("ImInfo"); - auto anchors = GET_DATA_SAFELY(context.Input("Anchors"), + auto *scores = context.Input("Scores"); + auto *bbox_deltas = context.Input("BboxDeltas"); + auto *im_info = context.Input("ImInfo"); + auto anchors = GET_DATA_SAFELY(context.Input("Anchors"), "Input", "Anchors", "GenerateProposals"); - auto variances = GET_DATA_SAFELY(context.Input("Variances"), - "Input", - "Variances", - "GenerateProposals"); + auto variances = + GET_DATA_SAFELY(context.Input("Variances"), + "Input", + "Variances", + "GenerateProposals"); auto *rpn_rois = context.Output("RpnRois"); auto *rpn_roi_probs = context.Output("RpnRoiProbs"); @@ -165,7 +166,7 @@ class GenerateProposalsKernel : public framework::OpKernel { tmp_num.push_back(proposals.dims()[0]); } if (context.HasOutput("RpnRoisNum")) { - auto *rpn_rois_num = context.Output("RpnRoisNum"); + auto *rpn_rois_num = context.Output("RpnRoisNum"); rpn_rois_num->mutable_data({num}, context.GetPlace()); int *num_data = rpn_rois_num->data(); for (int i = 0; i < num; i++) { diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cu b/paddle/fluid/operators/detection/generate_proposals_op.cu index ed1ad6da34d4a..64aea88758c3d 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cu +++ b/paddle/fluid/operators/detection/generate_proposals_op.cu @@ -28,7 +28,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; namespace { @@ -131,17 +131,18 @@ template class CUDAGenerateProposalsKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *scores = context.Input("Scores"); - auto *bbox_deltas = context.Input("BboxDeltas"); - auto *im_info = context.Input("ImInfo"); - auto anchors = GET_DATA_SAFELY(context.Input("Anchors"), + auto *scores = context.Input("Scores"); + auto *bbox_deltas = context.Input("BboxDeltas"); + auto *im_info = context.Input("ImInfo"); + auto anchors = GET_DATA_SAFELY(context.Input("Anchors"), "Input", "Anchors", "GenerateProposals"); - auto variances = GET_DATA_SAFELY(context.Input("Variances"), - "Input", - "Variances", - "GenerateProposals"); + auto variances = + GET_DATA_SAFELY(context.Input("Variances"), + "Input", + "Variances", + "GenerateProposals"); auto *rpn_rois = context.Output("RpnRois"); auto *rpn_roi_probs = context.Output("RpnRoiProbs"); @@ -240,7 +241,7 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel { tmp_num.push_back(proposals.dims()[0]); } if (context.HasOutput("RpnRoisNum")) { - auto *rpn_rois_num = context.Output("RpnRoisNum"); + auto *rpn_rois_num = context.Output("RpnRoisNum"); rpn_rois_num->mutable_data({num}, context.GetPlace()); int *num_data = rpn_rois_num->data(); memory::Copy(place, diff --git a/paddle/fluid/operators/detection/generate_proposals_v2_op.cc b/paddle/fluid/operators/detection/generate_proposals_v2_op.cc index 15918030c024b..71c944a1e68aa 100644 --- a/paddle/fluid/operators/detection/generate_proposals_v2_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_v2_op.cc @@ -29,7 +29,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; class GenerateProposalsV2Op : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/detection/iou_similarity_op.h b/paddle/fluid/operators/detection/iou_similarity_op.h index 807ccd68c5377..ae5095e664705 100644 --- a/paddle/fluid/operators/detection/iou_similarity_op.h +++ b/paddle/fluid/operators/detection/iou_similarity_op.h @@ -110,7 +110,7 @@ class IOUSimilarityKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const framework::LoDTensor* in_x = ctx.Input("X"); - const framework::Tensor* in_y = ctx.Input("Y"); + const phi::DenseTensor* in_y = ctx.Input("Y"); bool normalized = ctx.Attr("box_normalized"); framework::LoDTensor* out = ctx.Output("Out"); diff --git a/paddle/fluid/operators/detection/iou_similarity_op_mlu.cc b/paddle/fluid/operators/detection/iou_similarity_op_mlu.cc index 617daf670b0da..f043bbd2e162a 100644 --- a/paddle/fluid/operators/detection/iou_similarity_op_mlu.cc +++ b/paddle/fluid/operators/detection/iou_similarity_op_mlu.cc @@ -18,7 +18,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template struct IouFunction { @@ -26,11 +26,15 @@ struct IouFunction { explicit IouFunction(const framework::ExecutionContext& ctx) : ctx(ctx) { place = ctx.GetPlace(); } - void Transpose(const Tensor* x, Tensor* y, const std::vector& axis) { + void Transpose(const phi::DenseTensor* x, + phi::DenseTensor* y, + const std::vector& axis) { // y should be init first TransposeFromMLUTensor(ctx, axis, x, y, false /*need_reshape_or_alloc*/); } - void Add(const Tensor* x, const Tensor* y, Tensor* z) { + void Add(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first MLUCnnlTensorDesc x_desc(*x); MLUCnnlTensorDesc y_desc(*y); @@ -49,7 +53,9 @@ struct IouFunction { ToCnnlDataType()); } - void Sub(const Tensor* x, const Tensor* y, Tensor* z) { + void Sub(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first MLUCnnlTensorDesc x_desc(*x); MLUCnnlTensorDesc y_desc(*y); @@ -67,7 +73,9 @@ struct IouFunction { GetBasePtr(z), ToCnnlDataType()); } - void Mul(const Tensor* x, const Tensor* y, Tensor* z) { + void Mul(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // z should be init first MLUCnnlTensorDesc x_desc(*x); MLUCnnlTensorDesc y_desc(*y); @@ -85,7 +93,9 @@ struct IouFunction { GetBasePtr(z), ToCnnlDataType()); } - void DivNoNan(const Tensor* x, const Tensor* y, Tensor* z) { + void DivNoNan(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // z should be init first MLUCnnlTensorDesc x_desc(*x); MLUCnnlTensorDesc y_desc(*y); @@ -102,7 +112,7 @@ struct IouFunction { z_desc.get(), GetBasePtr(z)); } - void Adds(const Tensor* x, float scalar, Tensor* y) { + void Adds(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) { // y should be init first MLUCnnlTensorDesc x_desc(*x); MLUCnnlTensorDesc y_desc(*y); @@ -116,7 +126,9 @@ struct IouFunction { y_desc.get(), GetBasePtr(y)); } - void Maximum(const Tensor* x, const Tensor* y, Tensor* z) { + void Maximum(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // z should be init first MLUCnnlTensorDesc x_desc(*x); MLUCnnlTensorDesc y_desc(*y); @@ -130,7 +142,9 @@ struct IouFunction { z_desc.get(), GetBasePtr(z)); } - void Minimum(const Tensor* x, const Tensor* y, Tensor* z) { + void Minimum(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // z should be init first MLUCnnlTensorDesc x_desc(*x); MLUCnnlTensorDesc y_desc(*y); @@ -155,7 +169,7 @@ class IouSimilarityMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); + auto* y = ctx.Input("Y"); bool normalized = ctx.Attr("box_normalized"); auto* out = ctx.Output("Out"); diff --git a/paddle/fluid/operators/detection/iou_similarity_op_npu.cc b/paddle/fluid/operators/detection/iou_similarity_op_npu.cc index 19c9d516976b7..ab7716a909588 100644 --- a/paddle/fluid/operators/detection/iou_similarity_op_npu.cc +++ b/paddle/fluid/operators/detection/iou_similarity_op_npu.cc @@ -18,7 +18,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template struct IouFunction { @@ -28,43 +28,57 @@ struct IouFunction { stream = ctx.template device_context() .stream(); } - void Transpose(const Tensor* x, Tensor* y, const std::vector& axis) { + void Transpose(const phi::DenseTensor* x, + phi::DenseTensor* y, + const std::vector& axis) { // y should be init first const auto& runner = NpuOpRunner("TransposeD", {*x}, {*y}, {{"perm", axis}}); runner.Run(stream); } - void Add(const Tensor* x, const Tensor* y, Tensor* z) { + void Add(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first const auto& runner = NpuOpRunner("AddV2", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Sub(const Tensor* x, const Tensor* y, Tensor* z) { + void Sub(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first const auto& runner = NpuOpRunner("Sub", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Mul(const Tensor* x, const Tensor* y, Tensor* z) { + void Mul(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first const auto& runner = NpuOpRunner("Mul", {*x, *y}, {*z}, {}); runner.Run(stream); } - void DivNoNan(const Tensor* x, const Tensor* y, Tensor* z) { + void DivNoNan(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first const auto& runner = NpuOpRunner("DivNoNan", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Adds(const Tensor* x, float scalar, Tensor* y) { + void Adds(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) { // y should be init first const auto& runner = NpuOpRunner("Adds", {*x}, {*y}, {{"value", scalar}}); runner.Run(stream); } - void Maximum(const Tensor* x, const Tensor* y, Tensor* z) { + void Maximum(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // z should be init first const auto& runner = NpuOpRunner("Maximum", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Minimum(const Tensor* x, const Tensor* y, Tensor* z) { + void Minimum(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // z should be init first const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {}); runner.Run(stream); @@ -81,7 +95,7 @@ class IouSimilarityNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); + auto* y = ctx.Input("Y"); bool normalized = ctx.Attr("box_normalized"); auto* out = ctx.Output("Out"); diff --git a/paddle/fluid/operators/detection/iou_similarity_op_xpu.cc b/paddle/fluid/operators/detection/iou_similarity_op_xpu.cc index 04e2b758e8074..0a9f077e03bf0 100644 --- a/paddle/fluid/operators/detection/iou_similarity_op_xpu.cc +++ b/paddle/fluid/operators/detection/iou_similarity_op_xpu.cc @@ -24,7 +24,7 @@ class XPUIOUSimilarityKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const framework::LoDTensor* in_x = ctx.Input("X"); - const framework::Tensor* in_y = ctx.Input("Y"); + const phi::DenseTensor* in_y = ctx.Input("Y"); bool normalized = ctx.Attr("box_normalized"); framework::LoDTensor* out = ctx.Output("Out"); diff --git a/paddle/fluid/operators/detection/locality_aware_nms_op.cc b/paddle/fluid/operators/detection/locality_aware_nms_op.cc index 
16e2c28265d14..f75e4c96ba81d 100644 --- a/paddle/fluid/operators/detection/locality_aware_nms_op.cc +++ b/paddle/fluid/operators/detection/locality_aware_nms_op.cc @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; class LocalityAwareNMSOp : public framework::OperatorWithKernel { @@ -166,8 +166,8 @@ void GetMaxScoreIndexWithLocalityAware( template class LocalityAwareNMSKernel : public framework::OpKernel { public: - void LocalityAwareNMSFast(Tensor* bbox, - Tensor* scores, + void LocalityAwareNMSFast(phi::DenseTensor* bbox, + phi::DenseTensor* scores, const T score_threshold, const T nms_threshold, const T eta, @@ -237,8 +237,8 @@ class LocalityAwareNMSKernel : public framework::OpKernel { } void LocalityAwareNMS(const framework::ExecutionContext& ctx, - Tensor* scores, - Tensor* bboxes, + phi::DenseTensor* scores, + phi::DenseTensor* bboxes, const int scores_size, std::map>* indices, int* num_nmsed_out) const { @@ -309,11 +309,11 @@ class LocalityAwareNMSKernel : public framework::OpKernel { void LocalityAwareNMSOutput( const platform::DeviceContext& ctx, - const Tensor& scores, - const Tensor& bboxes, + const phi::DenseTensor& scores, + const phi::DenseTensor& bboxes, const std::map>& selected_indices, const int scores_size, - Tensor* outs, + phi::DenseTensor* outs, int* oindices = nullptr, const int offset = 0) const { int64_t predict_dim = scores.dims()[1]; diff --git a/paddle/fluid/operators/detection/matrix_nms_op.cc b/paddle/fluid/operators/detection/matrix_nms_op.cc index 1c755c62ebc1b..618b2bdd23d89 100644 --- a/paddle/fluid/operators/detection/matrix_nms_op.cc +++ b/paddle/fluid/operators/detection/matrix_nms_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; class MatrixNMSOp : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/detection/mine_hard_examples_op.cc b/paddle/fluid/operators/detection/mine_hard_examples_op.cc index f3df3b228d7ee..577b4ca572f36 100644 --- a/paddle/fluid/operators/detection/mine_hard_examples_op.cc +++ b/paddle/fluid/operators/detection/mine_hard_examples_op.cc @@ -53,10 +53,10 @@ template class MineHardExamplesKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in_cls_loss = ctx.Input("ClsLoss"); - auto* in_loc_loss = ctx.Input("LocLoss"); - auto* in_matched_indices = ctx.Input("MatchIndices"); - auto* in_match_dist = ctx.Input("MatchDist"); + auto* in_cls_loss = ctx.Input("ClsLoss"); + auto* in_loc_loss = ctx.Input("LocLoss"); + auto* in_matched_indices = ctx.Input("MatchIndices"); + auto* in_match_dist = ctx.Input("MatchDist"); float neg_pos_ratio = ctx.Attr("neg_pos_ratio"); T neg_dist_threshold = static_cast(ctx.Attr("neg_dist_threshold")); @@ -66,7 +66,7 @@ class MineHardExamplesKernel : public framework::OpKernel { auto out_neg_indices = ctx.Output("NegIndices"); auto out_match_indices = - ctx.Output("UpdatedMatchIndices"); + ctx.Output("UpdatedMatchIndices"); framework::TensorCopy( *in_matched_indices, ctx.GetPlace(), out_match_indices); diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc b/paddle/fluid/operators/detection/multiclass_nms_op.cc index 67b26ddbc2df9..676ee804e23bc 100644 --- a/paddle/fluid/operators/detection/multiclass_nms_op.cc +++ b/paddle/fluid/operators/detection/multiclass_nms_op.cc @@ -21,10 +21,11 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; -inline std::vector GetNmsLodFromRoisNum(const Tensor* rois_num) { +inline std::vector GetNmsLodFromRoisNum( + const phi::DenseTensor* rois_num) { std::vector rois_lod; auto* rois_num_data = rois_num->data(); rois_lod.push_back(static_cast(0)); @@ -124,9 +125,9 @@ class MultiClassNMSOp : public framework::OperatorWithKernel { template void SliceOneClass(const platform::DeviceContext& ctx, - const framework::Tensor& items, + const phi::DenseTensor& items, const int class_id, - framework::Tensor* one_class_item) { + phi::DenseTensor* one_class_item) { T* item_data = one_class_item->mutable_data(ctx.GetPlace()); const T* items_data = items.data(); const int64_t num_item = items.dims()[0]; @@ -148,8 +149,8 @@ void SliceOneClass(const platform::DeviceContext& ctx, template class MultiClassNMSKernel : public framework::OpKernel { public: - void NMSFast(const Tensor& bbox, - const Tensor& scores, + void NMSFast(const phi::DenseTensor& bbox, + const phi::DenseTensor& scores, const T score_threshold, const T nms_threshold, const T eta, @@ -211,8 +212,8 @@ class MultiClassNMSKernel : public framework::OpKernel { } void MultiClassNMS(const framework::ExecutionContext& ctx, - const Tensor& scores, - const Tensor& bboxes, + const phi::DenseTensor& scores, + const phi::DenseTensor& bboxes, const int scores_size, std::map>* indices, int* num_nmsed_out) const { @@ -301,11 +302,11 @@ class MultiClassNMSKernel : public framework::OpKernel { } void MultiClassOutput(const platform::DeviceContext& ctx, - const Tensor& scores, - const Tensor& bboxes, + const phi::DenseTensor& scores, + const phi::DenseTensor& bboxes, const std::map>& selected_indices, const int scores_size, - Tensor* outs, + phi::DenseTensor* outs, int* oindices = nullptr, const int offset = 0) const { int64_t class_num = scores.dims()[1]; @@ -362,7 +363,7 @@ class MultiClassNMSKernel : public framework::OpKernel { bool return_index = ctx.HasOutput("Index") ? true : false; auto index = ctx.Output("Index"); bool has_roisnum = ctx.HasInput("RoisNum") ? true : false; - auto rois_num = ctx.Input("RoisNum"); + auto rois_num = ctx.Input("RoisNum"); auto score_dims = scores->dims(); auto score_size = score_dims.size(); auto& dev_ctx = ctx.template device_context(); @@ -467,7 +468,7 @@ class MultiClassNMSKernel : public framework::OpKernel { } } if (ctx.HasOutput("NmsRoisNum")) { - auto* nms_rois_num = ctx.Output("NmsRoisNum"); + auto* nms_rois_num = ctx.Output("NmsRoisNum"); nms_rois_num->mutable_data({n}, ctx.GetPlace()); int* num_data = nms_rois_num->data(); for (int i = 1; i <= n; i++) { diff --git a/paddle/fluid/operators/detection/nms_op.cc b/paddle/fluid/operators/detection/nms_op.cc index 03680538f778e..66682c67870ba 100644 --- a/paddle/fluid/operators/detection/nms_op.cc +++ b/paddle/fluid/operators/detection/nms_op.cc @@ -23,8 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class NMSOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cc b/paddle/fluid/operators/detection/polygon_box_transform_op.cc index 7450ffa876339..e386465c3bdf6 100644 --- a/paddle/fluid/operators/detection/polygon_box_transform_op.cc +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cc @@ -17,7 +17,7 @@ limitations under the License. 
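[Editor's note] The reflowed GetNmsLodFromRoisNum above turns a per-image RoI count tensor into a cumulative LoD vector used to slice boxes and scores per image. Only the signature and the first statements survive in the flattened hunk; the loop below is an assumed completion, and the size_t element type is likewise an assumption.

// Builds an exclusive prefix sum over the RoI counts: rois_lod[i] is the
// starting row of image i, and rois_lod.back() is the total number of RoIs.
#include <vector>

#include "paddle/phi/core/dense_tensor.h"

inline std::vector<size_t> GetNmsLodFromRoisNum(
    const phi::DenseTensor* rois_num) {
  std::vector<size_t> rois_lod;
  auto* rois_num_data = rois_num->data<int>();
  rois_lod.push_back(static_cast<size_t>(0));
  for (int i = 0; i < rois_num->numel(); ++i) {
    rois_lod.push_back(rois_lod.back() +
                       static_cast<size_t>(rois_num_data[i]));
  }
  return rois_lod;
}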
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class PolygonBoxTransformCPUKernel : public framework::OpKernel { @@ -27,10 +27,10 @@ class PolygonBoxTransformCPUKernel : public framework::OpKernel { platform::is_cpu_place(ctx.GetPlace()), true, platform::errors::InvalidArgument("It must use CUDAPlace.")); - auto* in = ctx.Input("Input"); + auto* in = ctx.Input("Input"); auto in_dims = in->dims(); const T* in_data = in->data(); - auto* out = ctx.Output("Output"); + auto* out = ctx.Output("Output"); T* out_data = out->mutable_data(ctx.GetPlace()); int batch_size = in_dims[0]; diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cu b/paddle/fluid/operators/detection/polygon_box_transform_op.cu index c90b5b4de0268..49e3d3d96ba5d 100644 --- a/paddle/fluid/operators/detection/polygon_box_transform_op.cu +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cu @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using platform::PADDLE_CUDA_NUM_THREADS; #define CUDA_BLOCK_SIZE 16 @@ -48,10 +48,10 @@ class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel { true, platform::errors::InvalidArgument( "The polygon_box_transform operator needs to be executed on GPU.")); - auto* in = ctx.Input("Input"); + auto* in = ctx.Input("Input"); auto in_dims = in->dims(); const T* in_data = in->data(); - auto* out = ctx.Output("Output"); + auto* out = ctx.Output("Output"); T* out_data = out->mutable_data(ctx.GetPlace()); int batch_size = in_dims[0]; diff --git a/paddle/fluid/operators/detection/prior_box_op.cc b/paddle/fluid/operators/detection/prior_box_op.cc index 03733e34ec670..de6e0822fe37c 100644 --- a/paddle/fluid/operators/detection/prior_box_op.cc +++ b/paddle/fluid/operators/detection/prior_box_op.cc @@ -43,7 +43,7 @@ class PriorBoxOp : public framework::OperatorWithKernel { library_ = framework::LibraryType::kMKLDNN; layout_ = framework::DataLayout::kMKLDNN; auto input_image_type = framework::TransToProtoVarType( - ctx.Input("Image")->dtype()); + ctx.Input("Image")->dtype()); int customized_type_value = framework::OpKernelType::kDefaultCustomizedTypeValue; if (input_image_type == framework::DataTypeTrait::DataType()) { diff --git a/paddle/fluid/operators/detection/prior_box_op.h b/paddle/fluid/operators/detection/prior_box_op.h index 889bc8354bc41..3adbfda50a779 100644 --- a/paddle/fluid/operators/detection/prior_box_op.h +++ b/paddle/fluid/operators/detection/prior_box_op.h @@ -54,10 +54,10 @@ template class PriorBoxOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* image = ctx.Input("Image"); - auto* boxes = ctx.Output("Boxes"); - auto* vars = ctx.Output("Variances"); + auto* input = ctx.Input("Input"); + auto* image = ctx.Input("Image"); + auto* boxes = ctx.Output("Boxes"); + auto* vars = ctx.Output("Variances"); auto min_sizes = ctx.Attr>("min_sizes"); auto max_sizes = ctx.Attr>("max_sizes"); @@ -171,7 +171,7 @@ class PriorBoxOpKernel : public framework::OpKernel { }); } - framework::Tensor var_t; + phi::DenseTensor var_t; var_t.mutable_data( phi::make_ddim({1, static_cast(variances.size())}), ctx.GetPlace()); diff --git a/paddle/fluid/operators/detection/prior_box_op_npu.cc b/paddle/fluid/operators/detection/prior_box_op_npu.cc index 9098c4084e143..8a3a313be159c 100644 
--- a/paddle/fluid/operators/detection/prior_box_op_npu.cc +++ b/paddle/fluid/operators/detection/prior_box_op_npu.cc @@ -18,16 +18,16 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class PriorBoxNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* image = ctx.Input("Image"); - auto* boxes = ctx.Output("Boxes"); - auto* variances = ctx.Output("Variances"); + auto* input = ctx.Input("Input"); + auto* image = ctx.Input("Image"); + auto* boxes = ctx.Output("Boxes"); + auto* variances = ctx.Output("Variances"); PADDLE_ENFORCE_EQ(boxes->dims(), variances->dims(), diff --git a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc b/paddle/fluid/operators/detection/retinanet_detection_output_op.cc index 2f3b59db5c038..2ebe2915e81ce 100644 --- a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc +++ b/paddle/fluid/operators/detection/retinanet_detection_output_op.cc @@ -18,7 +18,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; class RetinanetDetectionOutputOp : public framework::OperatorWithKernel { @@ -413,7 +413,7 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel { const std::vector& scores, const std::vector& bboxes, const std::vector& anchors, - const Tensor& im_info, + const phi::DenseTensor& im_info, std::vector>* nmsed_out, int* num_nmsed_out) const { int64_t nms_top_k = ctx.Attr("nms_top_k"); @@ -471,7 +471,7 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel { void MultiClassOutput(const platform::DeviceContext& ctx, const std::vector>& nmsed_out, - Tensor* outs) const { + phi::DenseTensor* outs) const { auto* odata = outs->data(); int count = 0; int64_t out_dim = 6; @@ -487,9 +487,9 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext& ctx) const override { - auto boxes = ctx.MultiInput("BBoxes"); - auto scores = ctx.MultiInput("Scores"); - auto anchors = ctx.MultiInput("Anchors"); + auto boxes = ctx.MultiInput("BBoxes"); + auto scores = ctx.MultiInput("Scores"); + auto anchors = ctx.MultiInput("Anchors"); auto* im_info = ctx.Input("ImInfo"); auto* outs = ctx.Output("Out"); diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc index 9994864b47d2b..c91a4f6c30a77 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc @@ -22,7 +22,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template @@ -249,12 +249,12 @@ template class CPUROIPerspectiveTransformOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); + auto* in = ctx.Input("X"); auto* rois = ctx.Input("ROIs"); - auto* out = ctx.Output("Out"); - auto* mask = ctx.Output("Mask"); + auto* out = ctx.Output("Out"); + auto* mask = ctx.Output("Mask"); auto* out_transform_matrix = - ctx.Output("TransformMatrix"); + ctx.Output("TransformMatrix"); auto transformed_height = ctx.Attr("transformed_height"); auto transformed_width = ctx.Attr("transformed_width"); auto spatial_scale = ctx.Attr("spatial_scale"); @@ -268,7 +268,7 @@ class CPUROIPerspectiveTransformOpKernel : public framework::OpKernel { const T* input_data = in->data(); int* mask_data = mask->mutable_data(ctx.GetPlace()); - framework::Tensor roi2image; + phi::DenseTensor roi2image; roi2image.Resize({rois_num}); int* roi2image_data = roi2image.mutable_data(ctx.GetPlace()); auto lod = rois->lod().back(); @@ -397,11 +397,10 @@ template class CPUROIPerspectiveTransformGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); + auto* in = ctx.Input("X"); auto* rois = ctx.Input("ROIs"); - auto* out_grad = - ctx.Input(framework::GradVarName("Out")); - auto* in_grad = ctx.Output(framework::GradVarName("X")); + auto* out_grad = ctx.Input(framework::GradVarName("Out")); + auto* in_grad = ctx.Output(framework::GradVarName("X")); auto transformed_height = ctx.Attr("transformed_height"); auto transformed_width = ctx.Attr("transformed_width"); @@ -418,7 +417,7 @@ class CPUROIPerspectiveTransformGradOpKernel : public framework::OpKernel { const T* out_grad_data = out_grad->data(); const T* rois_data = rois->data(); - framework::Tensor roi2image; + phi::DenseTensor roi2image; roi2image.Resize({rois_num}); int* roi2image_data = roi2image.mutable_data(ctx.GetPlace()); auto lod = rois->lod().back(); diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cu b/paddle/fluid/operators/detection/roi_perspective_transform_op.cu index 73b28f8f0e476..dee0a8e69d6d8 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cu +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cu @@ -367,14 +367,14 @@ template class CUDAROIPerspectiveTransformOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); + auto* in = ctx.Input("X"); auto* rois = ctx.Input("ROIs"); - auto* out = ctx.Output("Out"); - auto* out2in_idx = ctx.Output("Out2InIdx"); - auto* out2in_w = ctx.Output("Out2InWeights"); - auto* mask = ctx.Output("Mask"); + auto* out = ctx.Output("Out"); + auto* out2in_idx = ctx.Output("Out2InIdx"); + auto* out2in_w = ctx.Output("Out2InWeights"); + auto* mask = ctx.Output("Mask"); auto* out_transform_matrix = - ctx.Output("TransformMatrix"); + ctx.Output("TransformMatrix"); int* mask_data = mask->mutable_data(ctx.GetPlace()); int* out2in_idx_data = @@ -400,8 +400,8 @@ class CUDAROIPerspectiveTransformOpKernel : public framework::OpKernel { T* output_data = out->mutable_data(ctx.GetPlace()); const T* rois_data = rois->data(); - framework::Tensor roi2image; - framework::Tensor roi2image_dev; + phi::DenseTensor roi2image; + 
phi::DenseTensor roi2image_dev; roi2image.Resize({rois_num}); int* roi2image_data = roi2image.mutable_data(platform::CPUPlace()); auto lod = rois->lod().back(); @@ -513,9 +513,8 @@ class CUDAROIPerspectiveTransformGradOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* out2in_idx = ctx.Input("Out2InIdx"); auto* out2in_w = ctx.Input("Out2InWeights"); - auto* out_grad = - ctx.Input(framework::GradVarName("Out")); - auto* in_grad = ctx.Output(framework::GradVarName("X")); + auto* out_grad = ctx.Input(framework::GradVarName("Out")); + auto* in_grad = ctx.Output(framework::GradVarName("X")); T* in_grad_data = in_grad->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc index c6e4c00f79bba..f60cef3d1b554 100644 --- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc +++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc @@ -21,7 +21,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template -void AppendRpns(LoDTensor* out, int64_t offset, Tensor* to_add) { +void AppendRpns(LoDTensor* out, int64_t offset, phi::DenseTensor* to_add) { auto* out_data = out->data(); auto* to_add_data = to_add->data(); memcpy(out_data + offset, to_add_data, to_add->numel() * sizeof(T)); @@ -113,7 +113,7 @@ void AppendRpns(LoDTensor* out, int64_t offset, Tensor* to_add) { template std::vector FilterStraddleAnchor(const phi::CPUContext& context, - const Tensor* anchor, + const phi::DenseTensor* anchor, const float rpn_straddle_thresh, T im_height, T im_width) { @@ -154,8 +154,8 @@ std::vector FilterStraddleAnchor(const phi::CPUContext& context, template Tensor FilterCrowdGt(const phi::CPUContext& context, - Tensor* gt_boxes, - Tensor* is_crowd) { + phi::DenseTensor* gt_boxes, + phi::DenseTensor* is_crowd) { int gt_num = gt_boxes->dims()[0]; std::vector not_crowd_inds; auto* is_crowd_data = is_crowd->data(); @@ -196,8 +196,8 @@ void ReservoirSampling(const int num, template void ScoreAssign(const T* anchor_by_gt_overlap_data, - const Tensor& anchor_to_gt_max, - const Tensor& gt_to_anchor_max, + const phi::DenseTensor& anchor_to_gt_max, + const phi::DenseTensor& gt_to_anchor_max, const int rpn_batch_size_per_im, const float rpn_fg_fraction, const float rpn_positive_overlap, @@ -299,14 +299,15 @@ void ScoreAssign(const T* anchor_by_gt_overlap_data, } template -std::vector SampleRpnFgBgGt(const phi::CPUContext& ctx, - const Tensor& anchor_by_gt_overlap, - const int rpn_batch_size_per_im, - const float rpn_positive_overlap, - const float rpn_negative_overlap, - const float rpn_fg_fraction, - std::minstd_rand engine, - bool use_random) { +std::vector SampleRpnFgBgGt( + const phi::CPUContext& ctx, + const phi::DenseTensor& anchor_by_gt_overlap, + const int rpn_batch_size_per_im, + const float rpn_positive_overlap, + const float rpn_negative_overlap, + const float rpn_fg_fraction, + std::minstd_rand engine, + bool use_random) { auto* anchor_by_gt_overlap_data = anchor_by_gt_overlap.data(); int anchor_num = anchor_by_gt_overlap.dims()[0]; int gt_num = anchor_by_gt_overlap.dims()[1]; @@ -393,7 +394,7 @@ template class RpnTargetAssignKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* anchor = context.Input("Anchor"); // (H*W*A) * 4 + 
auto* anchor = context.Input("Anchor"); // (H*W*A) * 4 auto* gt_boxes = context.Input("GtBoxes"); auto* is_crowd = context.Input("IsCrowd"); auto* im_info = context.Input("ImInfo"); @@ -857,9 +858,9 @@ class RetinanetTargetAssignOp : public framework::OperatorWithKernel { template std::vector FilterCrowdGtBoxLabel(const phi::CPUContext& context, - Tensor* gt_boxes, - Tensor* gt_labels, - Tensor* is_crowd) { + phi::DenseTensor* gt_boxes, + phi::DenseTensor* gt_labels, + phi::DenseTensor* is_crowd) { int gt_num = gt_boxes->dims()[0]; std::vector not_crowd_inds; auto* is_crowd_data = is_crowd->data(); @@ -892,8 +893,8 @@ std::vector FilterCrowdGtBoxLabel(const phi::CPUContext& context, template std::vector GetAllFgBgGt(const phi::CPUContext& ctx, - const Tensor& anchor_by_gt_overlap, - const Tensor& ncrowd_gt_labels, + const phi::DenseTensor& anchor_by_gt_overlap, + const phi::DenseTensor& ncrowd_gt_labels, const float positive_overlap, const float negative_overlap, std::minstd_rand engine) { @@ -992,7 +993,7 @@ template class RetinanetTargetAssignKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* anchor = context.Input("Anchor"); // (H*W*A) * 4 + auto* anchor = context.Input("Anchor"); // (H*W*A) * 4 auto* gt_boxes = context.Input("GtBoxes"); auto* gt_labels = context.Input("GtLabels"); auto* is_crowd = context.Input("IsCrowd"); diff --git a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc index bc23c5105db94..91479a78b63b1 100644 --- a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc +++ b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class SigmoidFocalLossOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu index 3def90fd459e5..bad93fd22b2e9 100644 --- a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu +++ b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cu @@ -19,7 +19,7 @@ limitations under the License. 
 */
 namespace paddle {
 namespace operators {
-using Tensor = framework::Tensor;
+using Tensor = phi::DenseTensor;
 static constexpr int kNumCUDAThreads = 512;
 static constexpr int kNumMaxinumNumBlocks = 4096;
@@ -119,10 +119,10 @@ template
 class GPUSigmoidFocalLossKernel : public framework::OpKernel {
  public:
  void Compute(const framework::ExecutionContext &context) const override {
-    const Tensor *X = context.Input("X");
-    const Tensor *Labels = context.Input("Label");
-    const Tensor *FgNum = context.Input("FgNum");
-    Tensor *Out = context.Output("Out");
+    const Tensor *X = context.Input("X");
+    const Tensor *Labels = context.Input("Label");
+    const Tensor *FgNum = context.Input("FgNum");
+    Tensor *Out = context.Output("Out");
     T gamma = static_cast(context.Attr("gamma"));
     T alpha = static_cast(context.Attr("alpha"));
     auto x_dims = X->dims();
@@ -150,11 +150,12 @@ template
 class GPUSigmoidFocalLossGradKernel : public framework::OpKernel {
  public:
  void Compute(const framework::ExecutionContext &context) const override {
-    const Tensor *X = context.Input("X");
-    const Tensor *Labels = context.Input("Label");
-    const Tensor *FgNum = context.Input("FgNum");
-    const Tensor *dOut = context.Input(framework::GradVarName("Out"));
-    Tensor *dX = context.Output(framework::GradVarName("X"));
+    const Tensor *X = context.Input("X");
+    const Tensor *Labels = context.Input("Label");
+    const Tensor *FgNum = context.Input("FgNum");
+    const Tensor *dOut =
+        context.Input(framework::GradVarName("Out"));
+    Tensor *dX = context.Output(framework::GradVarName("X"));
     auto dx_data = dX->mutable_data(context.GetPlace());
     T gamma = static_cast(context.Attr("gamma"));
     T alpha = static_cast(context.Attr("alpha"));
diff --git a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h
index 4ad9743cfca94..b7c77a5e28222 100644
--- a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h
+++ b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h
@@ -22,16 +22,16 @@ limitations under the License.
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class SigmoidFocalLossKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const Tensor *X = context.Input("X"); - const Tensor *Labels = context.Input("Label"); - const Tensor *FgNum = context.Input("FgNum"); - Tensor *Out = context.Output("Out"); + const Tensor *X = context.Input("X"); + const Tensor *Labels = context.Input("Label"); + const Tensor *FgNum = context.Input("FgNum"); + Tensor *Out = context.Output("Out"); T gamma = static_cast(context.Attr("gamma")); T alpha = static_cast(context.Attr("alpha")); auto out_data = Out->mutable_data(context.GetPlace()); @@ -79,11 +79,12 @@ template class SigmoidFocalLossGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const Tensor *X = context.Input("X"); - const Tensor *Labels = context.Input("Label"); - const Tensor *FgNum = context.Input("FgNum"); - const Tensor *dOut = context.Input(framework::GradVarName("Out")); - Tensor *dX = context.Output(framework::GradVarName("X")); + const Tensor *X = context.Input("X"); + const Tensor *Labels = context.Input("Label"); + const Tensor *FgNum = context.Input("FgNum"); + const Tensor *dOut = + context.Input(framework::GradVarName("Out")); + Tensor *dX = context.Output(framework::GradVarName("X")); auto dx_data = dX->mutable_data(context.GetPlace()); T gamma = static_cast(context.Attr("gamma")); T alpha = static_cast(context.Attr("alpha")); diff --git a/paddle/fluid/operators/detection/target_assign_op.h b/paddle/fluid/operators/detection/target_assign_op.h index 55481dc3e8166..a7c66bcf02e07 100644 --- a/paddle/fluid/operators/detection/target_assign_op.h +++ b/paddle/fluid/operators/detection/target_assign_op.h @@ -97,10 +97,10 @@ class TargetAssignKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* x = ctx.Input("X"); - auto* match_indices = ctx.Input("MatchIndices"); + auto* match_indices = ctx.Input("MatchIndices"); - auto* out = ctx.Output("Out"); - auto* out_wt = ctx.Output("OutWeight"); + auto* out = ctx.Output("Out"); + auto* out_wt = ctx.Output("OutWeight"); PADDLE_ENFORCE_EQ(x->lod().size(), 1UL, diff --git a/paddle/fluid/operators/detection/yolo_box_op.cc b/paddle/fluid/operators/detection/yolo_box_op.cc index 3261f8fca3d20..257347f663c68 100644 --- a/paddle/fluid/operators/detection/yolo_box_op.cc +++ b/paddle/fluid/operators/detection/yolo_box_op.cc @@ -17,8 +17,6 @@ namespace paddle { namespace operators { -using framework::Tensor; - class YoloBoxOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/detection/yolov3_loss_op.cc b/paddle/fluid/operators/detection/yolov3_loss_op.cc index 0448d7e5183c8..5f6ffece3bf54 100644 --- a/paddle/fluid/operators/detection/yolov3_loss_op.cc +++ b/paddle/fluid/operators/detection/yolov3_loss_op.cc @@ -21,8 +21,6 @@ namespace paddle { namespace operators { -using framework::Tensor; - class Yolov3LossOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc index 8c30ae28f4e76..51fdd4ad1f2ec 100644 --- a/paddle/fluid/operators/detection_map_op.cc +++ 
b/paddle/fluid/operators/detection_map_op.cc @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class DetectionMAPOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/detection_map_op.h b/paddle/fluid/operators/detection_map_op.h index 3ed55f6697f1a..cde33cd956419 100644 --- a/paddle/fluid/operators/detection_map_op.h +++ b/paddle/fluid/operators/detection_map_op.h @@ -62,13 +62,13 @@ class DetectionMAPOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* in_detect = ctx.Input("DetectRes"); auto* in_label = ctx.Input("Label"); - auto* out_map = ctx.Output("MAP"); + auto* out_map = ctx.Output("MAP"); - auto* in_pos_count = ctx.Input("PosCount"); + auto* in_pos_count = ctx.Input("PosCount"); auto* in_true_pos = ctx.Input("TruePos"); auto* in_false_pos = ctx.Input("FalsePos"); - auto* out_pos_count = ctx.Output("AccumPosCount"); + auto* out_pos_count = ctx.Output("AccumPosCount"); auto* out_true_pos = ctx.Output("AccumTruePos"); auto* out_false_pos = ctx.Output("AccumFalsePos"); @@ -241,7 +241,7 @@ class DetectionMAPOpKernel : public framework::OpKernel { const std::map& label_pos_count, const std::map>>& true_pos, const std::map>>& false_pos, - framework::Tensor* output_pos_count, + phi::DenseTensor* output_pos_count, framework::LoDTensor* output_true_pos, framework::LoDTensor* output_false_pos, const int class_num) const { @@ -307,7 +307,7 @@ class DetectionMAPOpKernel : public framework::OpKernel { output_false_pos->set_lod(false_pos_lod); } - void GetInputPos(const framework::Tensor& input_pos_count, + void GetInputPos(const phi::DenseTensor& input_pos_count, const framework::LoDTensor& input_true_pos, const framework::LoDTensor& input_false_pos, std::map* label_pos_count, diff --git a/paddle/fluid/operators/dgc_clip_by_norm_op.cc b/paddle/fluid/operators/dgc_clip_by_norm_op.cc index 9949fefb1b18b..7c75949039358 100644 --- a/paddle/fluid/operators/dgc_clip_by_norm_op.cc +++ b/paddle/fluid/operators/dgc_clip_by_norm_op.cc @@ -33,7 +33,7 @@ class DGCClipByNormOp : public ClipByNormOp { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "current_step") { VLOG(10) << "var_name:" << var_name << " need not to transform"; diff --git a/paddle/fluid/operators/dgc_clip_by_norm_op.h b/paddle/fluid/operators/dgc_clip_by_norm_op.h index 27c30a8997b2c..8637ac88a422d 100644 --- a/paddle/fluid/operators/dgc_clip_by_norm_op.h +++ b/paddle/fluid/operators/dgc_clip_by_norm_op.h @@ -21,7 +21,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class DGCClipByNormKernel : public framework::OpKernel { @@ -32,7 +32,7 @@ class DGCClipByNormKernel : public framework::OpKernel { return; } - auto current_step_tensor = ctx.Input("current_step"); + auto current_step_tensor = ctx.Input("current_step"); auto* current_step = current_step_tensor->data(); VLOG(10) << "current_step:" << *current_step @@ -50,8 +50,8 @@ class DGCClipByNormKernel : public framework::OpKernel { auto& dev_ctx = ctx.device_context(); if (in_var->IsType()) { - auto* x = ctx.Input("X"); - auto* y = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Output("Out"); return phi::ClipByNormKernel( static_cast::TYPE&>(dev_ctx), diff --git a/paddle/fluid/operators/dgc_op.cc b/paddle/fluid/operators/dgc_op.cc index 1f7b5dbdce9c8..e247ab05ebadd 100644 --- a/paddle/fluid/operators/dgc_op.cc +++ b/paddle/fluid/operators/dgc_op.cc @@ -47,7 +47,7 @@ class DGCOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "current_step" || var_name == "k" || var_name == "nranks") { VLOG(10) << "var_name:" << var_name << " need not to transform"; diff --git a/paddle/fluid/operators/dgc_op.h b/paddle/fluid/operators/dgc_op.h index 82e002cbb3389..44121a9434c72 100644 --- a/paddle/fluid/operators/dgc_op.h +++ b/paddle/fluid/operators/dgc_op.h @@ -53,11 +53,11 @@ template class DGCOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto u = ctx.Input("U"); - auto v = ctx.Input("V"); - auto g = ctx.Input("Grad"); + auto u = ctx.Input("U"); + auto v = ctx.Input("V"); + auto g = ctx.Input("Grad"); - auto grad_out = ctx.Output("Grad_out"); + auto grad_out = ctx.Output("Grad_out"); // attrs float m = ctx.Attr("m"); @@ -67,7 +67,7 @@ class DGCOpKernel : public framework::OpKernel { auto rampup_step = ctx.Attr("rampup_step"); // nranks - auto nranks_tensor = ctx.Input("nranks"); + auto nranks_tensor = ctx.Input("nranks"); const int nranks = static_cast(*nranks_tensor->data()); PADDLE_ENFORCE_GT(nranks, 1, @@ -76,7 +76,7 @@ class DGCOpKernel : public framework::OpKernel { "use multi card or multi machine GPU")); // regularization - auto p = ctx.Input("Param"); + auto p = ctx.Input("Param"); float regular_coeff = ctx.Attr("regular_coeff"); int regular_type = ctx.Attr("regular_type"); @@ -110,7 +110,7 @@ class DGCOpKernel : public framework::OpKernel { } // current step - auto current_step_tensor = ctx.Input("current_step"); + auto current_step_tensor = ctx.Input("current_step"); const float* current_step = current_step_tensor->data(); if (static_cast(*current_step) < static_cast(rampup_begin_step)) { @@ -140,14 +140,14 @@ class DGCOpKernel : public framework::OpKernel { << ", current_step:" << *current_step << ", ratio:" << ratio << ", k:" << k << ", nranks:" << nranks; - auto k_out = ctx.Output("k"); + auto k_out = ctx.Output("k"); T* k_out_data = k_out->data(); *k_out_data = k; - auto u_out = ctx.Output("U_out"); - auto v_out = ctx.Output("V_out"); - auto encode_grad_out = ctx.Output("EncodeGrad"); - auto gather_buff = ctx.Output("GatherBuff"); + auto u_out = ctx.Output("U_out"); + auto v_out = ctx.Output("V_out"); + auto encode_grad_out = ctx.Output("EncodeGrad"); + auto 
gather_buff = ctx.Output("GatherBuff"); // FIXME(gongwb): use cublas. auto u_out_e = framework::EigenVector::Flatten(*u_out); diff --git a/paddle/fluid/operators/diag_op.h b/paddle/fluid/operators/diag_op.h index 9c9ff69586a1f..e3514e59e806d 100644 --- a/paddle/fluid/operators/diag_op.h +++ b/paddle/fluid/operators/diag_op.h @@ -39,10 +39,10 @@ template class DiagKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* diagonal = context.Input("Diagonal"); + auto* diagonal = context.Input("Diagonal"); auto* diag_data = diagonal->data(); auto numel = diagonal->numel(); - auto* out = context.Output("Out"); + auto* out = context.Output("Out"); T* out_data = out->mutable_data(context.GetPlace()); phi::funcs::SetConstant set_zero; diff --git a/paddle/fluid/operators/dropout_impl.cu.h b/paddle/fluid/operators/dropout_impl.cu.h index bc3eedacc5b0d..413d02e3b6738 100644 --- a/paddle/fluid/operators/dropout_impl.cu.h +++ b/paddle/fluid/operators/dropout_impl.cu.h @@ -267,25 +267,25 @@ __global__ void VectorizedGeneratorMask(const size_t n, } inline void CalcBroadcastedMask(const phi::GPUContext& dev_ctx, - const framework::Tensor& mask, - framework::Tensor* broadcasted_mask) { + const phi::DenseTensor& mask, + phi::DenseTensor* broadcasted_mask) { // The broadcast of mask can be combined to the following ElementwiseKernel // when the BroadcastKernel supports different input types. broadcasted_mask->mutable_data(dev_ctx.GetPlace()); - std::vector ins = {&mask}; - std::vector outs = {broadcasted_mask}; + std::vector ins = {&mask}; + std::vector outs = {broadcasted_mask}; phi::funcs::BroadcastKernel( dev_ctx, ins, &outs, -1, kps::IdentityFunctor()); } template void ScaleByDropoutFactor(const phi::GPUContext& dev_ctx, - const framework::Tensor& x, - framework::Tensor* y, + const phi::DenseTensor& x, + phi::DenseTensor* y, MT factor) { - std::vector ins = {&x}; - std::vector outs = {y}; + std::vector ins = {&x}; + std::vector outs = {y}; auto functor = phi::funcs::ScaleFunctor(factor); phi::funcs::ElementwiseKernel(dev_ctx, ins, &outs, functor); } @@ -297,10 +297,10 @@ void DropoutFwGPUKernelDriver(const phi::GPUContext& dev_ctx, bool upscale_in_train, bool is_fix_seed, int seed_val, - const framework::Tensor& x, - const framework::Tensor* seed, - framework::Tensor* mask, - framework::Tensor* y, + const phi::DenseTensor& x, + const phi::DenseTensor* seed, + phi::DenseTensor* mask, + phi::DenseTensor* y, bool is_dropout_nd = false) { int64_t x_numel = x.numel(); auto stream = dev_ctx.stream(); @@ -359,14 +359,14 @@ void DropoutFwGPUKernelDriver(const phi::GPUContext& dev_ctx, increment, main_offset); - framework::Tensor broadcasted_mask; + phi::DenseTensor broadcasted_mask; broadcasted_mask.Resize(x.dims()); CalcBroadcastedMask(dev_ctx, *mask, &broadcasted_mask); auto dst_functor = DstFunctor( 1.0f - dropout_prob, upscale_in_train, x_numel); - std::vector ins = {&x, &broadcasted_mask}; - std::vector outs = {y}; + std::vector ins = {&x, &broadcasted_mask}; + std::vector outs = {y}; phi::funcs::ElementwiseKernel(dev_ctx, ins, &outs, dst_functor); } else { #define PD_DROPOUT_KERNEL_NAME VectorizedRandomGenerator @@ -424,9 +424,9 @@ void DropoutGradGPUKernelDriver(const phi::GPUContext& dev_ctx, bool is_test, float dropout_prob, bool upscale_in_train, - const framework::Tensor& grad_y, - const framework::Tensor& mask, - framework::Tensor* grad_x, + const phi::DenseTensor& grad_y, + const phi::DenseTensor& mask, + phi::DenseTensor* grad_x, 
bool is_dropout_nd = false) { using MT = typename details::MPTypeTrait::Type; @@ -436,15 +436,15 @@ void DropoutGradGPUKernelDriver(const phi::GPUContext& dev_ctx, // y = factor * x ScaleByDropoutFactor(dev_ctx, grad_y, grad_x, factor); } else { - framework::Tensor broadcasted_mask; + phi::DenseTensor broadcasted_mask; if (is_dropout_nd) { broadcasted_mask.Resize(grad_y.dims()); CalcBroadcastedMask(dev_ctx, mask, &broadcasted_mask); } - std::vector ins = { + std::vector ins = { &grad_y, is_dropout_nd ? &broadcasted_mask : &mask}; - std::vector outs = {grad_x}; + std::vector outs = {grad_x}; if (upscale_in_train) { if (dropout_prob == 1.0f) { #ifdef PADDLE_WITH_HIP diff --git a/paddle/fluid/operators/dropout_impl_util.h b/paddle/fluid/operators/dropout_impl_util.h index 88e492efcc45a..84ff221cbe139 100644 --- a/paddle/fluid/operators/dropout_impl_util.h +++ b/paddle/fluid/operators/dropout_impl_util.h @@ -21,7 +21,7 @@ namespace paddle { namespace operators { inline void GetSeedDataAndIncrement(const phi::GPUContext& dev_ctx, - const framework::Tensor* seed, + const phi::DenseTensor* seed, const bool is_fix_seed, const int seed_val, const int offset, @@ -31,7 +31,7 @@ inline void GetSeedDataAndIncrement(const phi::GPUContext& dev_ctx, auto gen_cuda = framework::DefaultCUDAGenerator(device_id); if (seed) { - framework::Tensor seed_cpu_tensor; + phi::DenseTensor seed_cpu_tensor; paddle::framework::TensorCopySync( *seed, platform::CPUPlace(), &seed_cpu_tensor); *seed_data = static_cast(seed_cpu_tensor.data()[0]); diff --git a/paddle/fluid/operators/dropout_op.cc b/paddle/fluid/operators/dropout_op.cc index 43ae066b527fd..804834a974aad 100644 --- a/paddle/fluid/operators/dropout_op.cc +++ b/paddle/fluid/operators/dropout_op.cc @@ -22,8 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class DropoutOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -37,7 +35,7 @@ class DropoutOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "Seed") { VLOG(10) << "var_name:" << var_name diff --git a/paddle/fluid/operators/dropout_op_mlu.cc b/paddle/fluid/operators/dropout_op_mlu.cc index 142e047e6c2b1..7cf98738d073f 100644 --- a/paddle/fluid/operators/dropout_op_mlu.cc +++ b/paddle/fluid/operators/dropout_op_mlu.cc @@ -18,18 +18,18 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class DropoutMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto dropout_prob = ctx.Attr("dropout_prob"); auto is_test = ctx.Attr("is_test"); auto* seed_tensor = - ctx.HasInput("Seed") ? ctx.Input("Seed") : nullptr; + ctx.HasInput("Seed") ? ctx.Input("Seed") : nullptr; auto dropout_implementation = ctx.Attr("dropout_implementation"); @@ -65,7 +65,7 @@ class DropoutMLUKernel : public framework::OpKernel { seed_data = ctx.Attr("fix_seed") ? 
ctx.Attr("seed") : 0; } - auto* mask = ctx.Output("Mask"); + auto* mask = ctx.Output("Mask"); mask->mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc mask_desc(*mask); // Special case when dropout_prob is 1.0 @@ -137,9 +137,9 @@ class DropoutGradMLUKernel : public framework::OpKernel { true, platform::errors::InvalidArgument( "GradOp is only callable when is_test is false")); - auto* grad_x = ctx.Output(framework::GradVarName("X")); - auto* grad_out = ctx.Input(framework::GradVarName("Out")); - auto* mask = ctx.Input("Mask"); + auto* grad_x = ctx.Output(framework::GradVarName("X")); + auto* grad_out = ctx.Input(framework::GradVarName("Out")); + auto* mask = ctx.Input("Mask"); auto dropout_prob = ctx.Attr("dropout_prob"); auto dropout_impl = ctx.Attr("dropout_implementation"); diff --git a/paddle/fluid/operators/dropout_op_npu.cc b/paddle/fluid/operators/dropout_op_npu.cc index 96e2b6d956777..a63b6e5e479af 100644 --- a/paddle/fluid/operators/dropout_op_npu.cc +++ b/paddle/fluid/operators/dropout_op_npu.cc @@ -23,17 +23,17 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class DropoutNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); auto* seed_tensor = - ctx.HasInput("Seed") ? ctx.Input("Seed") : nullptr; - auto* out = ctx.Output("Out"); - auto* mask = ctx.Output("Mask"); + ctx.HasInput("Seed") ? ctx.Input("Seed") : nullptr; + auto* out = ctx.Output("Out"); + auto* mask = ctx.Output("Mask"); auto dropout_prob = ctx.Attr("dropout_prob"); auto is_test = ctx.Attr("is_test"); @@ -151,9 +151,9 @@ template class DropoutGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* mask = ctx.Input("Mask"); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* mask = ctx.Input("Mask"); auto dropout_prob = ctx.Attr("dropout_prob"); auto is_test = ctx.Attr("is_test"); diff --git a/paddle/fluid/operators/eig_op.h b/paddle/fluid/operators/eig_op.h index c9c81f2ed8a61..5fba57c037977 100644 --- a/paddle/fluid/operators/eig_op.h +++ b/paddle/fluid/operators/eig_op.h @@ -40,9 +40,7 @@ namespace paddle { namespace operators { -using paddle::framework::Tensor; - -inline int BatchCount(const Tensor& matrix) { +inline int BatchCount(const phi::DenseTensor& matrix) { int count = 1; int num_dims = matrix.dims().size(); for (int i = 0; i < num_dims - 2; ++i) { @@ -51,7 +49,7 @@ inline int BatchCount(const Tensor& matrix) { return count; } -inline int MatrixStride(const Tensor& matrix) { +inline int MatrixStride(const phi::DenseTensor& matrix) { framework::DDim dims_list = matrix.dims(); int num_dims = dims_list.size(); return dims_list[num_dims - 1] * dims_list[num_dims - 2]; diff --git a/paddle/fluid/operators/eigh_op.cc b/paddle/fluid/operators/eigh_op.cc index c85a7d842e4f9..4d2982b314a1c 100644 --- a/paddle/fluid/operators/eigh_op.cc +++ b/paddle/fluid/operators/eigh_op.cc @@ -20,8 +20,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::Tensor; - class EighOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/eigvalsh_op.cc b/paddle/fluid/operators/eigvalsh_op.cc index 9ba892b61badf..9d09b96280e2f 100644 --- a/paddle/fluid/operators/eigvalsh_op.cc +++ b/paddle/fluid/operators/eigvalsh_op.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class EigvalshOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op_mlu.cc b/paddle/fluid/operators/elementwise/elementwise_add_op_mlu.cc index 9e3da4ed6af80..456a11f95aaca 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op_mlu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_add_op_mlu.cc @@ -16,7 +16,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ElementwiseAddMLUKernel : public framework::OpKernel { @@ -32,11 +32,11 @@ class ElementwiseAddGradMLUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); int axis = ctx.Attr("axis"); axis = (axis == -1 ? std::abs(x->dims().size() - y->dims().size()) : axis); diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc index 726b4186030d2..70e3de7a0bcd9 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_add_op_npu.cc @@ -21,7 +21,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ElementwiseAddNPUKernel : public framework::OpKernel { @@ -69,11 +69,11 @@ class ElementwiseAddGradNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); int axis = ctx.Attr("axis"); axis = (axis == -1 ? 
std::abs(x->dims().size() - y->dims().size()) : axis); diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.h b/paddle/fluid/operators/elementwise/elementwise_div_op.h index 3c03b54b6f98f..b1f0817539f17 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_div_op.h @@ -24,7 +24,7 @@ namespace operators { class ElementwiseDivOpDoubleGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; void InferShape(framework::InferShapeContext* ctx) const override { auto y_grad_name = framework::GradVarName("Y"); @@ -59,7 +59,7 @@ class ElementwiseDivOpDoubleGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op_mlu.cc b/paddle/fluid/operators/elementwise/elementwise_div_op_mlu.cc index 306ee1952dbe1..27f7281b9fb1e 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op_mlu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_div_op_mlu.cc @@ -21,7 +21,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ElementwiseDivMLUKernel : public framework::OpKernel { @@ -35,12 +35,12 @@ template class ElementwiseDivGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Input("Out"); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* out = ctx.Input("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); int axis = ctx.Attr("axis"); const auto& x_dims = x->dims(); diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc index 9ae7782ca01ea..74a2a5b6ca6eb 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_div_op_npu.cc @@ -21,16 +21,16 @@ limitations under the License. 
 */
 namespace paddle {
 namespace operators {
-using Tensor = framework::Tensor;
+using Tensor = phi::DenseTensor;
 template
 class ElementwiseDivNPUKernel : public framework::OpKernel {
  public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x = ctx.Input("X");
-    auto* y = ctx.Input("Y");
+    auto* x = ctx.Input("X");
+    auto* y = ctx.Input("Y");
-    auto* out = ctx.Output("Out");
+    auto* out = ctx.Output("Out");
     auto place = ctx.GetPlace();
@@ -49,13 +49,13 @@ template
 class ElementwiseDivGradNPUKernel : public framework::OpKernel {
  public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* out = ctx.Input("Out");
-    auto* dout = ctx.Input(framework::GradVarName("Out"));
-    auto* x = ctx.Input("X");
-    auto* y = ctx.Input("Y");
+    auto* out = ctx.Input("Out");
+    auto* dout = ctx.Input(framework::GradVarName("Out"));
+    auto* x = ctx.Input("X");
+    auto* y = ctx.Input("Y");
-    auto* dx = ctx.Output(framework::GradVarName("X"));
-    auto* dy = ctx.Output(framework::GradVarName("Y"));
+    auto* dx = ctx.Output(framework::GradVarName("X"));
+    auto* dy = ctx.Output(framework::GradVarName("Y"));
     auto place = ctx.GetPlace();
diff --git a/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc
index 79e283e1ffd35..396f1b6f6223a 100644
--- a/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_floordiv_op_npu.cc
@@ -21,15 +21,15 @@ limitations under the License.
 */
 namespace paddle {
 namespace operators {
-using Tensor = framework::Tensor;
+using Tensor = phi::DenseTensor;
 template
 class ElementwiseFloorDivNPUKernel : public framework::OpKernel {
  public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* x = ctx.Input("X");
-    auto* y = ctx.Input("Y");
-    auto* out = ctx.Output("Out");
+    auto* x = ctx.Input("X");
+    auto* y = ctx.Input("Y");
+    auto* out = ctx.Output("Out");
     out->mutable_data(ctx.GetPlace());
diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_max_op_npu.cc
index 7cd1f70494256..fe91c28cd1f05 100644
--- a/paddle/fluid/operators/elementwise/elementwise_max_op_npu.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_max_op_npu.cc
@@ -18,7 +18,7 @@ limitations under the License.
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ElementwiseMaxNPUKernel : public framework::OpKernel { @@ -27,9 +27,9 @@ class ElementwiseMaxNPUKernel : public framework::OpKernel { auto& dev_ctx = ctx.template device_context(); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); int axis = ctx.Attr("axis"); @@ -67,11 +67,11 @@ class ElementwiseMaxGradNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); int axis = ctx.Attr("axis"); // The ascend elementwise_max_grad op only supports broadcast diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op_mlu.cc b/paddle/fluid/operators/elementwise/elementwise_min_op_mlu.cc index 7b29f8e4cd3f3..861ed2046c077 100644 --- a/paddle/fluid/operators/elementwise/elementwise_min_op_mlu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_min_op_mlu.cc @@ -20,7 +20,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ElementwiseMinMLUKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_min_op_npu.cc index e34b88189d3bc..8014f82ca5742 100644 --- a/paddle/fluid/operators/elementwise/elementwise_min_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_min_op_npu.cc @@ -22,7 +22,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ElementwiseMinNPUKernel : public framework::OpKernel { @@ -30,10 +30,10 @@ class ElementwiseMinNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); out->mutable_data(place); @@ -71,11 +71,11 @@ class ElementwiseMinGradNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); int axis = ctx.Attr("axis"); axis = (axis == -1 ? 
std::abs(x->dims().size() - y->dims().size()) : axis); auto stream = dev_ctx.stream(); diff --git a/paddle/fluid/operators/elementwise/elementwise_mlu.h b/paddle/fluid/operators/elementwise/elementwise_mlu.h index 50085f531a99d..57f4b0c057686 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mlu.h +++ b/paddle/fluid/operators/elementwise/elementwise_mlu.h @@ -77,9 +77,9 @@ void MLUOpTensorKernel(const framework::ExecutionContext& ctx, platform::errors::Unavailable( "This kernel of MLU only support ADD, SUB, MUL.")); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); int axis = ctx.Attr("axis"); @@ -186,9 +186,9 @@ inline void MLUBinary(const framework::ExecutionContext& ctx, template void MLUBinaryOp(const framework::ExecutionContext& ctx) { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); int axis = ctx.Attr("axis"); @@ -259,8 +259,8 @@ inline void MLUUnary(const framework::ExecutionContext& ctx, template void MLUUnaryOp(const framework::ExecutionContext& ctx) { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); @@ -283,11 +283,11 @@ enum MINMAX_GRAD_FUNCTOR { }; template void MLUMinMaxGradHelper(const framework::ExecutionContext& ctx) { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); int axis = ctx.Attr("axis"); const auto& x_dims = x->dims(); diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_mod_op_npu.cc index 5de2f6509fe40..bdeef48389b6c 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mod_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mod_op_npu.cc @@ -18,7 +18,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ElementwiseModNPUKernel : public framework::OpKernel { @@ -26,9 +26,9 @@ class ElementwiseModNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Output("Out"); int axis = ctx.Attr("axis"); auto x_dims = x->dims(); diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h index ffd36412e3ebe..afc06b0d9981b 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.h @@ -25,7 +25,7 @@ namespace operators { class ElementwiseMulOp : public ElementwiseOp { public: - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; using ElementwiseOp::ElementwiseOp; framework::OpKernelType GetExpectedKernelType( @@ -46,7 +46,7 @@ class ElementwiseMulOp : public ElementwiseOp { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op_mlu.cc b/paddle/fluid/operators/elementwise/elementwise_mul_op_mlu.cc index 0c41dc40cdc12..fe2848621c76f 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op_mlu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op_mlu.cc @@ -17,7 +17,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using MLUDeviceContext = platform::MLUDeviceContext; template @@ -32,11 +32,11 @@ template class ElementwiseMulGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); int axis = ctx.Attr("axis"); const auto& x_dims = x->dims(); diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_mul_op_npu.cc index fa6fd9c422e81..4fc3be1b29cc7 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op_npu.cc @@ -19,7 +19,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template @@ -28,8 +28,8 @@ static void ReduceDims(const framework::ExecutionContext& ctx, const int axis, const framework::DDim& ddims, const framework::DDim& brd_ddims, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { std::vector axes; int64_t brd_size = brd_ddims.size(); int64_t org_size = ddims.size(); @@ -55,9 +55,9 @@ class ElementwiseMulNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); int axis = ctx.Attr("axis"); @@ -91,11 +91,11 @@ class ElementwiseMulGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); int axis = ctx.Attr("axis"); axis = (axis == -1 ? std::abs(x->dims().size() - y->dims().size()) : axis); diff --git a/paddle/fluid/operators/elementwise/elementwise_npu.h b/paddle/fluid/operators/elementwise/elementwise_npu.h index e4b6998a8f3fe..5266491d6f506 100644 --- a/paddle/fluid/operators/elementwise/elementwise_npu.h +++ b/paddle/fluid/operators/elementwise/elementwise_npu.h @@ -20,14 +20,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template void NpuBroadcast(const platform::NPUDeviceContext& dev_ctx, - const Tensor* src, + const phi::DenseTensor* src, int axis, const framework::DDim& dst_dims, - Tensor* transformed_src) { + phi::DenseTensor* transformed_src) { auto stream = dev_ctx.stream(); // 1. 
expand the axis with dim 1 @@ -96,11 +96,11 @@ void NpuBroadcast(const platform::NPUDeviceContext& dev_ctx, template void NpuElementWiseOpBroadcast(const platform::NPUDeviceContext& dev_ctx, - const Tensor* x, - const Tensor* y, + const phi::DenseTensor* x, + const phi::DenseTensor* y, int axis, - Tensor* transformed_x, - Tensor* transformed_y) { + phi::DenseTensor* transformed_x, + phi::DenseTensor* transformed_y) { auto x_dims = x->dims(); auto y_dims = y->dims(); bool is_xsize_larger = true; diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h index e722d5f7e6e99..dc054579dc181 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_op.h @@ -36,7 +36,7 @@ class ElementwiseOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; void InferShape(framework::InferShapeContext *ctx) const override { OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ElementwiseOp"); @@ -170,7 +170,7 @@ class ElementwiseOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input @@ -292,7 +292,7 @@ For example: class ElementwiseOpGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; void InferShape(framework::InferShapeContext *ctx) const override { auto out_grad_name = framework::GradVarName("Out"); @@ -331,7 +331,7 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input @@ -349,7 +349,7 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel { class ElementwiseOpDoubleGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; void InferShape(framework::InferShapeContext *ctx) const override { auto x_grad_name = framework::GradVarName("X"); @@ -385,7 +385,7 @@ class ElementwiseOpDoubleGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input @@ -404,7 +404,7 @@ class ElementwiseOpDoubleGradWithoutDXDY : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; void InferShape(framework::InferShapeContext *ctx) const override { if (ctx->HasOutput("DDOut")) { @@ -446,7 +446,7 @@ class ElementwiseOpDoubleGradWithoutDXDY 
framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input @@ -464,7 +464,7 @@ class ElementwiseOpDoubleGradWithoutDXDY class ElementwiseOpTripleGrad : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; void InferShape(framework::InferShapeContext *ctx) const override { if (ctx->HasOutput("D_DDX")) { @@ -507,7 +507,7 @@ class ElementwiseOpTripleGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input diff --git a/paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h b/paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h index dbb555a0a6a13..25e22f9e2895c 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h +++ b/paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h @@ -26,8 +26,8 @@ template void LaunchElementwiseCudaKernel( const KPDevice &ctx, - const std::vector &ins, - std::vector *outs, + const std::vector &ins, + std::vector *outs, int axis, Functor func) { std::vector pt_inputs; diff --git a/paddle/fluid/operators/elementwise/elementwise_op_function.h b/paddle/fluid/operators/elementwise/elementwise_op_function.h index f81b76aa4877c..2abb15c98ce1f 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op_function.h +++ b/paddle/fluid/operators/elementwise/elementwise_op_function.h @@ -69,9 +69,9 @@ namespace operators { */ template int PackTensorsIntoVector(const framework::ExecutionContext &ctx, - std::vector *ins, - std::vector *outs, - framework::Tensor *x_for_selectedrows = nullptr) { + std::vector *ins, + std::vector *outs, + phi::DenseTensor *x_for_selectedrows = nullptr) { int axis = -1; auto x_var = ctx.InputVar("X"); PADDLE_ENFORCE_NOT_NULL( @@ -80,7 +80,7 @@ int PackTensorsIntoVector(const framework::ExecutionContext &ctx, "Unable to get input Variable X, Variable name is %s.\n", ctx.InputName("X"))); auto *y = ctx.Input("Y"); - framework::Tensor *z; + phi::DenseTensor *z; if (x_var->IsType()) { auto *x = ctx.Input("X"); @@ -152,13 +152,13 @@ template void ElemwiseGradCompute(const framework::ExecutionContext &ctx, - const framework::Tensor &x, - const framework::Tensor &y, - const framework::Tensor &out, - const framework::Tensor &dout, + const phi::DenseTensor &x, + const phi::DenseTensor &y, + const phi::DenseTensor &out, + const phi::DenseTensor &dout, int axis, - framework::Tensor *dx, - framework::Tensor *dy, + phi::DenseTensor *dx, + phi::DenseTensor *dy, DX_OP dx_op, DY_OP dy_op) { const auto &dev_ctx = ctx.template device_context(); @@ -180,11 +180,11 @@ template void ElementwiseComputeEx(const framework::ExecutionContext &ctx, - const framework::Tensor *x, - const framework::Tensor *y, + const phi::DenseTensor *x, + const phi::DenseTensor *y, int axis, Functor func, - framework::Tensor *z) { + phi::DenseTensor *z) { z->mutable_data(ctx.GetPlace()); const auto &dev_ctx = ctx.template device_context(); 
phi::funcs::ElementwiseCompute( @@ -468,11 +468,11 @@ template (phi::product(x_dim)); platform::ForRange for_range( @@ -499,12 +499,12 @@ void FusedElemwiseAndActComputeWithBroadcast( const framework::ExecutionContext &ctx, const framework::DDim &x_dim, const framework::DDim &y_dim_untrimed, - const framework::Tensor &x, - const framework::Tensor &y, + const phi::DenseTensor &x, + const phi::DenseTensor &y, CompoundFunctor compound_functor, int axis, - framework::Tensor *out, - framework::Tensor *intermediate_out) { + phi::DenseTensor *out, + phi::DenseTensor *intermediate_out) { axis = (axis == -1 ? x_dim.size() - y_dim_untrimed.size() : axis); auto y_dim = trim_trailing_singular_dims(y_dim_untrimed); axis = (y_dim.size() == 0) ? x_dim.size() : axis; @@ -642,15 +642,15 @@ void FusedElemwiseAndActGradComputeNoBroadcast( const framework::ExecutionContext &ctx, const framework::DDim &x_dim, const framework::DDim &y_dim, - const framework::Tensor *x, - const framework::Tensor *y, - const framework::Tensor *intermediate_out, - const framework::Tensor *out, - const framework::Tensor *dout, + const phi::DenseTensor *x, + const phi::DenseTensor *y, + const phi::DenseTensor *intermediate_out, + const phi::DenseTensor *out, + const phi::DenseTensor *dout, int axis, - framework::Tensor *dx, - framework::Tensor *dy, - framework::Tensor *dintermediate, + phi::DenseTensor *dx, + phi::DenseTensor *dy, + phi::DenseTensor *dintermediate, DX_OP dx_op, DY_OP dy_op, DIntermediate_OP dintermediate_op) { @@ -1244,15 +1244,15 @@ void FusedElemwiseAndActGradComputeWithBroadcast( const framework::ExecutionContext &ctx, const framework::DDim &x_dim, const framework::DDim &y_dim_untrimed, - const framework::Tensor *x, - const framework::Tensor *y, - const framework::Tensor *intermediate_out, - const framework::Tensor *out, - const framework::Tensor *dout, + const phi::DenseTensor *x, + const phi::DenseTensor *y, + const phi::DenseTensor *intermediate_out, + const phi::DenseTensor *out, + const phi::DenseTensor *dout, int axis, - framework::Tensor *dx, - framework::Tensor *dy, - framework::Tensor *dintermediate, + phi::DenseTensor *dx, + phi::DenseTensor *dy, + phi::DenseTensor *dintermediate, DX_OP dx_op, DY_OP dy_op, DIntermediate_OP dintermediate_op) { @@ -1385,15 +1385,15 @@ template void FusedElemwiseAndActGradComputeEx(const framework::ExecutionContext &ctx, - const framework::Tensor *x, - const framework::Tensor *y, - const framework::Tensor *out, - const framework::Tensor *intermediate_out, - const framework::Tensor *dout, + const phi::DenseTensor *x, + const phi::DenseTensor *y, + const phi::DenseTensor *out, + const phi::DenseTensor *intermediate_out, + const phi::DenseTensor *dout, int axis, - framework::Tensor *dx, - framework::Tensor *dy, - framework::Tensor *dintermediate, + phi::DenseTensor *dx, + phi::DenseTensor *dy, + phi::DenseTensor *dintermediate, DX_OP dx_op, DY_OP dy_op, DIntermediate_OP dintermediate_op) { @@ -1497,12 +1497,12 @@ template void FusedElemwiseAndActComputeEx(const framework::ExecutionContext &ctx, - const framework::Tensor &x, - const framework::Tensor &y, + const phi::DenseTensor &x, + const phi::DenseTensor &y, int axis, CompoundFunctor compound_functor, - framework::Tensor *out, - framework::Tensor *intermediate_out) { + phi::DenseTensor *out, + phi::DenseTensor *intermediate_out) { if (KeepIntermediateOut) { PADDLE_ENFORCE_NOT_NULL( intermediate_out, @@ -1578,9 +1578,9 @@ void FusedElemwiseAndActComputeEx(const framework::ExecutionContext &ctx, template static inline void 
GetDoubleGradSafeTensor( const framework::ExecutionContext &ctx, - const framework::Tensor *x, - const framework::Tensor *ddx, - framework::Tensor *ddx_safe) { + const phi::DenseTensor *x, + const phi::DenseTensor *ddx, + phi::DenseTensor *ddx_safe) { const auto &dev_ctx = ctx.template device_context(); phi::funcs::GetDoubleGradSafeTensor( dev_ctx, *x, ddx, ddx_safe); @@ -1599,10 +1599,10 @@ template void GetGradXAndYOut(const phi::GPUContext &dev_ctx, const platform::Place &place, int axis, - std::vector ins, - const framework::Tensor *dout, - framework::Tensor *dx, - framework::Tensor *dy, + std::vector ins, + const phi::DenseTensor *dout, + phi::DenseTensor *dx, + phi::DenseTensor *dy, Functor func) { phi::GetGradXAndYOut( dev_ctx, place, axis, ins, *dout, dx, dy, func); @@ -1612,9 +1612,9 @@ template void GetGradXOrYOut(const phi::GPUContext &dev_ctx, const platform::Place &place, int axis, - std::vector ins, - const framework::Tensor *dout, - framework::Tensor *dxy, + std::vector ins, + const phi::DenseTensor *dout, + phi::DenseTensor *dxy, Functor func) { phi::GetGradXOrYOut( dev_ctx, place, axis, ins, *dout, dxy, func); diff --git a/paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h b/paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h index db1cc766a3e61..1e9b87c965656 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h +++ b/paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h @@ -29,8 +29,8 @@ using ElementwiseType = phi::ElementwiseType; template void LaunchSameDimsElementwiseCudaKernel( const KPDevice &ctx, - const std::vector &ins, - std::vector *outs, + const std::vector &ins, + std::vector *outs, Functor func) { std::vector pt_inputs; std::vector pt_outputs; diff --git a/paddle/fluid/operators/elementwise/elementwise_pow_op_mlu.cc b/paddle/fluid/operators/elementwise/elementwise_pow_op_mlu.cc index 431122641ec3d..6942377049b47 100644 --- a/paddle/fluid/operators/elementwise/elementwise_pow_op_mlu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_pow_op_mlu.cc @@ -18,7 +18,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ElementwisePowMLUKernel : public framework::OpKernel { @@ -32,11 +32,11 @@ template class ElementwisePowGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); int axis = ctx.Attr("axis"); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/elementwise/elementwise_pow_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_pow_op_npu.cc index 9e935bb683232..18853222ba6b7 100644 --- a/paddle/fluid/operators/elementwise/elementwise_pow_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_pow_op_npu.cc @@ -21,7 +21,7 @@ limitations under the License. 
 */
 
 namespace paddle {
 namespace operators {
 
-using Tensor = framework::Tensor;
+using Tensor = phi::DenseTensor;
 
 template <typename DeviceContext, typename T>
 class ElementwisePowNPUKernel : public framework::OpKernel<T> {
@@ -30,9 +30,9 @@ class ElementwisePowNPUKernel : public framework::OpKernel<T> {
     auto& dev_ctx =
         ctx.template device_context<paddle::platform::NPUDeviceContext>();
 
-    auto* x = ctx.Input<Tensor>("X");
-    auto* y = ctx.Input<Tensor>("Y");
-    auto* out = ctx.Output<Tensor>("Out");
+    auto* x = ctx.Input<phi::DenseTensor>("X");
+    auto* y = ctx.Input<phi::DenseTensor>("Y");
+    auto* out = ctx.Output<phi::DenseTensor>("Out");
 
     auto place = ctx.GetPlace();
 
     int axis = ctx.Attr<int>("axis");
@@ -72,11 +72,11 @@ class ElementwisePowGradNPUKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto& dev_ctx =
         ctx.template device_context<paddle::platform::NPUDeviceContext>();
-    auto* x = ctx.Input<Tensor>("X");
-    auto* y = ctx.Input<Tensor>("Y");
-    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
-    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
+    auto* x = ctx.Input<phi::DenseTensor>("X");
+    auto* y = ctx.Input<phi::DenseTensor>("Y");
+    auto* dout = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
+    auto* dx = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
+    auto* dy = ctx.Output<phi::DenseTensor>(framework::GradVarName("Y"));
     int axis = ctx.Attr<int>("axis");
     auto place = ctx.GetPlace();
diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op_mlu.cc b/paddle/fluid/operators/elementwise/elementwise_sub_op_mlu.cc
index 49fae0a9f5b61..0f56044d268e4 100644
--- a/paddle/fluid/operators/elementwise/elementwise_sub_op_mlu.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_sub_op_mlu.cc
@@ -20,7 +20,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = framework::Tensor;
+using Tensor = phi::DenseTensor;
 
 template <typename T>
 class ElementwiseSubMLUKernel : public framework::OpKernel<T> {
@@ -36,11 +36,11 @@ class ElementwiseSubGradMLUKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto& dev_ctx = ctx.template device_context<platform::MLUDeviceContext>();
-    auto* x = ctx.Input<Tensor>("X");
-    auto* y = ctx.Input<Tensor>("Y");
-    auto* dout = ctx.Input<Tensor>(framework::GradVarName("Out"));
-    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
-    auto* dy = ctx.Output<Tensor>(framework::GradVarName("Y"));
+    auto* x = ctx.Input<phi::DenseTensor>("X");
+    auto* y = ctx.Input<phi::DenseTensor>("Y");
+    auto* dout = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
+    auto* dx = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
+    auto* dy = ctx.Output<phi::DenseTensor>(framework::GradVarName("Y"));
     int axis = ctx.Attr<int>("axis");
     axis = (axis == -1 ? std::abs(x->dims().size() - y->dims().size()) : axis);
@@ -79,7 +79,7 @@ class ElementwiseSubGradMLUKernel : public framework::OpKernel<T> {
     }
     if (dy) {
       dy->mutable_data<T>(ctx.GetPlace());
-      Tensor* tmp_dout = const_cast<Tensor*>(dout);
+      phi::DenseTensor* tmp_dout = const_cast<phi::DenseTensor*>(dout);
       if (dy->dims() != dout->dims()) {
         std::vector<int> dst_dims_vec;
         std::vector<int> reduce_axes;
diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_sub_op_npu.cc
index ca4c469ce2c66..8df295a972559 100644
--- a/paddle/fluid/operators/elementwise/elementwise_sub_op_npu.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_sub_op_npu.cc
@@ -21,15 +21,15 @@ limitations under the License.
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ElementwiseSubNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); @@ -46,9 +46,9 @@ template class ElementwiseSubGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); auto stream = ctx.template device_context() @@ -75,7 +75,7 @@ class ElementwiseSubGradNPUKernel : public framework::OpKernel { for (auto i = 0; i < reduce_ndim; ++i) { axes.push_back(i); } - Tensor* tmp_dout = const_cast(dout); + phi::DenseTensor* tmp_dout = const_cast(dout); Tensor reduced_dout(dx->type()); if (axes.size() != 0) { std::vector reduced_dout_dims; @@ -123,7 +123,7 @@ class ElementwiseSubGradNPUKernel : public framework::OpKernel { for (auto i = 0; i < reduce_ndim; ++i) { axes.push_back(i); } - Tensor* tmp_dout = const_cast(dout); + phi::DenseTensor* tmp_dout = const_cast(dout); Tensor reduced_dy(dy->type()); Tensor reduced_dout(dy->type()); @@ -145,7 +145,7 @@ class ElementwiseSubGradNPUKernel : public framework::OpKernel { // stage 2 axes.clear(); - Tensor* tmp_dy = tmp_dout; + phi::DenseTensor* tmp_dy = tmp_dout; for (auto i = 0; i < dy->dims().size(); ++i) { if (dy->dims()[i] == 1) { axes.push_back(i); diff --git a/paddle/fluid/operators/elementwise/elementwise_xpu.h b/paddle/fluid/operators/elementwise/elementwise_xpu.h index 403ba5a592fd0..10e4813008af4 100644 --- a/paddle/fluid/operators/elementwise/elementwise_xpu.h +++ b/paddle/fluid/operators/elementwise/elementwise_xpu.h @@ -68,11 +68,11 @@ void XPUElementwiseGrad(const framework::ExecutionContext& ctx, const std::vector&, const std::vector&)> func, bool use_x_y_data) { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dz = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* dz = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); int axis = ctx.Attr("axis"); auto& dev_ctx = diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h index 42d749b7b8e3e..c830d5a5bc5df 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h @@ -28,10 +28,9 @@ using dnnl::memory; using dnnl::primitive; using dnnl::stream; using framework::DataLayout; -using framework::Tensor; -inline std::vector CalculateBroadcastedDims(const Tensor* x, - const Tensor* y) { +inline std::vector CalculateBroadcastedDims( + const phi::DenseTensor* x, const phi::DenseTensor* y) { const auto src_tz = phi::vectorize(x->dims()); const auto dst_tz = 
phi::vectorize(y->dims()); @@ -60,9 +59,9 @@ class EltwiseMKLDNNKernel : public framework::OpKernel { ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* z = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* z = ctx.Output("Out"); float scale_x = ctx.Attr("Scale_x"); float scale_y = ctx.Attr("Scale_y"); @@ -136,19 +135,19 @@ class EltwiseMKLDNNGradKernel : public ElemwiseGradKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { ElemwiseGradKernel::Compute(ctx); - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* out = ctx.Input("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Input("Out"); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); - auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* dout = ctx.Input(framework::GradVarName("Out")); // oneDNN's binary is optimized for broadcasting y into x, so in other case // we have to swap tensors to achieve optimal performance diff --git a/paddle/fluid/operators/empty_op.cc b/paddle/fluid/operators/empty_op.cc index aed1ca284a1af..47dc2eb383249 100644 --- a/paddle/fluid/operators/empty_op.cc +++ b/paddle/fluid/operators/empty_op.cc @@ -55,7 +55,7 @@ class EmptyOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "ShapeTensor" || var_name == "ShapeTensorList") { return expected_kernel_type; diff --git a/paddle/fluid/operators/expand_as_op.cc b/paddle/fluid/operators/expand_as_op.cc index 3d32c9b8a148f..b793d835fca98 100644 --- a/paddle/fluid/operators/expand_as_op.cc +++ b/paddle/fluid/operators/expand_as_op.cc @@ -17,8 +17,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class ExpandAsOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/expand_as_op.h b/paddle/fluid/operators/expand_as_op.h index 7a856a0153dd4..58b6b619c231a 100644 --- a/paddle/fluid/operators/expand_as_op.h +++ b/paddle/fluid/operators/expand_as_op.h @@ -23,7 +23,7 @@ limitations under the License. 
 */
 
 namespace paddle {
 namespace operators {
 
-using Tensor = framework::Tensor;
+using Tensor = phi::DenseTensor;
 
 template <typename T, int MajorType = Eigen::RowMajor,
@@ -38,7 +38,7 @@ template <typename DeviceContext, typename T>
 class ExpandAsKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto rank = context.Input<Tensor>("X")->dims().size();
+    auto rank = context.Input<phi::DenseTensor>("X")->dims().size();
     switch (rank) {
       case 1:
         ExpandAs<1>(context);
@@ -69,10 +69,10 @@ class ExpandAsKernel : public framework::OpKernel<T> {
  protected:
   template <int Rank>
   void ExpandAs(const framework::ExecutionContext& context) const {
-    auto* in0 = context.Input<Tensor>("X");
+    auto* in0 = context.Input<phi::DenseTensor>("X");
     auto in_dims = in0->dims();
-    auto* target_tensor = context.Input<Tensor>("target_tensor");
-    auto* out0 = context.Output<Tensor>("Out");
+    auto* target_tensor = context.Input<phi::DenseTensor>("target_tensor");
+    auto* out0 = context.Output<phi::DenseTensor>("Out");
     Eigen::DSizes<Eigen::DenseIndex, Rank> bcast_dims;
     int bcast_dims_remainder = 0;
     auto x_dims = in0->dims();
@@ -113,8 +113,8 @@ template <typename DeviceContext, typename T>
 class ExpandAsGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* in0 = context.Input<Tensor>("X");
-    auto* target_tensor = context.Input<Tensor>("target_tensor");
+    auto* in0 = context.Input<phi::DenseTensor>("X");
+    auto* target_tensor = context.Input<phi::DenseTensor>("target_tensor");
     auto x_dims = in0->dims();
     auto y_dims = target_tensor->dims();
     std::vector<int> bcast_dims;
@@ -138,8 +138,10 @@ class ExpandAsGradKernel : public framework::OpKernel<T> {
     }
     // no need reduce, just copy
     if (just_copy) {
-      auto* in0 = context.Input<Tensor>(framework::GradVarName("Out"));
-      auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
+      auto* in0 =
+          context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
+      auto* out0 =
+          context.Output<phi::DenseTensor>(framework::GradVarName("X"));
       out0->mutable_data<T>(context.GetPlace());
       framework::TensorCopy(
           *in0, context.GetPlace(), context.device_context(), out0);
@@ -194,8 +196,8 @@ class ExpandAsGradKernel : public framework::OpKernel<T> {
                       const std::vector<int>& reduce_dims_vec) const {
     size_t reshape_size = reshape_dims_vec.size();
     size_t reduce_size = reduce_dims_vec.size();
-    auto* in0 = context.Input<Tensor>(framework::GradVarName("Out"));
-    auto* out0 = context.Output<Tensor>(framework::GradVarName("X"));
+    auto* in0 = context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
+    auto* out0 = context.Output<phi::DenseTensor>(framework::GradVarName("X"));
     out0->mutable_data<T>(context.GetPlace());
     auto x_grad = EigenVector<T>::Flatten(*out0);
     Eigen::DSizes<Eigen::DenseIndex, Dims> reshape_dims;
diff --git a/paddle/fluid/operators/expand_as_v2_op.cc b/paddle/fluid/operators/expand_as_v2_op.cc
index 6fcf301897f29..772ef09219817 100644
--- a/paddle/fluid/operators/expand_as_v2_op.cc
+++ b/paddle/fluid/operators/expand_as_v2_op.cc
@@ -21,8 +21,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using framework::Tensor;
-
 class ExpandAsV2Op : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
diff --git a/paddle/fluid/operators/expand_as_v2_op.h b/paddle/fluid/operators/expand_as_v2_op.h
index 5533e7bf91205..1205fc0447f1e 100644
--- a/paddle/fluid/operators/expand_as_v2_op.h
+++ b/paddle/fluid/operators/expand_as_v2_op.h
@@ -24,7 +24,7 @@ limitations under the License.
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template diff --git a/paddle/fluid/operators/expand_as_v2_op_mlu.cc b/paddle/fluid/operators/expand_as_v2_op_mlu.cc index 3a7ced3a0cef3..8184af44916bb 100644 --- a/paddle/fluid/operators/expand_as_v2_op_mlu.cc +++ b/paddle/fluid/operators/expand_as_v2_op_mlu.cc @@ -20,13 +20,13 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ExpandAsV2MLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto rank = context.Input("X")->dims().size(); + auto rank = context.Input("X")->dims().size(); auto target_shape = context.Attr>("target_shape"); auto target_rank = target_shape.size(); PADDLE_ENFORCE_GE(target_rank, @@ -55,7 +55,7 @@ class ExpandAsV2MLUKernel : public framework::OpKernel { protected: void ExpandAs(const framework::ExecutionContext& context) const { - auto* in0 = context.Input("X"); + auto* in0 = context.Input("X"); auto in_dims = in0->dims(); auto target_shape = context.Attr>("target_shape"); auto vec_in_dims = phi::vectorize(in_dims); @@ -79,7 +79,7 @@ class ExpandAsV2MLUKernel : public framework::OpKernel { target_shape[i])); } } - auto* out0 = context.Output("Out"); + auto* out0 = context.Output("Out"); framework::DDim out_dims = phi::make_ddim(target_shape); diff --git a/paddle/fluid/operators/expand_as_v2_op_npu.cc b/paddle/fluid/operators/expand_as_v2_op_npu.cc index 69513d26a6fff..0ac693ff600c5 100644 --- a/paddle/fluid/operators/expand_as_v2_op_npu.cc +++ b/paddle/fluid/operators/expand_as_v2_op_npu.cc @@ -21,7 +21,7 @@ template class ExpandAsV2NPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto rank = context.Input("X")->dims().size(); + auto rank = context.Input("X")->dims().size(); auto target_shape = context.Attr>("target_shape"); auto target_rank = target_shape.size(); PADDLE_ENFORCE_GE(target_rank, @@ -50,7 +50,7 @@ class ExpandAsV2NPUKernel : public framework::OpKernel { protected: void ExpandAs(const framework::ExecutionContext& context) const { - auto* in0 = context.Input("X"); + auto* in0 = context.Input("X"); auto in_dims = in0->dims(); auto target_shape = context.Attr>("target_shape"); auto vec_in_dims = phi::vectorize(in_dims); @@ -74,7 +74,7 @@ class ExpandAsV2NPUKernel : public framework::OpKernel { target_shape[i])); } } - auto* out0 = context.Output("Out"); + auto* out0 = context.Output("Out"); framework::DDim out_dims = phi::make_ddim(target_shape); diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc index 1261b7777010e..67b8102181e1b 100644 --- a/paddle/fluid/operators/expand_op.cc +++ b/paddle/fluid/operators/expand_op.cc @@ -21,8 +21,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::Tensor; - class ExpandOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -88,7 +86,7 @@ class ExpandOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "expand_times_tensor" || var_name == "ExpandTimes") { return expected_kernel_type; @@ -217,7 +215,7 @@ class ExpandGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "expand_times_tensor") { return expected_kernel_type; diff --git a/paddle/fluid/operators/expand_op.h b/paddle/fluid/operators/expand_op.h index 72eab31e157ad..dc7e42f48333e 100644 --- a/paddle/fluid/operators/expand_op.h +++ b/paddle/fluid/operators/expand_op.h @@ -30,7 +30,7 @@ inline std::vector get_expand_times( if (ctx.HasInput("ExpandTimes")) { auto* expand_tensor = ctx.Input("ExpandTimes"); auto* expand_data = expand_tensor->data(); - framework::Tensor cpu_expand_tensor; + phi::DenseTensor cpu_expand_tensor; if (platform::is_gpu_place(expand_tensor->place())) { paddle::framework::TensorCopySync( *expand_tensor, platform::CPUPlace(), &cpu_expand_tensor); @@ -56,20 +56,20 @@ inline std::vector get_expand_times( } auto list_expand_times_tensor = - ctx.MultiInput("expand_times_tensor"); + ctx.MultiInput("expand_times_tensor"); if (list_expand_times_tensor.size() > 0) { // get tensor from std::vector vec_epxand_times; for (size_t i = 0; i < list_expand_times_tensor.size(); ++i) { auto tensor = list_expand_times_tensor[i]; if (platform::is_gpu_place(tensor->place())) { - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); vec_epxand_times.push_back(*temp.data()); } #ifdef PADDLE_WITH_XPU else if (platform::is_xpu_place(tensor->place())) { // NOLINT - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); vec_epxand_times.push_back(*temp.data()); } @@ -85,7 +85,7 @@ inline std::vector get_expand_times( } } -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template @@ -101,7 +101,7 @@ template class ExpandKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto rank = context.Input("X")->dims().size(); + auto rank = context.Input("X")->dims().size(); PADDLE_ENFORCE_GE( rank, 1, @@ -142,7 +142,7 @@ class ExpandKernel : public framework::OpKernel { protected: template void Expand(const framework::ExecutionContext& context) const { - auto* in0 = context.Input("X"); + auto* in0 = context.Input("X"); auto in_dims = in0->dims(); auto expand_times = get_expand_times(context); @@ -154,7 +154,7 @@ class ExpandKernel : public framework::OpKernel { "of dimensions (%d) of the input.", expand_times.size(), static_cast(in_dims.size()))); - auto* out0 = context.Output("Out"); + auto* out0 = context.Output("Out"); Eigen::DSizes bcast_dims; for (size_t i = 0; i < expand_times.size(); ++i) { bcast_dims[i] = expand_times[i]; @@ -187,7 +187,7 @@ template class ExpandGradKernel : public framework::OpKernel { public: void Compute(const 
framework::ExecutionContext& context) const override { - auto* in0 = context.Input("X"); + auto* in0 = context.Input("X"); // auto& expand_times = context.Attr>("expand_times"); auto expand_times = get_expand_times(context); auto x_dims = in0->dims(); @@ -214,8 +214,10 @@ class ExpandGradKernel : public framework::OpKernel { } // no need reduce, just copy if (just_copy) { - auto* in0 = context.Input(framework::GradVarName("Out")); - auto* out0 = context.Output(framework::GradVarName("X")); + auto* in0 = + context.Input(framework::GradVarName("Out")); + auto* out0 = + context.Output(framework::GradVarName("X")); out0->mutable_data(context.GetPlace()); framework::TensorCopy( *in0, context.GetPlace(), context.device_context(), out0); @@ -285,8 +287,8 @@ class ExpandGradKernel : public framework::OpKernel { "reduce dimensions (%d).", reduce_size, reduce_dims_vec.size())); - auto* in0 = context.Input(framework::GradVarName("Out")); - auto* out0 = context.Output(framework::GradVarName("X")); + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); out0->mutable_data(context.GetPlace()); auto x_grad = EigenVector::Flatten(*out0); Eigen::DSizes reshape_dims; diff --git a/paddle/fluid/operators/expand_op_npu.cc b/paddle/fluid/operators/expand_op_npu.cc index 45870767699ea..f1c81cb1b9ca0 100644 --- a/paddle/fluid/operators/expand_op_npu.cc +++ b/paddle/fluid/operators/expand_op_npu.cc @@ -26,7 +26,7 @@ template class ExpandNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto rank = context.Input("X")->dims().size(); + auto rank = context.Input("X")->dims().size(); PADDLE_ENFORCE_GE( rank, 1, diff --git a/paddle/fluid/operators/expand_v2_op.cc b/paddle/fluid/operators/expand_v2_op.cc index fd92a43318c58..fb82f0b6524ba 100644 --- a/paddle/fluid/operators/expand_v2_op.cc +++ b/paddle/fluid/operators/expand_v2_op.cc @@ -28,8 +28,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::Tensor; - class ExpandV2Op : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -53,7 +51,7 @@ class ExpandV2Op : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "expand_shapes_tensor" || var_name == "Shape") { return expected_kernel_type; @@ -179,7 +177,7 @@ class ExpandV2GradOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "expand_shapes_tensor" || var_name == "Shape") { return expected_kernel_type; diff --git a/paddle/fluid/operators/expand_v2_op.h b/paddle/fluid/operators/expand_v2_op.h index 2bf31ff221c5f..3c6d017977951 100644 --- a/paddle/fluid/operators/expand_v2_op.h +++ b/paddle/fluid/operators/expand_v2_op.h @@ -31,7 +31,7 @@ inline std::vector get_expand_shape( if (ctx.HasInput("Shape")) { auto* shape_tensor = ctx.Input("Shape"); auto* shape_data = shape_tensor->data(); - framework::Tensor cpu_shape_tensor; + phi::DenseTensor cpu_shape_tensor; if (platform::is_gpu_place(shape_tensor->place())) { paddle::framework::TensorCopySync( *shape_tensor, platform::CPUPlace(), &cpu_shape_tensor); @@ -64,34 +64,34 @@ inline std::vector get_expand_shape( } auto list_expand_shapes_tensor = - ctx.MultiInput("expand_shapes_tensor"); + ctx.MultiInput("expand_shapes_tensor"); if (list_expand_shapes_tensor.size() > 0) { // get tensor from std::vector vec_epxand_shape; for (size_t i = 0; i < list_expand_shapes_tensor.size(); ++i) { auto tensor = list_expand_shapes_tensor[i]; if (platform::is_gpu_place(tensor->place())) { - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); vec_epxand_shape.push_back(*temp.data()); } #ifdef PADDLE_WITH_ASCEND_CL else if (platform::is_npu_place(tensor->place())) { // NOLINT - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); vec_epxand_shape.push_back(*temp.data()); } #endif #ifdef PADDLE_WITH_XPU else if (platform::is_xpu_place(tensor->place())) { // NOLINT - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); vec_epxand_shape.push_back(*temp.data()); } #endif #ifdef PADDLE_WITH_MLU else if (platform::is_mlu_place(tensor->place())) { // NOLINT - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); vec_epxand_shape.push_back(*temp.data()); } diff --git a/paddle/fluid/operators/expand_v2_op_mlu.cc b/paddle/fluid/operators/expand_v2_op_mlu.cc index 9dbf3df06d51a..4ae0b4192ab53 100644 --- a/paddle/fluid/operators/expand_v2_op_mlu.cc +++ b/paddle/fluid/operators/expand_v2_op_mlu.cc @@ -24,8 +24,8 @@ template class ExpandV2MLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Out = ctx.Output("Out"); + auto* X = ctx.Input("X"); + auto* Out = ctx.Output("Out"); auto in_dims = X->dims(); auto expand_shape = get_expand_shape(ctx); auto vec_in_dims = 
phi::vectorize(in_dims); diff --git a/paddle/fluid/operators/expand_v2_op_npu.cc b/paddle/fluid/operators/expand_v2_op_npu.cc index feb45f7d2e48c..d5748328b1d4d 100644 --- a/paddle/fluid/operators/expand_v2_op_npu.cc +++ b/paddle/fluid/operators/expand_v2_op_npu.cc @@ -19,13 +19,13 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ExpandV2NPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Out = ctx.Output("Out"); + auto* X = ctx.Input("X"); + auto* Out = ctx.Output("Out"); auto in_dims = X->dims(); auto expand_shape = get_expand_shape(ctx); @@ -158,8 +158,8 @@ template class ExpandV2NPUGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); auto stream = diff --git a/paddle/fluid/operators/eye_op_npu.cc b/paddle/fluid/operators/eye_op_npu.cc index 3cf0f3830be19..6a01992c83335 100644 --- a/paddle/fluid/operators/eye_op_npu.cc +++ b/paddle/fluid/operators/eye_op_npu.cc @@ -18,7 +18,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class EyeNPUKernel : public framework::OpKernel { @@ -36,7 +36,7 @@ class EyeNPUKernel : public framework::OpKernel { framework::NPUAttributeMap attr_input = { {"num_rows", num_rows}, {"num_columns", num_columns}, {"dtype", dtype}}; - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); const auto& runner = NpuOpRunner("Eye", {}, {*out}, attr_input); diff --git a/paddle/fluid/operators/fake_dequantize_op.cc b/paddle/fluid/operators/fake_dequantize_op.cc index 4e1df4f98ab57..4f140e0a00d18 100644 --- a/paddle/fluid/operators/fake_dequantize_op.cc +++ b/paddle/fluid/operators/fake_dequantize_op.cc @@ -25,10 +25,10 @@ namespace operators { template struct DequantizeFunctor { void operator()(const phi::CPUContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor* scale, + const phi::DenseTensor* in, + const phi::DenseTensor* scale, T max_range, - framework::Tensor* out) { + phi::DenseTensor* out) { auto in_e = framework::EigenVector::Flatten(*in); const T* scale_factor = scale->data(); auto out_e = framework::EigenVector::Flatten(*out); @@ -41,13 +41,13 @@ struct DequantizeFunctor { template struct ChannelDequantizeFunctor { void operator()(const phi::CPUContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor** scales, + const phi::DenseTensor* in, + const phi::DenseTensor** scales, const int scale_num, T max_range, const int quant_axis, const int x_num_col_dims, - framework::Tensor* out) { + phi::DenseTensor* out) { if (scale_num == 1) { // Dequant op is before quantized op // Dequantize the weight of quantized op @@ -57,8 +57,8 @@ struct ChannelDequantizeFunctor { if (quant_axis == 0) { for (int64_t i = 0; i < channel; i++) { T s = scale_factor[i]; - framework::Tensor one_channel_in = in->Slice(i, i + 1); - framework::Tensor one_channel_out = out->Slice(i, i + 1); + phi::DenseTensor one_channel_in = in->Slice(i, i + 1); + phi::DenseTensor one_channel_out = 
out->Slice(i, i + 1); auto in_e = framework::EigenVector::Flatten(one_channel_in); auto out_e = framework::EigenVector::Flatten(one_channel_out); auto& dev = *dev_ctx.eigen_device(); @@ -120,14 +120,14 @@ struct ChannelDequantizeFunctor { const T* scale_one = scales[0]->data(); const T* scale_two = scales[1]->data(); for (int i = 0; i < batch_size; i++) { - framework::Tensor one_batch_in = in->Slice(i, i + 1).Resize( + phi::DenseTensor one_batch_in = in->Slice(i, i + 1).Resize( phi::slice_ddim(in->dims(), 1, in->dims().size())); - framework::Tensor one_batch_out = out->Slice(i, i + 1).Resize( + phi::DenseTensor one_batch_out = out->Slice(i, i + 1).Resize( phi::slice_ddim(out->dims(), 1, out->dims().size())); for (int j = 0; j < channel; j++) { T s = scale_one[j]; - framework::Tensor one_channel_in = one_batch_in.Slice(j, j + 1); - framework::Tensor one_channel_out = one_batch_out.Slice(j, j + 1); + phi::DenseTensor one_channel_in = one_batch_in.Slice(j, j + 1); + phi::DenseTensor one_channel_out = one_batch_out.Slice(j, j + 1); auto in_e = framework::EigenVector::Flatten(one_channel_in); auto out_e = framework::EigenVector::Flatten(one_channel_out); auto& dev = *dev_ctx.eigen_device(); diff --git a/paddle/fluid/operators/fake_dequantize_op.cu.h b/paddle/fluid/operators/fake_dequantize_op.cu.h index 17b0d9787169e..20088c11f2aa0 100644 --- a/paddle/fluid/operators/fake_dequantize_op.cu.h +++ b/paddle/fluid/operators/fake_dequantize_op.cu.h @@ -33,10 +33,10 @@ __global__ void KeDequantize( template struct DequantizeFunctor { void operator()(const phi::GPUContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor* scale, + const phi::DenseTensor* in, + const phi::DenseTensor* scale, T max_range, - framework::Tensor* out) { + phi::DenseTensor* out) { const T* in_data = in->data(); const T* scale_factor = scale->data(); T* out_data = out->mutable_data(dev_ctx.GetPlace()); @@ -102,13 +102,13 @@ __global__ void DequantizeTwoScale(const T* in, template struct ChannelDequantizeFunctor { void operator()(const phi::GPUContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor** scales, + const phi::DenseTensor* in, + const phi::DenseTensor** scales, const int scale_num, T max_range, const int quant_axis, const int x_num_col_dims, - framework::Tensor* out) { + phi::DenseTensor* out) { auto in_dims = in->dims(); const T* in_data = in->data(); T* out_data = out->mutable_data(dev_ctx.GetPlace()); diff --git a/paddle/fluid/operators/fake_dequantize_op.h b/paddle/fluid/operators/fake_dequantize_op.h index cf8a7e148e40c..fba98963031b7 100644 --- a/paddle/fluid/operators/fake_dequantize_op.h +++ b/paddle/fluid/operators/fake_dequantize_op.h @@ -26,31 +26,31 @@ namespace operators { template struct DequantizeFunctor { void operator()(const DeviceContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor* scale, + const phi::DenseTensor* in, + const phi::DenseTensor* scale, T max_range, - framework::Tensor* out); + phi::DenseTensor* out); }; template struct ChannelDequantizeFunctor { void operator()(const DeviceContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor** scales, + const phi::DenseTensor* in, + const phi::DenseTensor** scales, const int scale_num, T max_range, const int quant_axis, const int x_num_col_dims, - framework::Tensor* out); + phi::DenseTensor* out); }; template class FakeDequantizeMaxAbsKernel : public framework::OpKernel { public: virtual void Compute(const framework::ExecutionContext& ctx) const { - auto* in = 
ctx.Input("X"); - auto* scale = ctx.Input("Scale"); - auto* out = ctx.Output("Out"); + auto* in = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* out = ctx.Output("Out"); float max_range = ctx.Attr("max_range"); @@ -66,9 +66,9 @@ template class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel { public: virtual void Compute(const framework::ExecutionContext& ctx) const { - auto* in = ctx.Input("X"); - auto scales = ctx.MultiInput("Scales"); - auto* out = ctx.Output("Out"); + auto* in = ctx.Input("X"); + auto scales = ctx.MultiInput("Scales"); + auto* out = ctx.Output("Out"); auto quant_bits = ctx.Attr>("quant_bits"); auto quant_axis = ctx.Attr("quant_axis"); diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc index cb8263714a5e4..a97a52145d127 100644 --- a/paddle/fluid/operators/fake_quantize_op.cc +++ b/paddle/fluid/operators/fake_quantize_op.cc @@ -46,7 +46,7 @@ template struct FindAbsMaxFunctor; template struct FindChannelAbsMaxFunctor { void operator()(const phi::CPUContext &ctx, - const framework::Tensor &in_tensor, + const phi::DenseTensor &in_tensor, const int quant_axis, T *out_abs_max) { // At present, channelwise quantization supports conv2d, depthwise_conv2d @@ -91,11 +91,11 @@ template struct FindChannelAbsMaxFunctor; template struct ClipAndFakeQuantFunctor { void operator()(const phi::CPUContext &ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, const int bin_cnt, const int round_type, - framework::Tensor *out) { + phi::DenseTensor *out) { T s = scale.data()[0]; T inv_s = inverse(s); platform::Transform trans; @@ -122,11 +122,11 @@ template struct ClipAndFakeQuantFunctor; template struct ClipAndFakeQuantDequantFunctor { void operator()(const phi::CPUContext &ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, const int bin_cnt, const int round_type, - framework::Tensor *out) { + phi::DenseTensor *out) { T s = scale.data()[0]; T inv_s = inverse(s); @@ -156,12 +156,12 @@ template struct ClipAndFakeQuantDequantFunctor; template struct ChannelClipAndFakeQuantFunctor { void operator()(const phi::CPUContext &ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, const int bin_cnt, const int round_type, const int quant_axis, - framework::Tensor *out) { + phi::DenseTensor *out) { // At present, channelwise quantization supports conv2d, depthwise_conv2d // conv2d_transpose and mul PADDLE_ENFORCE_EQ( @@ -201,7 +201,7 @@ struct ChannelClipAndFakeQuantFunctor { for (int64_t i = 0; i < channel; i++) { T s = scale_data[i]; T inv_s = inverse(s); - framework::Tensor one_channel_out = out->Slice(i, i + 1); + phi::DenseTensor one_channel_out = out->Slice(i, i + 1); auto out_e = framework::EigenVector::Flatten(one_channel_out); out_e.device(*ctx.eigen_device()) = (bin_cnt * inv_s * out_e).round(); } @@ -238,12 +238,12 @@ template struct ChannelClipAndFakeQuantFunctor; template struct ChannelClipFakeQuantDequantFunctor { void operator()(const phi::CPUContext &ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, const int bin_cnt, const int round_type, const int quant_axis, - framework::Tensor *out) { + phi::DenseTensor *out) { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, @@ -280,7 +280,7 
@@ struct ChannelClipFakeQuantDequantFunctor { } for (int i = 0; i < channel; i++) { T s = scale_data[i]; - framework::Tensor one_channel_out = out->Slice(i, i + 1); + phi::DenseTensor one_channel_out = out->Slice(i, i + 1); auto out_e = framework::EigenVector::Flatten(one_channel_out); if (round_type == 0) { out_e.device(*ctx.eigen_device()) = @@ -328,12 +328,12 @@ template struct ChannelClipFakeQuantDequantFunctor; template struct FindRangeAbsMaxFunctor { void operator()(const phi::CPUContext &ctx, - const framework::Tensor &cur_scale, - const framework::Tensor &last_scale, - const framework::Tensor &iter, + const phi::DenseTensor &cur_scale, + const phi::DenseTensor &last_scale, + const phi::DenseTensor &iter, const int window_size, - framework::Tensor *scales_arr, - framework::Tensor *out_scale) { + phi::DenseTensor *scales_arr, + phi::DenseTensor *out_scale) { T *scale_arr = scales_arr->mutable_data(ctx.GetPlace()); int64_t it = iter.data()[0]; int idx = it % window_size; @@ -357,13 +357,13 @@ template struct FindRangeAbsMaxFunctor; template struct FindMovingAverageAbsMaxFunctor { void operator()(const phi::CPUContext &ctx, - const framework::Tensor &in_accum, - const framework::Tensor &in_state, + const phi::DenseTensor &in_accum, + const phi::DenseTensor &in_state, const T *cur_scale, const float rate, - framework::Tensor *out_state, - framework::Tensor *out_accum, - framework::Tensor *out_scale) { + phi::DenseTensor *out_state, + phi::DenseTensor *out_accum, + phi::DenseTensor *out_scale) { T accum = in_accum.data()[0]; T state = in_state.data()[0]; T scale = cur_scale[0]; diff --git a/paddle/fluid/operators/fake_quantize_op.cu.h b/paddle/fluid/operators/fake_quantize_op.cu.h index 22ba8254cdc2c..10988d8807c6e 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu.h +++ b/paddle/fluid/operators/fake_quantize_op.cu.h @@ -81,7 +81,7 @@ struct FindAbsMaxFunctor { int grid = (block - 1 + num) / block; grid = (grid > block) ? 
block : grid; - framework::Tensor max; + phi::DenseTensor max; T *max_data = max.mutable_data(phi::make_ddim({grid}), ctx.GetPlace()); FindAbsMaxKernel <<>>(in, num, max_data); @@ -165,7 +165,7 @@ __global__ void FindChannelAbsMaxKernelQuantAxis1( template struct FindChannelAbsMaxFunctor { void operator()(const phi::GPUContext &ctx, - const framework::Tensor &in_tensor, + const phi::DenseTensor &in_tensor, const int quant_axis, T *out_abs_max) { PADDLE_ENFORCE_EQ( @@ -290,11 +290,11 @@ __global__ void ClipAndQuantDequantKernel(const T *in, template struct ClipAndFakeQuantFunctor { void operator()(const phi::GPUContext &ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, const int bin_cnt, const int round_type, - framework::Tensor *out) { + phi::DenseTensor *out) { int num = in.numel(); int block = 1024; int grid = (block - 1 + num) / block; @@ -313,11 +313,11 @@ template struct ClipAndFakeQuantFunctor; template struct ClipAndFakeQuantDequantFunctor { void operator()(const phi::GPUContext &ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, const int bin_cnt, const int round_type, - framework::Tensor *out) { + phi::DenseTensor *out) { int num = in.numel(); int block = 1024; int grid = (block - 1 + num) / block; @@ -409,12 +409,12 @@ __global__ void ChannelClipAndQuantKernelQuantAxisN(const T *in, template struct ChannelClipAndFakeQuantFunctor { void operator()(const phi::GPUContext &ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, const int bin_cnt, const int round_type, const int quant_axis, - framework::Tensor *out) { + phi::DenseTensor *out) { PADDLE_ENFORCE_EQ( quant_axis == 0 || quant_axis == 1, true, @@ -491,18 +491,18 @@ __global__ void FindRangeAbsMaxAndFillArray(const T *cur_scale, template struct FindRangeAbsMaxFunctor { void operator()(const phi::GPUContext &ctx, - const framework::Tensor &cur_scale, - const framework::Tensor &last_scale, - const framework::Tensor &iter, + const phi::DenseTensor &cur_scale, + const phi::DenseTensor &last_scale, + const phi::DenseTensor &iter, const int window_size, - framework::Tensor *scales_arr, - framework::Tensor *out_scale) { + phi::DenseTensor *scales_arr, + phi::DenseTensor *out_scale) { const auto gpu_place = ctx.GetPlace(); T *scale_arr = scales_arr->mutable_data(gpu_place); T *out_scale_data = out_scale->mutable_data(gpu_place); - framework::Tensor need_find_max, out_size; + phi::DenseTensor need_find_max, out_size; int *find_max = need_find_max.mutable_data({1}, gpu_place); int *out_size_data = out_size.mutable_data({1}, gpu_place); @@ -559,13 +559,13 @@ template struct FindRangeAbsMaxFunctor; template struct FindMovingAverageAbsMaxFunctor { void operator()(const phi::GPUContext &ctx, - const framework::Tensor &in_accum, - const framework::Tensor &in_state, + const phi::DenseTensor &in_accum, + const phi::DenseTensor &in_state, const T *cur_scale, const float rate, - framework::Tensor *out_state, - framework::Tensor *out_accum, - framework::Tensor *out_scale) { + phi::DenseTensor *out_state, + phi::DenseTensor *out_accum, + phi::DenseTensor *out_scale) { const auto gpu_place = ctx.GetPlace(); T rate_t = static_cast(rate); @@ -660,12 +660,12 @@ __global__ void ChannelClipAndQuantDequantKernelQuantAxis1(const T *in, template struct ChannelClipFakeQuantDequantFunctor { void operator()(const 
phi::GPUContext &ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, const int bin_cnt, const int round_type, const int quant_axis, - framework::Tensor *out) { + phi::DenseTensor *out) { // At present, channelwise quantization supports conv2d, depthwise_conv2d // conv2d_transpose and mul PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/fake_quantize_op.h b/paddle/fluid/operators/fake_quantize_op.h index 92aaa1fb248b9..bbe0c4d38eae0 100644 --- a/paddle/fluid/operators/fake_quantize_op.h +++ b/paddle/fluid/operators/fake_quantize_op.h @@ -76,38 +76,38 @@ struct FindAbsMaxFunctor { template struct ClipAndFakeQuantFunctor { void operator()(const DeviceContext &ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, const int bin_cnt, const int round_type, - framework::Tensor *out); + phi::DenseTensor *out); }; template struct ClipAndFakeQuantDequantFunctor { void operator()(const DeviceContext &ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, const int bin_cnt, int round_type, - framework::Tensor *out); + phi::DenseTensor *out); }; template struct FindRangeAbsMaxFunctor { void operator()(const DeviceContext &ctx, - const framework::Tensor &cur_scale, - const framework::Tensor &last_scale, - const framework::Tensor &iter, + const phi::DenseTensor &cur_scale, + const phi::DenseTensor &last_scale, + const phi::DenseTensor &iter, const int window_size, - framework::Tensor *scales_arr, - framework::Tensor *out_scale); + phi::DenseTensor *scales_arr, + phi::DenseTensor *out_scale); }; template struct FindChannelAbsMaxFunctor { void operator()(const DeviceContext &ctx, - const framework::Tensor &in_tensor, + const phi::DenseTensor &in_tensor, const int quant_axis, T *out_abs_max); }; @@ -115,44 +115,44 @@ struct FindChannelAbsMaxFunctor { template struct ChannelClipAndFakeQuantFunctor { void operator()(const DeviceContext &ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, const int bin_cnt, const int round_type, const int quant_axis, - framework::Tensor *out); + phi::DenseTensor *out); }; template struct ChannelClipFakeQuantDequantFunctor { void operator()(const DeviceContext &ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, const int bin_cnt, int round_type, const int quant_axis, - framework::Tensor *out); + phi::DenseTensor *out); }; template struct FindMovingAverageAbsMaxFunctor { void operator()(const DeviceContext &ctx, - const framework::Tensor &in_accum, - const framework::Tensor &in_state, + const phi::DenseTensor &in_accum, + const phi::DenseTensor &in_state, const T *cur_scale, const float rate, - framework::Tensor *out_state, - framework::Tensor *out_accum, - framework::Tensor *out_scale); + phi::DenseTensor *out_state, + phi::DenseTensor *out_accum, + phi::DenseTensor *out_scale); }; template class FakeAbsMaxKernelBase : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *in = context.Input("X"); - auto *out = context.Output("Out"); - auto *out_scale = context.Output("OutScale"); + auto *in = context.Input("X"); + auto *out = context.Output("Out"); + auto *out_scale = context.Output("OutScale"); T *out_s = 
out_scale->mutable_data(context.GetPlace()); int bit_length = context.Attr("bit_length"); @@ -169,22 +169,22 @@ class FakeAbsMaxKernelBase : public framework::OpKernel { protected: virtual void RunClipFunctor(const DeviceContext &dev_ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, int bin_cnt, int round_type, - framework::Tensor *out) const = 0; + phi::DenseTensor *out) const = 0; }; template class FakeQuantizeAbsMaxKernel : public FakeAbsMaxKernelBase { protected: void RunClipFunctor(const DeviceContext &dev_ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, int bin_cnt, int round_type, - framework::Tensor *out) const override { + phi::DenseTensor *out) const override { ClipAndFakeQuantFunctor()( dev_ctx, in, scale, bin_cnt, round_type, out); } @@ -195,11 +195,11 @@ class FakeQuantizeDequantizeAbsMaxKernel : public FakeAbsMaxKernelBase { protected: void RunClipFunctor(const DeviceContext &dev_ctx, - const framework::Tensor &in, - const framework::Tensor &scale, + const phi::DenseTensor &in, + const phi::DenseTensor &scale, int bin_cnt, int round_type, - framework::Tensor *out) const override { + phi::DenseTensor *out) const override { ClipAndFakeQuantDequantFunctor()( dev_ctx, in, scale, bin_cnt, round_type, out); } @@ -209,10 +209,10 @@ template class FakeChannelWiseQuantizeAbsMaxKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *in = context.Input("X"); + auto *in = context.Input("X"); - auto *out = context.Output("Out"); - auto *out_scale = context.Output("OutScale"); + auto *out = context.Output("Out"); + auto *out_scale = context.Output("OutScale"); out->mutable_data(context.GetPlace()); int bit_length = context.Attr("bit_length"); @@ -237,9 +237,9 @@ class FakeChannelWiseQuantizeDequantizeAbsMaxKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *in = context.Input("X"); - auto *out = context.Output("Out"); - auto *out_scale = context.Output("OutScale"); + auto *in = context.Input("X"); + auto *out = context.Output("Out"); + auto *out_scale = context.Output("OutScale"); T *out_scale_data = out_scale->mutable_data(context.GetPlace()); auto &dev_ctx = context.template device_context(); out->mutable_data(dev_ctx.GetPlace()); @@ -261,10 +261,10 @@ template class FakeQuantizeRangeAbsMaxKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *in = context.Input("X"); - auto *in_scale = context.Input("InScale"); + auto *in = context.Input("X"); + auto *in_scale = context.Input("InScale"); - auto *out = context.Output("Out"); + auto *out = context.Output("Out"); out->mutable_data(context.GetPlace()); bool is_test = context.Attr("is_test"); @@ -281,14 +281,14 @@ class FakeQuantizeRangeAbsMaxKernel : public framework::OpKernel { } // training - auto *out_scale = context.Output("OutScale"); - auto *out_scales = context.Output("OutScales"); - auto *iter = context.Input("Iter"); + auto *out_scale = context.Output("OutScale"); + auto *out_scales = context.Output("OutScales"); + auto *iter = context.Input("Iter"); int window_size = context.Attr("window_size"); out_scale->mutable_data(context.GetPlace()); - framework::Tensor cur_scale; + phi::DenseTensor cur_scale; T *cur_scale_data = cur_scale.mutable_data({1}, 
context.GetPlace()); FindAbsMaxFunctor()( dev_ctx, in->data(), in->numel(), cur_scale_data); @@ -308,9 +308,9 @@ template class FakeMovingAverageAbsMaxKernelBase : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *in = context.Input("X"); - auto *in_scale = context.Input("InScale"); - auto *out = context.Output("Out"); + auto *in = context.Input("X"); + auto *in_scale = context.Input("InScale"); + auto *out = context.Output("Out"); out->mutable_data(context.GetPlace()); bool is_test = context.Attr("is_test"); @@ -326,8 +326,8 @@ class FakeMovingAverageAbsMaxKernelBase : public framework::OpKernel { } // training - auto *in_accum = context.Input("InAccum"); - auto *in_state = context.Input("InState"); + auto *in_accum = context.Input("InAccum"); + auto *in_state = context.Input("InState"); phi::DenseTensor tmp_scale; tmp_scale.Resize(phi::make_dim(1)); @@ -336,9 +336,9 @@ class FakeMovingAverageAbsMaxKernelBase : public framework::OpKernel { FindAbsMaxFunctor()( dev_ctx, in->data(), in->numel(), cur_scale_data); - auto *out_state = context.Output("OutState"); - auto *out_accum = context.Output("OutAccum"); - auto *out_scale = context.Output("OutScale"); + auto *out_state = context.Output("OutState"); + auto *out_accum = context.Output("OutAccum"); + auto *out_scale = context.Output("OutScale"); out_state->mutable_data(context.GetPlace()); out_accum->mutable_data(context.GetPlace()); out_scale->mutable_data(context.GetPlace()); @@ -360,11 +360,11 @@ class FakeMovingAverageAbsMaxKernelBase : public framework::OpKernel { protected: virtual void RunClipFunctor(const DeviceContext &dev_ctx, - const framework::Tensor &in, - const framework::Tensor &in_scale, + const phi::DenseTensor &in, + const phi::DenseTensor &in_scale, int bin_cnt, int round_type, - framework::Tensor *out) const = 0; + phi::DenseTensor *out) const = 0; }; template @@ -372,11 +372,11 @@ class FakeQuantizeMovingAverageAbsMaxKernel : public FakeMovingAverageAbsMaxKernelBase { protected: void RunClipFunctor(const DeviceContext &dev_ctx, - const framework::Tensor &in, - const framework::Tensor &in_scale, + const phi::DenseTensor &in, + const phi::DenseTensor &in_scale, int bin_cnt, int round_type, - framework::Tensor *out) const override { + phi::DenseTensor *out) const override { ClipAndFakeQuantFunctor()( dev_ctx, in, in_scale, bin_cnt, round_type, out); } @@ -387,11 +387,11 @@ class FakeQuantizeDequantizeMovingAverageAbsMaxKernel : public FakeMovingAverageAbsMaxKernelBase { protected: void RunClipFunctor(const DeviceContext &dev_ctx, - const framework::Tensor &in, - const framework::Tensor &in_scale, + const phi::DenseTensor &in, + const phi::DenseTensor &in_scale, int bin_cnt, int round_type, - framework::Tensor *out) const override { + phi::DenseTensor *out) const override { ClipAndFakeQuantDequantFunctor()( dev_ctx, in, in_scale, bin_cnt, round_type, out); } @@ -401,11 +401,11 @@ template class MovingAverageAbsMaxScaleKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *in = context.Input("X"); + auto *in = context.Input("X"); auto &dev_ctx = context.template device_context(); if (context.HasOutput("Out")) { - auto *out = context.Output("Out"); + auto *out = context.Output("Out"); out->mutable_data(context.GetPlace()); framework::TensorCopy(*in, context.GetPlace(), dev_ctx, out); } @@ -417,8 +417,8 @@ class MovingAverageAbsMaxScaleKernel : public framework::OpKernel { } // training - 
auto *in_accum = context.Input("InAccum"); - auto *in_state = context.Input("InState"); + auto *in_accum = context.Input("InAccum"); + auto *in_state = context.Input("InState"); phi::DenseTensor tmp_scale; tmp_scale.Resize(phi::make_dim(1)); T *cur_scale_data = dev_ctx.template Alloc(&tmp_scale); @@ -426,9 +426,9 @@ class MovingAverageAbsMaxScaleKernel : public framework::OpKernel { FindAbsMaxFunctor()( dev_ctx, in->data(), in->numel(), cur_scale_data); - auto *out_state = context.Output("OutState"); - auto *out_accum = context.Output("OutAccum"); - auto *out_scale = context.Output("OutScale"); + auto *out_state = context.Output("OutState"); + auto *out_accum = context.Output("OutAccum"); + auto *out_scale = context.Output("OutScale"); out_state->mutable_data(context.GetPlace()); out_accum->mutable_data(context.GetPlace()); out_scale->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/fc_op.h b/paddle/fluid/operators/fc_op.h index 24380b29ee125..87c2d75328fc1 100644 --- a/paddle/fluid/operators/fc_op.h +++ b/paddle/fluid/operators/fc_op.h @@ -24,7 +24,7 @@ namespace paddle { namespace operators { enum { kFCMKLDNNFP32 = 1, kFCMKLDNNINT8 = 2 }; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; inline void FCOutputSize(const framework::DDim& in_dims, const framework::DDim& w_dims, @@ -59,8 +59,8 @@ class FCOpKernel : public framework::OpKernel { public: void Compute(const paddle::framework::ExecutionContext& ctx) const override { auto* input = ctx.Input("Input"); - auto* w = ctx.Input("W"); - auto* bias = ctx.Input("Bias"); + auto* w = ctx.Input("W"); + auto* bias = ctx.Input("Bias"); auto* output = ctx.Output("Out"); int in_num_col_dims = ctx.Attr("in_num_col_dims"); bool with_relu = diff --git a/paddle/fluid/operators/feed_forward_test.cu b/paddle/fluid/operators/feed_forward_test.cu index 43776e98a0225..d337d975c9aa3 100644 --- a/paddle/fluid/operators/feed_forward_test.cu +++ b/paddle/fluid/operators/feed_forward_test.cu @@ -549,8 +549,8 @@ class TestFeedForward { bool has_bias_; int size_src_, size_weight_, size_bias_, size_output_; - framework::Tensor src_, weight_, bias_, out_, bias_out_; - framework::Tensor dinput_, dweight_, dbias_, doutput_; + phi::DenseTensor src_, weight_, bias_, out_, bias_out_; + phi::DenseTensor dinput_, dweight_, dbias_, doutput_; std::vector src_vec_, weight_vec_, bias_vec_, out_vec_, bias_out_vec_; std::vector dinput_vec_, dweight_vec_, dbias_vec_, doutput_vec_; diff --git a/paddle/fluid/operators/fill_any_like_op.cc b/paddle/fluid/operators/fill_any_like_op.cc index eb66cc88b3145..bf79a98d21df4 100644 --- a/paddle/fluid/operators/fill_any_like_op.cc +++ b/paddle/fluid/operators/fill_any_like_op.cc @@ -43,7 +43,7 @@ class FillAnyLikeOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { return framework::OpKernelType(expected_kernel_type.data_type_, expected_kernel_type.place_, diff --git a/paddle/fluid/operators/fill_any_like_op_mlu.cc b/paddle/fluid/operators/fill_any_like_op_mlu.cc index af45f2feb4ee0..5ef52d7b07ec8 100644 --- a/paddle/fluid/operators/fill_any_like_op_mlu.cc +++ b/paddle/fluid/operators/fill_any_like_op_mlu.cc @@ -28,7 +28,7 @@ class FillAnyLikeMLUKernel : public framework::OpKernel { T>::type>::type; void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Output("Out"); 
+ auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); float value = ctx.Attr("value"); diff --git a/paddle/fluid/operators/fill_any_like_op_npu.cc b/paddle/fluid/operators/fill_any_like_op_npu.cc index af483d56eeaad..22f2c29bfa8ab 100644 --- a/paddle/fluid/operators/fill_any_like_op_npu.cc +++ b/paddle/fluid/operators/fill_any_like_op_npu.cc @@ -30,7 +30,7 @@ class FillAnyLikeNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto data_type = static_cast( context.Attr("dtype")); - auto* out = context.Output("Out"); + auto* out = context.Output("Out"); out->mutable_data(context.GetPlace()); float value = context.Attr("value"); diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op_mlu.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op_mlu.cc index ba426dfe62a35..6fda2f1283fb3 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op_mlu.cc +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op_mlu.cc @@ -29,7 +29,7 @@ class FillConstantBatchSizeLikeOpMLUKernel : public framework::OpKernel { auto str_value = ctx.Attr("str_value"); auto force_cpu = ctx.Attr("force_cpu"); - auto *out = ctx.Output("Out"); + auto *out = ctx.Output("Out"); auto *in = ctx.Input("Input"); if (in->lod().size() && ctx.Attr("input_dim_idx") == 0) { // set the correct batch size for the LoDTensor. diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc index 479b2e19096e5..1f27dbdd4d77e 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op_npu.cc @@ -20,7 +20,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel { @@ -32,7 +32,7 @@ class FillConstantBatchSizeLikeOpNPUKernel : public framework::OpKernel { auto str_value = ctx.Attr("str_value"); auto force_cpu = ctx.Attr("force_cpu"); - auto *out = ctx.Output("Out"); + auto *out = ctx.Output("Out"); auto *in = ctx.Input("Input"); if (in->lod().size() && ctx.Attr("input_dim_idx") == 0) { // set the correct batch size for the LoDTensor. 
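The hunks above all follow one substitution pattern: operator kernels that previously fetched variables as framework::Tensor (often through a local `using Tensor = framework::Tensor;` alias) now name phi::DenseTensor directly in the ExecutionContext accessor calls and in helper signatures. A minimal illustrative sketch of the post-migration convention follows; the kernel name, variable names "X"/"Out", and the exact template arguments are placeholders assumed for illustration and are not taken from any file in this patch.

// Illustrative sketch only: a generic copy kernel written against the
// post-migration convention, naming phi::DenseTensor explicitly.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/phi/core/dense_tensor.h"

namespace paddle {
namespace operators {

template <typename DeviceContext, typename T>
class ExampleCopyKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // Before this patch such calls were typically written as
    // ctx.Input<framework::Tensor>("X") / ctx.Output<Tensor>("Out").
    auto* x = ctx.Input<phi::DenseTensor>("X");
    auto* out = ctx.Output<phi::DenseTensor>("Out");
    out->mutable_data<T>(ctx.GetPlace());
    // Copy input to output on the kernel's place.
    framework::TensorCopy(*x, ctx.GetPlace(), out);
  }
};

}  // namespace operators
}  // namespace paddle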
diff --git a/paddle/fluid/operators/fill_constant_op.cc b/paddle/fluid/operators/fill_constant_op.cc index 28167c4736fa3..4c63b9969fd10 100644 --- a/paddle/fluid/operators/fill_constant_op.cc +++ b/paddle/fluid/operators/fill_constant_op.cc @@ -58,7 +58,7 @@ class FillConstantOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "ShapeTensor" || var_name == "ShapeTensorList") { return expected_kernel_type; diff --git a/paddle/fluid/operators/fill_constant_op_mlu.cc b/paddle/fluid/operators/fill_constant_op_mlu.cc index 487962a7de8ca..664d70609e939 100644 --- a/paddle/fluid/operators/fill_constant_op_mlu.cc +++ b/paddle/fluid/operators/fill_constant_op_mlu.cc @@ -26,7 +26,7 @@ class FillConstantMLUKernel : public framework::OpKernel { auto str_value = ctx.Attr("str_value"); auto float_value = ctx.Attr("value"); - auto *out_var = ctx.Output("Out"); + auto *out_var = ctx.Output("Out"); T value; if (str_value.empty()) { @@ -55,7 +55,7 @@ class FillConstantMLUKernel : public framework::OpKernel { const T *value_data = &value; cnnlPointerMode_t pointer_mode = CNNL_POINTER_MODE_HOST; if (ctx.HasInput("ValueTensor")) { - auto *value_tensor = ctx.Input("ValueTensor"); + auto *value_tensor = ctx.Input("ValueTensor"); PADDLE_ENFORCE_EQ( value_tensor->numel(), 1, diff --git a/paddle/fluid/operators/fill_constant_op_npu.cc b/paddle/fluid/operators/fill_constant_op_npu.cc index 47e26b0d415fa..1947020be857d 100644 --- a/paddle/fluid/operators/fill_constant_op_npu.cc +++ b/paddle/fluid/operators/fill_constant_op_npu.cc @@ -28,7 +28,7 @@ class FillConstantNPUKernel : public framework::OpKernel { auto str_value = ctx.Attr("str_value"); auto float_value = ctx.Attr("value"); - auto *out_var = ctx.Output("Out"); + auto *out_var = ctx.Output("Out"); auto stream = ctx.template device_context() .stream(); diff --git a/paddle/fluid/operators/fill_diagonal_op.cc b/paddle/fluid/operators/fill_diagonal_op.cc index 4bf9635ae45dd..8a7f5daa9f857 100644 --- a/paddle/fluid/operators/fill_diagonal_op.cc +++ b/paddle/fluid/operators/fill_diagonal_op.cc @@ -73,9 +73,9 @@ class FillIDiagonalGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - // Note: don't get data type from ctx.Input("Input"); + // Note: don't get data type from ctx.Input("Input"); auto dtype = framework::TransToProtoVarType( - ctx.Input(framework::GradVarName("Out"))->type()); + ctx.Input(framework::GradVarName("Out"))->type()); return framework::OpKernelType(dtype, ctx.GetPlace()); } }; diff --git a/paddle/fluid/operators/fill_diagonal_tensor_op.cc b/paddle/fluid/operators/fill_diagonal_tensor_op.cc index ccf9b7aa35938..5a7f56cbfd04d 100644 --- a/paddle/fluid/operators/fill_diagonal_tensor_op.cc +++ b/paddle/fluid/operators/fill_diagonal_tensor_op.cc @@ -72,9 +72,9 @@ class FillDiagonalTensorGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - // Note: don't get data type from ctx.Input("Input"); + // Note: don't get data type from ctx.Input("Input"); auto dtype = - ctx.Input(framework::GradVarName("Out"))->type(); + ctx.Input(framework::GradVarName("Out"))->type(); return 
framework::OpKernelType(framework::TransToProtoVarType(dtype), ctx.GetPlace()); } diff --git a/paddle/fluid/operators/fill_zeros_like_op.h b/paddle/fluid/operators/fill_zeros_like_op.h index 9c967cf70e2f2..331af861cdff4 100644 --- a/paddle/fluid/operators/fill_zeros_like_op.h +++ b/paddle/fluid/operators/fill_zeros_like_op.h @@ -23,7 +23,7 @@ template class FillZerosLikeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* out = context.Output("Out"); + auto* out = context.Output("Out"); out->mutable_data(context.GetPlace()); phi::funcs::SetConstant setter; diff --git a/paddle/fluid/operators/fill_zeros_like_op_npu.cc b/paddle/fluid/operators/fill_zeros_like_op_npu.cc index 3963dc505ad0c..be5160eef4404 100644 --- a/paddle/fluid/operators/fill_zeros_like_op_npu.cc +++ b/paddle/fluid/operators/fill_zeros_like_op_npu.cc @@ -22,8 +22,8 @@ template class FillZerosLikeNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* out = context.Output("Out"); + auto* x = context.Input("X"); + auto* out = context.Output("Out"); out->mutable_data(context.GetPlace()); auto stream = diff --git a/paddle/fluid/operators/filter_by_instag_op.cu b/paddle/fluid/operators/filter_by_instag_op.cu index 5df1e4f651aa9..5777dcf714589 100644 --- a/paddle/fluid/operators/filter_by_instag_op.cu +++ b/paddle/fluid/operators/filter_by_instag_op.cu @@ -43,7 +43,7 @@ namespace cg = cooperative_groups; namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using LoDTensor = framework::LoDTensor; @@ -360,7 +360,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel { // X3 is local fc tag list // LoD [[0, Sum(fc1), Sum(fc1, fc2) ...]] - const Tensor* x3 = context.Input("Filter_tag"); + const phi::DenseTensor* x3 = context.Input("Filter_tag"); const int64_t* x3_data = x3->data(); Vector x2_lods; diff --git a/paddle/fluid/operators/filter_by_instag_op.h b/paddle/fluid/operators/filter_by_instag_op.h index 66178b180a9e3..869d44430812c 100644 --- a/paddle/fluid/operators/filter_by_instag_op.h +++ b/paddle/fluid/operators/filter_by_instag_op.h @@ -29,7 +29,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using LoDTensor = framework::LoDTensor; @@ -50,7 +50,7 @@ class FilterByInstagKernel : public framework::OpKernel { auto* x2 = context.Input("Ins_tag"); // X3 is local fc tag list // LoD [[0, Sum(fc1), Sum(fc1, fc2) ...]] - auto* x3 = context.Input("Filter_tag"); + auto* x3 = context.Input("Filter_tag"); std::unordered_set filter_tag; auto* x3_data = x3->data(); diff --git a/paddle/fluid/operators/flatten_op.cc b/paddle/fluid/operators/flatten_op.cc index e160fc6f09ad0..036f3b8222422 100644 --- a/paddle/fluid/operators/flatten_op.cc +++ b/paddle/fluid/operators/flatten_op.cc @@ -28,7 +28,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FlattenOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/flatten_op_npu.cc b/paddle/fluid/operators/flatten_op_npu.cc index 7b7f0133d8a11..93e69d0de6159 100644 --- a/paddle/fluid/operators/flatten_op_npu.cc +++ b/paddle/fluid/operators/flatten_op_npu.cc @@ -56,14 +56,14 @@ class Flatten2GradNPUKernel : public framework::OpKernel { } }; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class FlattenContiguousRangeNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *X = ctx.Input("X"); - auto *Out = ctx.Output("Out"); + auto *X = ctx.Input("X"); + auto *Out = ctx.Output("Out"); int start_axis = ctx.Attr("start_axis"); int stop_axis = ctx.Attr("stop_axis"); diff --git a/paddle/fluid/operators/flip_op.cc b/paddle/fluid/operators/flip_op.cc index 7f00fad6e3d12..4c14418690a85 100644 --- a/paddle/fluid/operators/flip_op.cc +++ b/paddle/fluid/operators/flip_op.cc @@ -26,7 +26,6 @@ namespace paddle { namespace operators { using framework::OpKernelType; -using framework::Tensor; class FlipOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fsp_op.h b/paddle/fluid/operators/fsp_op.h index 1faace15454aa..0f8072520be2f 100644 --- a/paddle/fluid/operators/fsp_op.h +++ b/paddle/fluid/operators/fsp_op.h @@ -20,15 +20,15 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class FSPOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* y = context.Input("Y"); - auto* output = context.Output("Out"); + auto* x = context.Input("X"); + auto* y = context.Input("Y"); + auto* output = context.Output("Out"); output->mutable_data(context.GetPlace()); auto x_dims = x->dims(); auto y_dims = y->dims(); @@ -69,12 +69,13 @@ template class FSPGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* d_x = context.Output(framework::GradVarName("X")); - auto* d_y = context.Output(framework::GradVarName("Y")); + auto* d_x = context.Output(framework::GradVarName("X")); + auto* d_y = context.Output(framework::GradVarName("Y")); if (d_x == nullptr && d_y == nullptr) { return; } - auto* d_out = context.Input(framework::GradVarName("Out")); + auto* d_out = + context.Input(framework::GradVarName("Out")); auto d_out_dims = d_out->dims(); auto batch_size = d_out_dims[0]; auto x_channel = d_out_dims[1]; @@ -89,7 +90,7 @@ class FSPGradOpKernel : public framework::OpKernel { set_zero(context.template device_context(), d_x, static_cast(0)); - auto* y = context.Input("Y"); + auto* y = context.Input("Y"); auto y_dims = y->dims(); h = y_dims[2]; w = y_dims[3]; @@ -122,7 +123,7 @@ class FSPGradOpKernel : public framework::OpKernel { set_zero(context.template device_context(), d_y, static_cast(0)); - auto* x = context.Input("X"); + auto* x = context.Input("X"); auto x_dims = x->dims(); h = x_dims[2]; w = x_dims[3]; diff --git a/paddle/fluid/operators/fused/attn_bias_add.cu.h b/paddle/fluid/operators/fused/attn_bias_add.cu.h index 2b8b857966de1..b44faf3150115 100644 --- a/paddle/fluid/operators/fused/attn_bias_add.cu.h +++ b/paddle/fluid/operators/fused/attn_bias_add.cu.h 
@@ -324,7 +324,7 @@ void Launch2DColumnReduce(const phi::GPUContext& dev_ctx, BiasAddBwSinglePassKernel <<>>(d_out, reduce_num, left_num, d_bias); } else { - framework::Tensor tmp_sum; + phi::DenseTensor tmp_sum; tmp_sum.Resize({grid.y, left_num}); dev_ctx.template Alloc>( &tmp_sum, tmp_sum.numel() * sizeof(ReduceParamType)); diff --git a/paddle/fluid/operators/fused/attn_gemm.h b/paddle/fluid/operators/fused/attn_gemm.h index 07947f522cdae..c8ea19d463a1b 100644 --- a/paddle/fluid/operators/fused/attn_gemm.h +++ b/paddle/fluid/operators/fused/attn_gemm.h @@ -24,7 +24,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; // support gemm-nt and gemm-nn, which is used in fused_attention_op. template class AttnMatMul { @@ -47,11 +47,11 @@ class AttnMatMul { ~AttnMatMul() {} - void ComputeForward(const framework::Tensor* weight, - const framework::Tensor* input, - const framework::Tensor* bias, - framework::Tensor* output, - framework::Tensor* bias_out) { + void ComputeForward(const phi::DenseTensor* weight, + const phi::DenseTensor* input, + const phi::DenseTensor* bias, + phi::DenseTensor* output, + phi::DenseTensor* bias_out) { // Note: for blas.GEMM API in Paddle, it treats all inputs as row-major. // here: (transa, transb): nt, input * weight. CBLAS_TRANSPOSE transA = transA_ ? CblasTrans : CblasNoTrans; @@ -73,19 +73,19 @@ class AttnMatMul { output->data()); if (compute_bias_) { // bias_out = output + bias - std::vector ins = {output, bias}; - std::vector outs = {bias_out}; + std::vector ins = {output, bias}; + std::vector outs = {bias_out}; phi::funcs::BroadcastKernel( dev_ctx_, ins, &outs, -1, phi::funcs::AddFunctor()); } } - void ComputeBackward(const framework::Tensor* input, - const framework::Tensor* weight, - const framework::Tensor* d_output, - framework::Tensor* d_input, - framework::Tensor* d_weight, - framework::Tensor* d_bias, + void ComputeBackward(const phi::DenseTensor* input, + const phi::DenseTensor* weight, + const phi::DenseTensor* d_output, + phi::DenseTensor* d_input, + phi::DenseTensor* d_weight, + phi::DenseTensor* d_bias, bool use_addto = false) { T alpha = static_cast(1.0); T beta_dA = use_addto ? static_cast(1.0) : static_cast(0.0); diff --git a/paddle/fluid/operators/fused/attn_gemm_int8.h b/paddle/fluid/operators/fused/attn_gemm_int8.h index ba114df9085fb..98a45deac3c8d 100644 --- a/paddle/fluid/operators/fused/attn_gemm_int8.h +++ b/paddle/fluid/operators/fused/attn_gemm_int8.h @@ -26,7 +26,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class AttnMatmulINT8 { @@ -41,15 +41,15 @@ class AttnMatmulINT8 { // This function is used to execute GEMM, with input and output's types are // both T. 
- void ComputeForward(const framework::Tensor* weight, - const framework::Tensor* input, - framework::Tensor* input_tmp, - const framework::Tensor* bias, - framework::Tensor* output, - framework::Tensor* output_tmp, - framework::Tensor* bias_out, + void ComputeForward(const phi::DenseTensor* weight, + const phi::DenseTensor* input, + phi::DenseTensor* input_tmp, + const phi::DenseTensor* bias, + phi::DenseTensor* output, + phi::DenseTensor* output_tmp, + phi::DenseTensor* bias_out, const float quant_in_scale, - const framework::Tensor* dequant_out_scale, + const phi::DenseTensor* dequant_out_scale, const int quant_out_scale_offset, const int quant_round_type = 1, const float quant_max_bound = 127.0, @@ -80,8 +80,8 @@ class AttnMatmulINT8 { if (compute_bias_) { // bias_out = output + bias - std::vector ins = {output, bias}; - std::vector outs = {bias_out}; + std::vector ins = {output, bias}; + std::vector outs = {bias_out}; phi::funcs::BroadcastKernel( dev_ctx_, ins, &outs, -1, phi::funcs::AddFunctor()); PADDLE_ENFORCE_EQ(cudaGetLastError(), @@ -95,11 +95,11 @@ class AttnMatmulINT8 { // This function is used to execute GEMM, with input and output's types are // both INT8. - void ComputeForwardINT8ToINT8(const framework::Tensor* weight, - framework::Tensor* input, - const framework::Tensor* bias, - framework::Tensor* output, - framework::Tensor* bias_out) { + void ComputeForwardINT8ToINT8(const phi::DenseTensor* weight, + phi::DenseTensor* input, + const phi::DenseTensor* bias, + phi::DenseTensor* output, + phi::DenseTensor* bias_out) { helpers_[0]->GEMM(input->data(), weight->data(), output->data(), @@ -108,14 +108,14 @@ class AttnMatmulINT8 { // This function is used to execute GEMM, with input and output's types are // INT8 and T. - void ComputeForwardINT8ToT(const framework::Tensor* weight, + void ComputeForwardINT8ToT(const phi::DenseTensor* weight, const float quant_in_scale, - framework::Tensor* input, - const framework::Tensor* bias, - framework::Tensor* output, - framework::Tensor* output_tmp, - framework::Tensor* bias_out, - const framework::Tensor* dequant_out_scale, + phi::DenseTensor* input, + const phi::DenseTensor* bias, + phi::DenseTensor* output, + phi::DenseTensor* output_tmp, + phi::DenseTensor* bias_out, + const phi::DenseTensor* dequant_out_scale, const int quant_out_scale_offset) { helpers_[0]->GEMM(input->data(), weight->data(), @@ -133,8 +133,8 @@ class AttnMatmulINT8 { if (compute_bias_) { // bias_out = output + bias - std::vector ins = {output, bias}; - std::vector outs = {bias_out}; + std::vector ins = {output, bias}; + std::vector outs = {bias_out}; phi::funcs::BroadcastKernel( dev_ctx_, ins, &outs, -1, phi::funcs::AddFunctor()); PADDLE_ENFORCE_EQ(cudaGetLastError(), @@ -148,13 +148,13 @@ class AttnMatmulINT8 { // This function is used to execute GEMM, with input and output's types are T // and INT8. 
- void ComputeForwardTToINT8(const framework::Tensor* weight, + void ComputeForwardTToINT8(const phi::DenseTensor* weight, const float quant_in_scale, - const framework::Tensor* input, - framework::Tensor* input_tmp, - const framework::Tensor* bias, - framework::Tensor* output, - framework::Tensor* bias_out, + const phi::DenseTensor* input, + phi::DenseTensor* input_tmp, + const phi::DenseTensor* bias, + phi::DenseTensor* output, + phi::DenseTensor* bias_out, const int quant_round_type = 1, const float quant_max_bound = 127.0, const float quant_min_bound = -127.0) { diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cu b/paddle/fluid/operators/fused/conv_fusion_op.cu index 6f0ebc2c7ebf6..5eee2c9332830 100644 --- a/paddle/fluid/operators/fused/conv_fusion_op.cu +++ b/paddle/fluid/operators/fused/conv_fusion_op.cu @@ -27,7 +27,7 @@ namespace paddle { namespace operators { #if PADDLE_WITH_HIP || CUDNN_VERSION >= 7100 -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using ScopedTensorDescriptor = platform::ScopedTensorDescriptor; using ScopedFilterDescriptor = platform::ScopedFilterDescriptor; using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor; @@ -45,11 +45,11 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* input = ctx.Input("Input"); - auto* filter = ctx.Input("Filter"); - auto* bias = ctx.Input("Bias"); - auto* residual = ctx.Input("ResidualData"); - auto* output = ctx.Output("Output"); + auto* input = ctx.Input("Input"); + auto* filter = ctx.Input("Filter"); + auto* bias = ctx.Input("Bias"); + auto* residual = ctx.Input("ResidualData"); + auto* output = ctx.Output("Output"); dev_ctx.template Alloc(output, output->numel() * sizeof(T)); std::vector strides = ctx.Attr>("strides"); @@ -523,10 +523,10 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { #endif std::vector channels = ctx.Attr>("split_channels"); if (channels.size()) { - auto outs = ctx.MultiOutput("Outputs"); + auto outs = ctx.MultiOutput("Outputs"); if (x_dims[0] == 1) { // share data with Output - framework::Tensor t; + phi::DenseTensor t; t.ShareDataWith(*output); auto y_dims = output->dims(); t.Resize({y_dims[1], y_dims[2], y_dims[3]}); diff --git a/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc b/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc index e11792a5dfb61..5f30ee4cc832c 100644 --- a/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc +++ b/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc @@ -31,7 +31,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace framework = paddle::framework; namespace platform = paddle::platform; namespace op = paddle::operators; -using Tensor = paddle::framework::Tensor; +using Tensor = phi::DenseTensor; USE_OP_ITSELF(batch_norm); PD_DECLARE_KERNEL(batch_norm, GPU, ALL_LAYOUT); @@ -40,7 +40,7 @@ USE_CUDA_ONLY_OP(fused_bn_add_activation_grad); template void InitRandomTensor(const std::vector &dims, - framework::Tensor *cpu_out) { + phi::DenseTensor *cpu_out) { T *cpu_out_ptr = cpu_out->mutable_data(phi::make_ddim(dims), platform::CPUPlace()); std::default_random_engine random(0); @@ -53,7 +53,7 @@ void InitRandomTensor(const std::vector &dims, template void InitConstantTensor(const std::vector &dims, T value, - framework::Tensor *cpu_out) { + phi::DenseTensor *cpu_out) { T *cpu_out_ptr = cpu_out->mutable_data(phi::make_ddim(dims), 
platform::CPUPlace()); for (int i = 0; i < cpu_out->numel(); ++i) { @@ -63,8 +63,8 @@ void InitConstantTensor(const std::vector &dims, template void CheckOutput(std::string name, - const framework::Tensor &cpu_res, - const framework::Tensor &cpu_base, + const phi::DenseTensor &cpu_res, + const phi::DenseTensor &cpu_base, float diff, bool is_relative_atol = false) { if (cpu_res.dims().size() == cpu_base.dims().size()) { @@ -102,9 +102,9 @@ void CheckOutput(std::string name, } template -void ComputeSumAndSquareSum(const framework::Tensor &cpu_x, - framework::Tensor *cpu_sum, - framework::Tensor *cpu_sum_of_square) { +void ComputeSumAndSquareSum(const phi::DenseTensor &cpu_x, + phi::DenseTensor *cpu_sum, + phi::DenseTensor *cpu_sum_of_square) { // x is in NHWC format. const auto &dims = cpu_x.dims(); int64_t c = dims[3]; @@ -129,8 +129,7 @@ void ComputeSumAndSquareSum(const framework::Tensor &cpu_x, } template -void ComputeInplaceAdd(const framework::Tensor &cpu_x, - framework::Tensor *cpu_y) { +void ComputeInplaceAdd(const phi::DenseTensor &cpu_x, phi::DenseTensor *cpu_y) { EXPECT_EQ(cpu_x.dims(), cpu_y->dims()); const T *cpu_x_ptr = cpu_x.data(); @@ -141,7 +140,7 @@ void ComputeInplaceAdd(const framework::Tensor &cpu_x, } template -void ComputeInplaceRelu(framework::Tensor *cpu_x) { +void ComputeInplaceRelu(phi::DenseTensor *cpu_x) { T *cpu_x_ptr = cpu_x->data(); for (int64_t i = 0; i < cpu_x->numel(); ++i) { cpu_x_ptr[i] = @@ -389,10 +388,10 @@ class CudnnBNAddReluTester { auto select = [&](Tensor *in) { return has_shortcut_ ? in : nullptr; }; - framework::Tensor cpu_mean_base_x; - framework::Tensor cpu_var_base_x; - framework::Tensor cpu_mean_base_z; - framework::Tensor cpu_var_base_z; + phi::DenseTensor cpu_mean_base_x; + phi::DenseTensor cpu_var_base_x; + phi::DenseTensor cpu_mean_base_z; + phi::DenseTensor cpu_var_base_z; if (!has_shortcut_ && fuse_add_ && (act_type_ == "relu")) { BaselineForwardFusedBNAddRelu(*ctx, &cpu_mean_base_x, @@ -416,11 +415,11 @@ class CudnnBNAddReluTester { select(&saved_reserve_space_z_)); } - framework::Tensor cpu_mean_x; - framework::Tensor cpu_var_x; - framework::Tensor cpu_y; - framework::Tensor cpu_mean_z; - framework::Tensor cpu_var_z; + phi::DenseTensor cpu_mean_x; + phi::DenseTensor cpu_var_x; + phi::DenseTensor cpu_y; + phi::DenseTensor cpu_mean_z; + phi::DenseTensor cpu_var_z; FusedForward(*ctx, &cpu_mean_x, &cpu_var_x, @@ -470,17 +469,17 @@ class CudnnBNAddReluTester { phi::GPUContext *ctx = static_cast( platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0))); - framework::Tensor cpu_dx_base; - framework::Tensor cpu_dz_base; - framework::Tensor cpu_dscale_base; - framework::Tensor cpu_dbias_base; + phi::DenseTensor cpu_dx_base; + phi::DenseTensor cpu_dz_base; + phi::DenseTensor cpu_dscale_base; + phi::DenseTensor cpu_dbias_base; BaselineBackwardFusedBNAddRelu( *ctx, &cpu_dx_base, &cpu_dz_base, &cpu_dscale_base, &cpu_dbias_base); - framework::Tensor cpu_dx; - framework::Tensor cpu_dz; - framework::Tensor cpu_dscale; - framework::Tensor cpu_dbias; + phi::DenseTensor cpu_dx; + phi::DenseTensor cpu_dz; + phi::DenseTensor cpu_dscale; + phi::DenseTensor cpu_dbias; FusedBackward(*ctx, &cpu_dx, &cpu_dz, &cpu_dscale, &cpu_dbias); CheckOutput("DX", cpu_dx, cpu_dx_base, diff, is_relative_atol); @@ -546,7 +545,7 @@ class CudnnBNAddReluTester { cpu_y, saved_reserve_space_x); if (has_shortcut_) { - framework::Tensor cpu_z_out; + phi::DenseTensor cpu_z_out; InitMeanVar(cpu_mean_z, cpu_var_z, cpu_saved_mean_z, cpu_saved_var_z); 
ComputeBatchNormForward(ctx, cpu_z_, @@ -624,8 +623,8 @@ class CudnnBNAddReluTester { Tensor *saved_var, Tensor *equiv_scale, Tensor *equiv_bias) { - framework::Tensor cpu_sum; - framework::Tensor cpu_sum_of_square; + phi::DenseTensor cpu_sum; + phi::DenseTensor cpu_sum_of_square; ComputeSumAndSquareSum(cpu_x, &cpu_sum, &cpu_sum_of_square); auto place = ctx.GetPlace(); @@ -678,17 +677,17 @@ class CudnnBNAddReluTester { Tensor *cpu_var_z = nullptr, Tensor *cpu_saved_mean_z = nullptr, Tensor *cpu_saved_var_z = nullptr) { - framework::Tensor x; - framework::Tensor sum_x; - framework::Tensor sum_of_square_x; - framework::Tensor bn_scale_x; - framework::Tensor bn_bias_x; - - framework::Tensor z; - framework::Tensor sum_z; - framework::Tensor sum_of_square_z; - framework::Tensor bn_scale_z; - framework::Tensor bn_bias_z; + phi::DenseTensor x; + phi::DenseTensor sum_x; + phi::DenseTensor sum_of_square_x; + phi::DenseTensor bn_scale_x; + phi::DenseTensor bn_bias_x; + + phi::DenseTensor z; + phi::DenseTensor sum_z; + phi::DenseTensor sum_of_square_z; + phi::DenseTensor bn_scale_z; + phi::DenseTensor bn_bias_z; auto place = ctx.GetPlace(); paddle::framework::TensorCopySync(cpu_x_, place, &x); @@ -696,22 +695,22 @@ class CudnnBNAddReluTester { paddle::framework::TensorCopySync(cpu_z_, place, &z); } - framework::Tensor mean_x; - framework::Tensor var_x; - framework::Tensor saved_mean_x; - framework::Tensor saved_var_x; - framework::Tensor equiv_scale_x; - framework::Tensor equiv_bias_x; + phi::DenseTensor mean_x; + phi::DenseTensor var_x; + phi::DenseTensor saved_mean_x; + phi::DenseTensor saved_var_x; + phi::DenseTensor equiv_scale_x; + phi::DenseTensor equiv_bias_x; - framework::Tensor mean_z; - framework::Tensor var_z; - framework::Tensor saved_mean_z; - framework::Tensor saved_var_z; - framework::Tensor equiv_scale_z; - framework::Tensor equiv_bias_z; + phi::DenseTensor mean_z; + phi::DenseTensor var_z; + phi::DenseTensor saved_mean_z; + phi::DenseTensor saved_var_z; + phi::DenseTensor equiv_scale_z; + phi::DenseTensor equiv_bias_z; - framework::Tensor y; - framework::Tensor bitmask; + phi::DenseTensor y; + phi::DenseTensor bitmask; InitMeanVar(cpu_mean_x, cpu_var_x, cpu_saved_mean_x, cpu_saved_var_x); paddle::framework::TensorCopySync(*cpu_mean_x, place, &mean_x); @@ -810,17 +809,17 @@ class CudnnBNAddReluTester { Tensor *cpu_dz, Tensor *cpu_dscale, Tensor *cpu_dbias) { - framework::Tensor dy; - framework::Tensor x; - framework::Tensor bn_scale; - framework::Tensor bn_bias; - framework::Tensor saved_mean; - framework::Tensor saved_var; - framework::Tensor bitmask; - framework::Tensor dx; - framework::Tensor dz; - framework::Tensor dscale; - framework::Tensor dbias; + phi::DenseTensor dy; + phi::DenseTensor x; + phi::DenseTensor bn_scale; + phi::DenseTensor bn_bias; + phi::DenseTensor saved_mean; + phi::DenseTensor saved_var; + phi::DenseTensor bitmask; + phi::DenseTensor dx; + phi::DenseTensor dz; + phi::DenseTensor dscale; + phi::DenseTensor dbias; auto place = ctx.GetPlace(); paddle::framework::TensorCopySync(cpu_dy_, place, &dy); @@ -880,27 +879,27 @@ class CudnnBNAddReluTester { bool has_shortcut_; // Forward input - framework::Tensor cpu_x_; - framework::Tensor cpu_bn_scale_x_; - framework::Tensor cpu_bn_bias_x_; - framework::Tensor cpu_z_; - framework::Tensor cpu_bn_scale_z_; - framework::Tensor cpu_bn_bias_z_; + phi::DenseTensor cpu_x_; + phi::DenseTensor cpu_bn_scale_x_; + phi::DenseTensor cpu_bn_bias_x_; + phi::DenseTensor cpu_z_; + phi::DenseTensor cpu_bn_scale_z_; + phi::DenseTensor 
cpu_bn_bias_z_; // Backward input - framework::Tensor cpu_dy_; - framework::Tensor cpu_bitmask_; - framework::Tensor cpu_saved_mean_x_; - framework::Tensor cpu_saved_var_x_; - framework::Tensor cpu_saved_mean_z_; - framework::Tensor cpu_saved_var_z_; - framework::Tensor cpu_saved_mean_base_x_; - framework::Tensor cpu_saved_var_base_x_; - framework::Tensor saved_reserve_space_x_; - framework::Tensor cpu_saved_mean_base_z_; - framework::Tensor cpu_saved_var_base_z_; - framework::Tensor saved_reserve_space_z_; - framework::Tensor cpu_y_base_; + phi::DenseTensor cpu_dy_; + phi::DenseTensor cpu_bitmask_; + phi::DenseTensor cpu_saved_mean_x_; + phi::DenseTensor cpu_saved_var_x_; + phi::DenseTensor cpu_saved_mean_z_; + phi::DenseTensor cpu_saved_var_z_; + phi::DenseTensor cpu_saved_mean_base_x_; + phi::DenseTensor cpu_saved_var_base_x_; + phi::DenseTensor saved_reserve_space_x_; + phi::DenseTensor cpu_saved_mean_base_z_; + phi::DenseTensor cpu_saved_var_base_z_; + phi::DenseTensor saved_reserve_space_z_; + phi::DenseTensor cpu_y_base_; double eps_ = 1e-5; float momentum_ = 0.9; diff --git a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h index 86588331ec2b1..b2201c89295ca 100644 --- a/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h +++ b/paddle/fluid/operators/fused/cudnn_bn_stats_finalize.cu.h @@ -20,7 +20,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; namespace dynload = platform::dynload; template using BatchNormParamType = diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h index cde4ed061423e..01e5e24e0a016 100644 --- a/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h +++ b/paddle/fluid/operators/fused/cudnn_norm_conv.cu.h @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; namespace dynload = platform::dynload; template diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc index ef93612ffce39..be518866f5f00 100644 --- a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc +++ b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc @@ -28,7 +28,7 @@ limitations under the License. 
*/ namespace framework = paddle::framework; namespace platform = paddle::platform; namespace op = paddle::operators; -using Tensor = paddle::framework::Tensor; +using Tensor = phi::DenseTensor; USE_OP_ITSELF(conv2d); USE_OP_ITSELF(conv2d_grad); @@ -37,7 +37,7 @@ PD_DECLARE_KERNEL(conv2d_grad, GPUDNN, ALL_LAYOUT); template void InitRandomTensor(const std::vector &dims, - framework::Tensor *cpu_out) { + phi::DenseTensor *cpu_out) { T *cpu_out_ptr = cpu_out->mutable_data(phi::make_ddim(dims), platform::CPUPlace()); @@ -49,8 +49,8 @@ void InitRandomTensor(const std::vector &dims, } template -void TransposeNchwToNhwc(const framework::Tensor &cpu_in, - framework::Tensor *cpu_out) { +void TransposeNchwToNhwc(const phi::DenseTensor &cpu_in, + phi::DenseTensor *cpu_out) { const auto &in_dims = cpu_in.dims(); EXPECT_EQ(cpu_in.dims().size(), 4); @@ -73,8 +73,8 @@ void TransposeNchwToNhwc(const framework::Tensor &cpu_in, } template -void CheckOutput(const framework::Tensor &cpu_res, - const framework::Tensor &cpu_base, +void CheckOutput(const phi::DenseTensor &cpu_res, + const phi::DenseTensor &cpu_base, float diff, bool is_relative_atol = false) { EXPECT_EQ(cpu_res.dims(), cpu_base.dims()); @@ -134,8 +134,8 @@ void ComputeConv2DBackward(const phi::GPUContext &ctx, const Tensor &cpu_input, const Tensor &cpu_filter, const Tensor &cpu_output_grad, - framework::Tensor *cpu_input_grad, - framework::Tensor *cpu_filter_grad, + phi::DenseTensor *cpu_input_grad, + phi::DenseTensor *cpu_filter_grad, int stride, int padding, int dilation) { @@ -191,9 +191,9 @@ void ComputeConv2DBackward(const phi::GPUContext &ctx, } template -void ComputeSumAndSquareSum(const framework::Tensor &cpu_out, - framework::Tensor *cpu_sum, - framework::Tensor *cpu_sum_of_square) { +void ComputeSumAndSquareSum(const phi::DenseTensor &cpu_out, + phi::DenseTensor *cpu_sum, + phi::DenseTensor *cpu_sum_of_square) { const auto &dims = cpu_out.dims(); int64_t c = dims[3]; @@ -245,15 +245,15 @@ class CudnnNormConvolutionTester { phi::GPUContext *ctx = static_cast( platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0))); - framework::Tensor cpu_output_base; - framework::Tensor cpu_sum_base; - framework::Tensor cpu_sum_of_square_base; + phi::DenseTensor cpu_output_base; + phi::DenseTensor cpu_sum_base; + phi::DenseTensor cpu_sum_of_square_base; BaselineForward( *ctx, &cpu_output_base, &cpu_sum_base, &cpu_sum_of_square_base); - framework::Tensor cpu_output; - framework::Tensor cpu_sum; - framework::Tensor cpu_sum_of_square; + phi::DenseTensor cpu_output; + phi::DenseTensor cpu_sum; + phi::DenseTensor cpu_sum_of_square; FusedForward(*ctx, &cpu_output, &cpu_sum, &cpu_sum_of_square); // Check forward correctness between baseline and results of normconv. 
@@ -267,15 +267,15 @@ class CudnnNormConvolutionTester { phi::GPUContext *ctx = static_cast( platform::DeviceContextPool::Instance().Get(platform::CUDAPlace(0))); - framework::Tensor cpu_input_grad_base; - framework::Tensor cpu_filter_nchw_grad_base; - framework::Tensor cpu_filter_nhwc_grad_base; + phi::DenseTensor cpu_input_grad_base; + phi::DenseTensor cpu_filter_nchw_grad_base; + phi::DenseTensor cpu_filter_nhwc_grad_base; BaselineBackward(*ctx, &cpu_input_grad_base, &cpu_filter_nchw_grad_base); TransposeNchwToNhwc(cpu_filter_nchw_grad_base, &cpu_filter_nhwc_grad_base); - framework::Tensor cpu_input_grad; - framework::Tensor cpu_filter_nhwc_grad; + phi::DenseTensor cpu_input_grad; + phi::DenseTensor cpu_filter_nhwc_grad; FusedBackward(*ctx, &cpu_input_grad, &cpu_filter_nhwc_grad); // Check backward correctness between baseline and results of normconv. @@ -301,9 +301,9 @@ class CudnnNormConvolutionTester { } void BaselineForward(const phi::GPUContext &ctx, - framework::Tensor *cpu_output_base, - framework::Tensor *cpu_sum_base, - framework::Tensor *cpu_sum_of_square_base) { + phi::DenseTensor *cpu_output_base, + phi::DenseTensor *cpu_sum_base, + phi::DenseTensor *cpu_sum_of_square_base) { ComputeConv2DForward( ctx, cpu_input_, cpu_filter_nchw_, cpu_output_base, stride_, padding_); ComputeSumAndSquareSum( @@ -311,8 +311,8 @@ class CudnnNormConvolutionTester { } void BaselineBackward(const phi::GPUContext &ctx, - framework::Tensor *cpu_input_grad_base, - framework::Tensor *cpu_filter_grad_base) { + phi::DenseTensor *cpu_input_grad_base, + phi::DenseTensor *cpu_filter_grad_base) { ComputeConv2DBackward(ctx, cpu_input_, cpu_filter_nchw_, @@ -326,14 +326,14 @@ class CudnnNormConvolutionTester { // get forward results of cudnn_norm_conv void FusedForward(const phi::GPUContext &ctx, - framework::Tensor *cpu_output, - framework::Tensor *cpu_sum, - framework::Tensor *cpu_sum_of_square) { - framework::Tensor input; - framework::Tensor filter_nhwc; - framework::Tensor output; - framework::Tensor sum; - framework::Tensor sum_of_square; + phi::DenseTensor *cpu_output, + phi::DenseTensor *cpu_sum, + phi::DenseTensor *cpu_sum_of_square) { + phi::DenseTensor input; + phi::DenseTensor filter_nhwc; + phi::DenseTensor output; + phi::DenseTensor sum; + phi::DenseTensor sum_of_square; auto place = ctx.GetPlace(); paddle::framework::TensorCopySync(cpu_input_, place, &input); @@ -364,13 +364,13 @@ class CudnnNormConvolutionTester { } void FusedBackward(const phi::GPUContext &ctx, - framework::Tensor *cpu_input_grad, - framework::Tensor *cpu_filter_grad) { - framework::Tensor input; - framework::Tensor filter_nhwc; - framework::Tensor output_grad; - framework::Tensor input_grad; - framework::Tensor filter_grad; + phi::DenseTensor *cpu_input_grad, + phi::DenseTensor *cpu_filter_grad) { + phi::DenseTensor input; + phi::DenseTensor filter_nhwc; + phi::DenseTensor output_grad; + phi::DenseTensor input_grad; + phi::DenseTensor filter_grad; auto place = ctx.GetPlace(); paddle::framework::TensorCopySync(cpu_input_, place, &input); @@ -415,12 +415,12 @@ class CudnnNormConvolutionTester { const int group_ = 1; // Forward input - framework::Tensor cpu_input_; - framework::Tensor cpu_filter_nchw_; - framework::Tensor cpu_filter_nhwc_; + phi::DenseTensor cpu_input_; + phi::DenseTensor cpu_filter_nchw_; + phi::DenseTensor cpu_filter_nhwc_; // Backward input - framework::Tensor cpu_output_grad_; + phi::DenseTensor cpu_output_grad_; }; // test for fp16, kernel = 1, output_channels = input_channels diff --git 
a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h index 60cf314c5ea3c..188f767daf1c8 100644 --- a/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h +++ b/paddle/fluid/operators/fused/cudnn_scale_bias_add_relu.cu.h @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template using CudnnDataType = platform::CudnnDataType; namespace dynload = platform::dynload; diff --git a/paddle/fluid/operators/fused/fmha_ref.h b/paddle/fluid/operators/fused/fmha_ref.h index 7de59dd9ee2e3..4854f81eae469 100644 --- a/paddle/fluid/operators/fused/fmha_ref.h +++ b/paddle/fluid/operators/fused/fmha_ref.h @@ -27,7 +27,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class AttnDropoutParam { public: @@ -46,7 +46,7 @@ class AttnDropoutParam { bool is_upscale_in_train, bool is_fix_seed, int seed_val, - const Tensor* seed) { + const phi::DenseTensor* seed) { is_test_ = is_test; dropout_implementation_ = dropout_implementation; dropout_prob_ = dropout_prob; @@ -61,7 +61,7 @@ class AttnDropoutParam { bool is_upscale_in_train_; bool is_fix_seed_; int seed_val_; - const Tensor* seed_; + const phi::DenseTensor* seed_; }; template @@ -82,18 +82,18 @@ class FMHARef { ~FMHARef() {} - void ComputeForward(const Tensor& qkv_input_tensor, - const Tensor* cache_kv_tensor, - const Tensor* src_mask_tensor, - Tensor* transpose_2_out_tensor, - Tensor* cache_kv_out_tensor, - Tensor* qk_out_tensor, - Tensor* src_mask_out_tensor, - Tensor* softmax_out_tensor, - Tensor* dropout_mask_out_tensor, - Tensor* dropout_out_tensor, - Tensor* qktv_out_tensor, - Tensor* fmha_out_tensor) { + void ComputeForward(const phi::DenseTensor& qkv_input_tensor, + const phi::DenseTensor* cache_kv_tensor, + const phi::DenseTensor* src_mask_tensor, + phi::DenseTensor* transpose_2_out_tensor, + phi::DenseTensor* cache_kv_out_tensor, + phi::DenseTensor* qk_out_tensor, + phi::DenseTensor* src_mask_out_tensor, + phi::DenseTensor* softmax_out_tensor, + phi::DenseTensor* dropout_mask_out_tensor, + phi::DenseTensor* dropout_out_tensor, + phi::DenseTensor* qktv_out_tensor, + phi::DenseTensor* fmha_out_tensor) { // input shape: [bs, seq_len, 3, num_head, head_dim] // transpose with perm [2, 0, 3, 1, 4], // output_shape: [3, bs, num_head, seq_len, head_dim] @@ -138,8 +138,8 @@ class FMHARef { float alpha = 1.0 / sqrt(head_dim_); auto q_tensor = transpose_2_out_tensor->Slice(0, 1); auto functor = phi::funcs::ScaleFunctor(alpha); - std::vector ins = {&q_tensor}; - std::vector outs = {&q_tensor}; + std::vector ins = {&q_tensor}; + std::vector outs = {&q_tensor}; phi::funcs::ElementwiseKernel(dev_ctx_, ins, &outs, functor); } @@ -179,8 +179,8 @@ class FMHARef { seq_len_, dev_ctx_.stream()); } else { - std::vector ins; - std::vector outs; + std::vector ins; + std::vector outs; ins.emplace_back(qk_out_tensor); ins.emplace_back(src_mask_tensor); outs.emplace_back(src_mask_out_tensor); @@ -216,7 +216,7 @@ class FMHARef { dropout_param_.is_upscale_in_train_, dropout_param_.is_fix_seed_, dropout_param_.seed_val_, - static_cast(*softmax_out_tensor), + static_cast(*softmax_out_tensor), dropout_param_.seed_, dropout_mask_out_tensor, dropout_out_tensor, @@ -258,22 +258,22 @@ class FMHARef { dev_ctx_, *qktv_out_tensor, perm_3, fmha_out_tensor); } - void ComputeBackward(const Tensor& 
transpose_2_out_tensor, - const Tensor* src_mask_tensor, - const Tensor& softmax_out_tensor, - const Tensor& dropout_mask_out_tensor, - const Tensor& dropout_out_tensor, - const Tensor& qk_out_tensor, - const Tensor& src_mask_out_tensor, - const Tensor& fmha_out_grad_tensor, - Tensor* qktv_out_grad_tensor, - Tensor* dropout_out_grad_tensor, - Tensor* softmax_out_grad_tensor, - Tensor* src_mask_out_grad_tensor, - Tensor* qk_out_grad_tensor, - Tensor* transpose_2_out_grad_tensor, - Tensor* src_mask_grad_tensor, - Tensor* qkv_input_grad_tensor) { + void ComputeBackward(const phi::DenseTensor& transpose_2_out_tensor, + const phi::DenseTensor* src_mask_tensor, + const phi::DenseTensor& softmax_out_tensor, + const phi::DenseTensor& dropout_mask_out_tensor, + const phi::DenseTensor& dropout_out_tensor, + const phi::DenseTensor& qk_out_tensor, + const phi::DenseTensor& src_mask_out_tensor, + const phi::DenseTensor& fmha_out_grad_tensor, + phi::DenseTensor* qktv_out_grad_tensor, + phi::DenseTensor* dropout_out_grad_tensor, + phi::DenseTensor* softmax_out_grad_tensor, + phi::DenseTensor* src_mask_out_grad_tensor, + phi::DenseTensor* qk_out_grad_tensor, + phi::DenseTensor* transpose_2_out_grad_tensor, + phi::DenseTensor* src_mask_grad_tensor, + phi::DenseTensor* qkv_input_grad_tensor) { auto blas = phi::funcs::GetBlas(dev_ctx_); int q_size = batch_size_ * seq_len_ * num_head_ * head_dim_; int k_size = q_size; @@ -385,7 +385,7 @@ class FMHARef { false, dropout_param_.dropout_prob_, dropout_param_.is_upscale_in_train_, - static_cast(*dropout_out_grad_tensor), + static_cast(*dropout_out_grad_tensor), dropout_mask_out_tensor, softmax_out_grad_tensor, false); diff --git a/paddle/fluid/operators/fused/fused_attention_op.cc b/paddle/fluid/operators/fused/fused_attention_op.cc index 90f6d34535196..e1c3bcdd83f46 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cc +++ b/paddle/fluid/operators/fused/fused_attention_op.cc @@ -21,7 +21,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusedAttentionOp : public framework::OperatorWithKernel { public: @@ -257,7 +257,7 @@ class FusedAttentionOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - auto input = ctx.Input("X"); + auto input = ctx.Input("X"); auto input_data_type = framework::TransToProtoVarType(input->dtype()); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } @@ -567,7 +567,7 @@ class FusedAttentionGradOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - auto input = ctx.Input("X"); + auto input = ctx.Input("X"); auto input_data_type = framework::TransToProtoVarType(input->dtype()); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/fused/fused_attention_op.cu b/paddle/fluid/operators/fused/fused_attention_op.cu index 059d94031ac8e..62ea3f723dc9e 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cu +++ b/paddle/fluid/operators/fused/fused_attention_op.cu @@ -38,10 +38,10 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template -static void AllReduce(framework::Tensor &tensor, // NOLINT +static void AllReduce(phi::DenseTensor &tensor, // NOLINT const int ring_id, const phi::GPUContext &ctx) { if (ring_id == -1) return; @@ -82,46 +82,47 @@ class FusedAttentionOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { using U = LayerNormParamType; - auto *input_x = ctx.Input("X"); + auto *input_x = ctx.Input("X"); auto &dev_ctx = ctx.template device_context(); const auto pre_layer_norm = ctx.Attr("pre_layer_norm"); const float epsilon = ctx.Attr("epsilon"); - auto *ln_scale = ctx.Input("LnScale"); - auto *ln_bias = ctx.Input("LnBias"); - auto *ln_mean = ctx.Output("LnMean"); - auto *ln_var = ctx.Output("LnVariance"); - auto *ln_out = ctx.Output("LnOut"); + auto *ln_scale = ctx.Input("LnScale"); + auto *ln_bias = ctx.Input("LnBias"); + auto *ln_mean = ctx.Output("LnMean"); + auto *ln_var = ctx.Output("LnVariance"); + auto *ln_out = ctx.Output("LnOut"); // x: qkv's input [batch_size, seq_len, dim_embed] // y: qkv's weight: [3, num_head, dim_head, dim_embed] - auto *qkv_weight = ctx.Input("QKVW"); - auto *qkv_bias = ctx.Input("QKVBias"); - auto *qkv_out = ctx.Output("QKVOut"); - auto *qkv_bias_out = ctx.Output("QKVBiasOut"); - - auto *src_mask = ctx.Input("SrcMask"); - auto *transpose_out_2 = ctx.Output("TransposeOut2"); - auto *cache_kv = ctx.Input("CacheKV"); - auto *cache_kv_out = ctx.Output("CacheKVOut"); - auto *qk_out = ctx.Output("QKOut"); - auto *qktv_out = ctx.Output("QKTVOut"); - auto *softmax_out = ctx.Output("SoftmaxOut"); - auto *attn_dropout_mask_out = ctx.Output("AttnDropoutMaskOut"); - auto *attn_dropout_out = ctx.Output("AttnDropoutOut"); - auto *src_mask_out = ctx.Output("SrcMaskOut"); - auto *fmha_out = ctx.Output("FMHAOut"); - - auto *out_linear_weight = ctx.Input("OutLinearW"); - auto *out_linear_bias = ctx.Input("OutLinearBias"); - auto *out_linear_out = ctx.Output("OutLinearOut"); - - auto *ln_scale_2 = ctx.Input("Ln2Scale"); - auto *ln_bias_2 = ctx.Input("Ln2Bias"); - auto *dropout_mask_out = ctx.Output("DropoutMaskOut"); + auto *qkv_weight = ctx.Input("QKVW"); + auto *qkv_bias = ctx.Input("QKVBias"); + auto *qkv_out = ctx.Output("QKVOut"); + auto *qkv_bias_out = ctx.Output("QKVBiasOut"); + + auto *src_mask = ctx.Input("SrcMask"); + auto *transpose_out_2 = ctx.Output("TransposeOut2"); + auto *cache_kv = ctx.Input("CacheKV"); + auto *cache_kv_out = ctx.Output("CacheKVOut"); + auto *qk_out = ctx.Output("QKOut"); + auto *qktv_out = ctx.Output("QKTVOut"); + auto *softmax_out = ctx.Output("SoftmaxOut"); + auto *attn_dropout_mask_out = + ctx.Output("AttnDropoutMaskOut"); + auto *attn_dropout_out = ctx.Output("AttnDropoutOut"); + auto *src_mask_out = ctx.Output("SrcMaskOut"); + auto *fmha_out = ctx.Output("FMHAOut"); + + auto *out_linear_weight = ctx.Input("OutLinearW"); + auto *out_linear_bias = ctx.Input("OutLinearBias"); + auto *out_linear_out = ctx.Output("OutLinearOut"); + + auto *ln_scale_2 = ctx.Input("Ln2Scale"); + auto *ln_bias_2 = ctx.Input("Ln2Bias"); + auto *dropout_mask_out = ctx.Output("DropoutMaskOut"); auto *bias_dropout_residual_out = - ctx.Output("BiasDropoutResidualOut"); - auto *ln_mean_2 = ctx.Output("Ln2Mean"); - auto *ln_var_2 = ctx.Output("Ln2Variance"); + ctx.Output("BiasDropoutResidualOut"); + auto *ln_mean_2 = ctx.Output("Ln2Mean"); + auto *ln_var_2 = ctx.Output("Ln2Variance"); const float 
ln_epsilon = ctx.Attr("ln_epsilon"); float attn_dropout_rate = ctx.Attr("attn_dropout_rate"); @@ -130,13 +131,14 @@ class FusedAttentionOpKernel : public framework::OpKernel { ctx.Attr("attn_dropout_implementation"); bool is_upscale_in_train_1 = (dropout_implementation_1 == "upscale_in_train"); - auto *seed_1 = ctx.HasInput("Seed1") ? ctx.Input("Seed1") : nullptr; + auto *seed_1 = + ctx.HasInput("Seed1") ? ctx.Input("Seed1") : nullptr; bool is_fix_seed_1 = ctx.Attr("attn_dropout_fix_seed"); int seed_val_1 = ctx.Attr("attn_dropout_seed"); int ring_id = ctx.Attr("ring_id"); // final output. - auto *out = ctx.Output("Y"); + auto *out = ctx.Output("Y"); // get data ptr for qkv part. const auto input_x_dims = input_x->dims(); @@ -377,29 +379,30 @@ class FusedAttentionGradKernel : public framework::OpKernel { ctx.Attr("attn_dropout_implementation"); bool is_upscale_in_train_1 = (dropout_implementation_1 == "upscale_in_train"); - auto *seed_1 = ctx.HasInput("Seed1") ? ctx.Input("Seed1") : nullptr; + auto *seed_1 = + ctx.HasInput("Seed1") ? ctx.Input("Seed1") : nullptr; bool is_fix_seed_1 = ctx.Attr("attn_dropout_fix_seed"); int seed_val_1 = ctx.Attr("attn_dropout_seed"); int ring_id = ctx.Attr("ring_id"); // get inputs. - auto *d_y = ctx.Input(framework::GradVarName("Y")); + auto *d_y = ctx.Input(framework::GradVarName("Y")); auto *d_y_data = d_y->data(); // fw input - auto *input_x = ctx.Input("X"); - auto *ln_scale = ctx.Input("LnScale"); - auto *ln_2_scale = ctx.Input("Ln2Scale"); + auto *input_x = ctx.Input("X"); + auto *ln_scale = ctx.Input("LnScale"); + auto *ln_2_scale = ctx.Input("Ln2Scale"); auto *x_data = input_x->data(); auto *ln_scale_data = (ln_scale == nullptr ? nullptr : ln_scale->data()); auto *ln_2_scale_data = (ln_2_scale == nullptr ? nullptr : ln_2_scale->data()); // fw parameters. - auto *src_mask = ctx.Input("SrcMask"); - auto *qkv_weight = ctx.Input("QKVW"); - auto *qkv_bias = ctx.Input("QKVBias"); - auto *out_linear_weight = ctx.Input("OutLinearW"); - auto *out_linear_bias = ctx.Input("OutLinearBias"); + auto *src_mask = ctx.Input("SrcMask"); + auto *qkv_weight = ctx.Input("QKVW"); + auto *qkv_bias = ctx.Input("QKVBias"); + auto *out_linear_weight = ctx.Input("OutLinearW"); + auto *out_linear_bias = ctx.Input("OutLinearBias"); auto *src_mask_data = (src_mask == nullptr ? nullptr : src_mask->data()); auto *qkv_weight_data = qkv_weight->data(); auto *qkv_bias_data = (qkv_bias == nullptr) ? nullptr : qkv_bias->data(); @@ -408,20 +411,21 @@ class FusedAttentionGradKernel : public framework::OpKernel { (out_linear_bias == nullptr) ? 
nullptr : out_linear_bias->data(); // fw output - auto *fmha_out = ctx.Input("FMHAOut"); - auto *transpose_out_2 = ctx.Input("TransposeOut2"); - auto *qk_out = ctx.Input("QKOut"); - auto *qktv_out = ctx.Input("QKTVOut"); - auto *softmax_out = ctx.Input("SoftmaxOut"); - auto *attn_dropout_mask_out = ctx.Input("AttnDropoutMaskOut"); - auto *attn_dropout_out = ctx.Input("AttnDropoutOut"); - auto *src_mask_out = ctx.Input("SrcMaskOut"); - auto *out_linear_out = ctx.Input("OutLinearOut"); - auto *ln_2_mean = ctx.Input("Ln2Mean"); - auto *ln_2_var = ctx.Input("Ln2Variance"); - auto *dropout_mask_out = ctx.Input("DropoutMaskOut"); + auto *fmha_out = ctx.Input("FMHAOut"); + auto *transpose_out_2 = ctx.Input("TransposeOut2"); + auto *qk_out = ctx.Input("QKOut"); + auto *qktv_out = ctx.Input("QKTVOut"); + auto *softmax_out = ctx.Input("SoftmaxOut"); + auto *attn_dropout_mask_out = + ctx.Input("AttnDropoutMaskOut"); + auto *attn_dropout_out = ctx.Input("AttnDropoutOut"); + auto *src_mask_out = ctx.Input("SrcMaskOut"); + auto *out_linear_out = ctx.Input("OutLinearOut"); + auto *ln_2_mean = ctx.Input("Ln2Mean"); + auto *ln_2_var = ctx.Input("Ln2Variance"); + auto *dropout_mask_out = ctx.Input("DropoutMaskOut"); auto *bias_dropout_residual_out = - ctx.Input("BiasDropoutResidualOut"); + ctx.Input("BiasDropoutResidualOut"); auto *fmha_out_data = fmha_out->data(); auto *transpose_out_2_data = transpose_out_2->data(); auto *qk_out_data = qk_out->data(); @@ -433,25 +437,29 @@ class FusedAttentionGradKernel : public framework::OpKernel { auto *dropout_mask_out_data = dropout_mask_out->data(); // output's grad - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_qkv_out = ctx.Output(framework::GradVarName("QKVOut")); + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_qkv_out = + ctx.Output(framework::GradVarName("QKVOut")); auto *d_qkv_bias_out = - ctx.Output(framework::GradVarName("QKVBiasOut")); - auto *d_qktv_out = ctx.Output(framework::GradVarName("QKTVOut")); + ctx.Output(framework::GradVarName("QKVBiasOut")); + auto *d_qktv_out = + ctx.Output(framework::GradVarName("QKTVOut")); auto *d_transpose_out_2 = - ctx.Output(framework::GradVarName("TransposeOut2")); - auto *d_qk_out = ctx.Output(framework::GradVarName("QKOut")); + ctx.Output(framework::GradVarName("TransposeOut2")); + auto *d_qk_out = + ctx.Output(framework::GradVarName("QKOut")); auto *d_softmax_out = - ctx.Output(framework::GradVarName("SoftmaxOut")); + ctx.Output(framework::GradVarName("SoftmaxOut")); auto *d_attn_dropout_out = - ctx.Output(framework::GradVarName("AttnDropoutOut")); + ctx.Output(framework::GradVarName("AttnDropoutOut")); auto *d_src_mask_out = - ctx.Output(framework::GradVarName("SrcMaskOut")); - auto *d_fmha_out = ctx.Output(framework::GradVarName("FMHAOut")); + ctx.Output(framework::GradVarName("SrcMaskOut")); + auto *d_fmha_out = + ctx.Output(framework::GradVarName("FMHAOut")); auto *d_out_linear_out = - ctx.Output(framework::GradVarName("OutLinearOut")); - auto *d_bias_dropout_residual_out = - ctx.Output(framework::GradVarName("BiasDropoutResidualOut")); + ctx.Output(framework::GradVarName("OutLinearOut")); + auto *d_bias_dropout_residual_out = ctx.Output( + framework::GradVarName("BiasDropoutResidualOut")); auto *d_x_data = dev_ctx.template Alloc(d_x, d_x->numel() * sizeof(T)); // when qkv_bias is not nullptr, d_qkv_out is equals to d_qkv_bias_out, the // space can be reused. 
@@ -485,14 +493,18 @@ class FusedAttentionGradKernel : public framework::OpKernel { d_out_linear_out, d_out_linear_out->numel() * sizeof(T)); // parameter grad - auto *d_qkv_weight = ctx.Output(framework::GradVarName("QKVW")); - auto *d_qkv_bias = ctx.Output(framework::GradVarName("QKVBias")); + auto *d_qkv_weight = + ctx.Output(framework::GradVarName("QKVW")); + auto *d_qkv_bias = + ctx.Output(framework::GradVarName("QKVBias")); auto *d_out_linear_weight = - ctx.Output(framework::GradVarName("OutLinearW")); + ctx.Output(framework::GradVarName("OutLinearW")); auto *d_out_linear_bias = - ctx.Output(framework::GradVarName("OutLinearBias")); - auto *d_ln_2_scale = ctx.Output(framework::GradVarName("Ln2Scale")); - auto *d_ln_2_bias = ctx.Output(framework::GradVarName("Ln2Bias")); + ctx.Output(framework::GradVarName("OutLinearBias")); + auto *d_ln_2_scale = + ctx.Output(framework::GradVarName("Ln2Scale")); + auto *d_ln_2_bias = + ctx.Output(framework::GradVarName("Ln2Bias")); auto *d_qkv_weight_data = dev_ctx.template Alloc( d_qkv_weight, d_qkv_weight->numel() * sizeof(T)); @@ -664,16 +676,19 @@ class FusedAttentionGradKernel : public framework::OpKernel { } if (pre_layer_norm) { - auto *ln_mean = ctx.Input("LnMean"); - auto *ln_var = ctx.Input("LnVariance"); - auto *ln_out = ctx.Input("LnOut"); + auto *ln_mean = ctx.Input("LnMean"); + auto *ln_var = ctx.Input("LnVariance"); + auto *ln_out = ctx.Input("LnOut"); auto *ln_mean_data = ln_mean->data(); auto *ln_var_data = ln_var->data(); auto *ln_out_data = ln_out->data(); - auto *d_ln_out = ctx.Output(framework::GradVarName("LnOut")); - auto *d_ln_scale = ctx.Output(framework::GradVarName("LnScale")); - auto *d_ln_bias = ctx.Output(framework::GradVarName("LnBias")); + auto *d_ln_out = + ctx.Output(framework::GradVarName("LnOut")); + auto *d_ln_scale = + ctx.Output(framework::GradVarName("LnScale")); + auto *d_ln_bias = + ctx.Output(framework::GradVarName("LnBias")); auto *d_ln_out_data = dev_ctx.template Alloc(d_ln_out, d_ln_out->numel() * sizeof(T)); auto *d_ln_scale_data = diff --git a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc index 3e888a2e67fc7..94131197060b5 100644 --- a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc +++ b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusedBiasDropoutResidualLnOp : public framework::OperatorWithKernel { public: @@ -64,7 +64,7 @@ class FusedBiasDropoutResidualLnOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - auto input = ctx.Input("X"); + auto input = ctx.Input("X"); auto input_data_type = framework::TransToProtoVarType(input->dtype()); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } @@ -194,7 +194,7 @@ class FusedBiasDropoutResidualLnGradOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - auto input = ctx.Input("X"); + auto input = ctx.Input("X"); auto input_data_type = framework::TransToProtoVarType(input->dtype()); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu index b194f07c848da..6da533aa77f3c 100644 --- a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu +++ b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu @@ -25,7 +25,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class FusedBiasDropoutResidualLnOpKernel : public framework::OpKernel { @@ -33,18 +33,18 @@ class FusedBiasDropoutResidualLnOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &ctx) const override { auto &dev_ctx = ctx.template device_context(); using U = LayerNormParamType; - auto *input_x = ctx.Input("X"); - auto *bias = ctx.Input("Bias"); - auto *residual = ctx.Input("Residual"); + auto *input_x = ctx.Input("X"); + auto *bias = ctx.Input("Bias"); + auto *residual = ctx.Input("Residual"); const float ln_epsilon = ctx.Attr("ln_epsilon"); - auto *ln_scale = ctx.Input("LnScale"); - auto *ln_bias = ctx.Input("LnBias"); - auto *dropout_mask_out = ctx.Output("DropoutMaskOut"); + auto *ln_scale = ctx.Input("LnScale"); + auto *ln_bias = ctx.Input("LnBias"); + auto *dropout_mask_out = ctx.Output("DropoutMaskOut"); auto *bias_dropout_residual_out = - ctx.Output("BiasDropoutResidualOut"); - auto *ln_mean = ctx.Output("LnMean"); - auto *ln_var = ctx.Output("LnVariance"); - auto *y = ctx.Output("Y"); + ctx.Output("BiasDropoutResidualOut"); + auto *ln_mean = ctx.Output("LnMean"); + auto *ln_var = ctx.Output("LnVariance"); + auto *y = ctx.Output("Y"); auto *x_data = input_x->data(); auto *bias_data = (bias == nullptr) ? nullptr : bias->data(); auto *residual_data = (residual == nullptr) ? 
nullptr : residual->data(); @@ -96,13 +96,13 @@ class FusedBiasDropoutResidualLnGradKernel : public framework::OpKernel { using U = LayerNormParamType; const float ln_epsilon = ctx.Attr("ln_epsilon"); auto &dev_ctx = ctx.template device_context(); - auto *d_y = ctx.Input(framework::GradVarName("Y")); - auto *ln_scale = ctx.Input("LnScale"); - auto *dropout_mask_out = ctx.Input("DropoutMaskOut"); + auto *d_y = ctx.Input(framework::GradVarName("Y")); + auto *ln_scale = ctx.Input("LnScale"); + auto *dropout_mask_out = ctx.Input("DropoutMaskOut"); auto *bias_dropout_residual_out = - ctx.Input("BiasDropoutResidualOut"); - auto *ln_mean = ctx.Input("LnMean"); - auto *ln_var = ctx.Input("LnVariance"); + ctx.Input("BiasDropoutResidualOut"); + auto *ln_mean = ctx.Input("LnMean"); + auto *ln_var = ctx.Input("LnVariance"); auto *d_y_data = d_y->data(); auto *ln_scale_data = (ln_scale == nullptr ? nullptr : ln_scale->data()); auto *dropout_mask_out_data = dropout_mask_out->data(); @@ -110,13 +110,16 @@ class FusedBiasDropoutResidualLnGradKernel : public framework::OpKernel { auto *ln_mean_data = ln_mean->data(); auto *ln_var_data = ln_var->data(); - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_residual = ctx.Output(framework::GradVarName("Residual")); - auto *d_bias = ctx.Output(framework::GradVarName("Bias")); - auto *d_bias_dropout_residual_out = - ctx.Output(framework::GradVarName("BiasDropoutResidualOut")); - auto *d_ln_scale = ctx.Output(framework::GradVarName("LnScale")); - auto *d_ln_bias = ctx.Output(framework::GradVarName("LnBias")); + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_residual = + ctx.Output(framework::GradVarName("Residual")); + auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + auto *d_bias_dropout_residual_out = ctx.Output( + framework::GradVarName("BiasDropoutResidualOut")); + auto *d_ln_scale = + ctx.Output(framework::GradVarName("LnScale")); + auto *d_ln_bias = + ctx.Output(framework::GradVarName("LnBias")); auto *d_x_data = dev_ctx.Alloc(d_x, d_x->numel() * sizeof(T)); auto *d_residual_data = dev_ctx.Alloc(d_residual, d_residual->numel() * sizeof(T)); diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_activation_op.cc index 2fdd38bc266fc..9a773fa91dc9c 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cc @@ -168,26 +168,26 @@ framework::OpKernelType FusedBatchNormActOp::GetExpectedKernelType( if (input_data_type == framework::proto::VarType::FP64) { bn_param_type = framework::proto::VarType::FP64; } - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("Scale")->dtype()), - platform::errors::PreconditionNotMet( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("Bias")->dtype()), - platform::errors::PreconditionNotMet( - "Bias input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("Mean")->dtype()), - platform::errors::PreconditionNotMet( - "Mean input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("Variance")->dtype()), - platform::errors::PreconditionNotMet( - "Variance input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), + platform::errors::PreconditionNotMet( + "Scale input should be of float 
type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), + platform::errors::PreconditionNotMet( + "Bias input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Mean")->dtype()), + platform::errors::PreconditionNotMet( + "Mean input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Variance")->dtype()), + platform::errors::PreconditionNotMet( + "Variance input should be of float type")); framework::LibraryType library = framework::LibraryType::kPlain; framework::DataLayout layout = framework::DataLayout::kAnyLayout; diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_activation_op.cu index 1a22de67b53db..c7fbdc88abb33 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cu @@ -30,7 +30,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template using CudnnDataType = platform::CudnnDataType; template @@ -59,35 +59,35 @@ class FusedBatchNormActKernel // Get the size for each dimension. // NHWC [batch_size, in_height, in_width, in_channels] - const auto *x = ctx.Input("X"); + const auto *x = ctx.Input("X"); const auto &x_dims = x->dims(); PADDLE_ENFORCE_EQ(x_dims.size() >= 2 && x_dims.size() <= 5, true, platform::errors::PreconditionNotMet( "The Input dim size should be between 2 and 5")); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); // Run training mode. // obtain running mean and running inv var, and see if we need to // initialize them. - auto *mean_out = ctx.Output("MeanOut"); - auto *variance_out = ctx.Output("VarianceOut"); + auto *mean_out = ctx.Output("MeanOut"); + auto *variance_out = ctx.Output("VarianceOut"); dev_ctx.Alloc>( mean_out, mean_out->numel() * sizeof(BatchNormParamType)); dev_ctx.Alloc>( variance_out, variance_out->numel() * sizeof(BatchNormParamType)); - auto *saved_mean = ctx.Output("SavedMean"); - auto *saved_variance = ctx.Output("SavedVariance"); + auto *saved_mean = ctx.Output("SavedMean"); + auto *saved_variance = ctx.Output("SavedVariance"); dev_ctx.Alloc>( saved_mean, saved_mean->numel() * sizeof(BatchNormParamType)); dev_ctx.Alloc>( saved_variance, saved_variance->numel() * sizeof(BatchNormParamType)); - auto *y = ctx.Output("Y"); + auto *y = ctx.Output("Y"); dev_ctx.Alloc(y, y->numel() * sizeof(T)); int N, C, H, W, D; @@ -147,7 +147,7 @@ class FusedBatchNormActKernel // Create reserve space and workspace for batch norm. // Create tensor for each batchnorm op, it will be used in the // backward. Thus this tensor shouldn't be temp. 
- auto *reserve_space = ctx.Output("ReserveSpace"); + auto *reserve_space = ctx.Output("ReserveSpace"); PADDLE_ENFORCE_NOT_NULL( reserve_space, platform::errors::NotFound( @@ -243,12 +243,12 @@ class FusedBatchNormActGradKernel double epsilon = static_cast(ctx.Attr("epsilon")); std::string act_type = ctx.Attr("act_type"); auto &dev_ctx = ctx.template device_context(); - const auto *x = ctx.Input("X"); - const auto *y = ctx.Input("Y"); - const auto *d_y = ctx.Input(framework::GradVarName("Y")); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); - const auto *reserve_space = ctx.Input("ReserveSpace"); + const auto *x = ctx.Input("X"); + const auto *y = ctx.Input("Y"); + const auto *d_y = ctx.Input(framework::GradVarName("Y")); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + const auto *reserve_space = ctx.Input("ReserveSpace"); const auto &x_dims = x->dims(); @@ -261,9 +261,10 @@ class FusedBatchNormActGradKernel ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D); // init output - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_scale = ctx.Output(framework::GradVarName("Scale")); - auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_scale = + ctx.Output(framework::GradVarName("Scale")); + auto *d_bias = ctx.Output(framework::GradVarName("Bias")); dev_ctx.Alloc(d_x, d_x->numel() * sizeof(T)); PADDLE_ENFORCE_EQ( @@ -330,8 +331,8 @@ class FusedBatchNormActGradKernel PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnDeriveBNTensorDescriptor( bn_param_desc_, data_desc_, mode_)); - const auto *saved_mean = ctx.Input("SavedMean"); - const auto *saved_var = ctx.Input("SavedVariance"); + const auto *saved_mean = ctx.Input("SavedMean"); + const auto *saved_var = ctx.Input("SavedVariance"); const auto *saved_mean_data = saved_mean->template data>(); const auto *saved_var_data = diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.h b/paddle/fluid/operators/fused/fused_bn_activation_op.h index c848a917c1dac..d7e5d236359bd 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.h +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.h @@ -26,7 +26,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusedBatchNormActOp : public framework::OperatorWithKernel { public: @@ -39,7 +39,7 @@ class FusedBatchNormActOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override; }; diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc index 84bcd9e591966..cf0f97cdc0037 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc @@ -145,11 +145,13 @@ framework::OpKernelType FusedBatchNormAddActOp::GetExpectedKernelType( PADDLE_ENFORCE_EQ( bn_param_type, - framework::TransToProtoVarType(ctx.Input("Scale")->dtype()), + framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), platform::errors::InvalidArgument("Scale input should be of float type")); PADDLE_ENFORCE_EQ( bn_param_type, - framework::TransToProtoVarType(ctx.Input("Bias")->dtype()), + framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), platform::errors::InvalidArgument("Bias input should be of float type")); framework::LibraryType library = framework::LibraryType::kPlain; diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu index 6d541f0784234..5a192b2df5c94 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu @@ -30,7 +30,7 @@ DECLARE_bool(cudnn_batchnorm_spatial_persistent); namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template using CudnnDataType = platform::CudnnDataType; template @@ -59,29 +59,29 @@ class FusedBatchNormAddActKernel // Get the size for each dimension. // NHWC [batch_size, in_height, in_width, in_channels] - const auto *x = ctx.Input("X"); - const auto *z = ctx.Input("Z"); + const auto *x = ctx.Input("X"); + const auto *z = ctx.Input("Z"); const auto &in_dims = x->dims(); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); - auto *mean_out = ctx.Output("MeanOut"); - auto *variance_out = ctx.Output("VarianceOut"); + auto *mean_out = ctx.Output("MeanOut"); + auto *variance_out = ctx.Output("VarianceOut"); dev_ctx.Alloc>( mean_out, mean_out->numel() * sizeof(BatchNormParamType)); dev_ctx.Alloc>( variance_out, variance_out->numel() * sizeof(BatchNormParamType)); - auto *saved_mean = ctx.Output("SavedMean"); - auto *saved_variance = ctx.Output("SavedVariance"); + auto *saved_mean = ctx.Output("SavedMean"); + auto *saved_variance = ctx.Output("SavedVariance"); dev_ctx.Alloc>( saved_mean, saved_mean->numel() * sizeof(BatchNormParamType)); dev_ctx.Alloc>( saved_variance, saved_variance->numel() * sizeof(BatchNormParamType)); - auto *y = ctx.Output("Y"); + auto *y = ctx.Output("Y"); dev_ctx.Alloc(y, y->numel() * sizeof(T)); int N, C, H, W, D; @@ -124,7 +124,7 @@ class FusedBatchNormAddActKernel // Create reserve space and workspace for batch norm. // Create tensor for each batchnorm op, it will be used in the // backward. Thus this tensor shouldn't be temp. 
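The allocation calls in these batch-norm kernels follow one pattern for the statistics outputs. A sketch of two of the lines above with their template arguments written out, assuming BatchNormParamType<T> as the surrounding sizeof expressions indicate:

// Sketch: statistics outputs are allocated in the param dtype, not in T.
dev_ctx.Alloc<BatchNormParamType<T>>(
    mean_out, mean_out->numel() * sizeof(BatchNormParamType<T>));
dev_ctx.Alloc<BatchNormParamType<T>>(
    saved_mean, saved_mean->numel() * sizeof(BatchNormParamType<T>));
// The activation output itself stays in T.
dev_ctx.Alloc<T>(y, y->numel() * sizeof(T));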
- auto *reserve_space = ctx.Output("ReserveSpace"); + auto *reserve_space = ctx.Output("ReserveSpace"); PADDLE_ENFORCE_NOT_NULL( reserve_space, platform::errors::NotFound( @@ -220,12 +220,12 @@ class FusedBatchNormAddActGradKernel double epsilon = static_cast(ctx.Attr("epsilon")); std::string act_type = ctx.Attr("act_type"); - const auto *x = ctx.Input("X"); - const auto *y = ctx.Input("Y"); - const auto *d_y = ctx.Input(framework::GradVarName("Y")); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); - const auto *reserve_space = ctx.Input("ReserveSpace"); + const auto *x = ctx.Input("X"); + const auto *y = ctx.Input("Y"); + const auto *d_y = ctx.Input(framework::GradVarName("Y")); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + const auto *reserve_space = ctx.Input("ReserveSpace"); auto &dev_ctx = ctx.template device_context(); const auto &in_dims = x->dims(); @@ -235,10 +235,11 @@ class FusedBatchNormAddActGradKernel ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D); // init output - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_z = ctx.Output(framework::GradVarName("Z")); - auto *d_scale = ctx.Output(framework::GradVarName("Scale")); - auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_z = ctx.Output(framework::GradVarName("Z")); + auto *d_scale = + ctx.Output(framework::GradVarName("Scale")); + auto *d_bias = ctx.Output(framework::GradVarName("Bias")); d_x->mutable_data(ctx.GetPlace()); d_z->mutable_data(ctx.GetPlace()); @@ -286,8 +287,8 @@ class FusedBatchNormAddActGradKernel PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::cudnnDeriveBNTensorDescriptor( bn_param_desc_, data_desc_, mode_)); - const auto *saved_mean = ctx.Input("SavedMean"); - const auto *saved_var = ctx.Input("SavedVariance"); + const auto *saved_mean = ctx.Input("SavedMean"); + const auto *saved_var = ctx.Input("SavedVariance"); const auto *saved_mean_data = saved_mean->template data>(); const auto *saved_var_data = diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.h b/paddle/fluid/operators/fused/fused_bn_add_activation_op.h index 07d2e4564b692..f4913bca3df98 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.h +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.h @@ -26,7 +26,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusedBatchNormAddActOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fused_dropout_act_bias_test.cu b/paddle/fluid/operators/fused/fused_dropout_act_bias_test.cu index 06810c18cc05a..56f150c2dce42 100644 --- a/paddle/fluid/operators/fused/fused_dropout_act_bias_test.cu +++ b/paddle/fluid/operators/fused/fused_dropout_act_bias_test.cu @@ -49,8 +49,8 @@ struct TestFusedDropoutActBias { bool is_upscale_in_train; bool is_test; // default false, Set to true for inference only bool has_bias = true; - framework::Tensor src, bias, out, mask; - framework::Tensor dsrc, dbias; + phi::DenseTensor src, bias, out, mask; + phi::DenseTensor dsrc, dbias; std::vector src_vec, bias_vec, out_vec, mask_vec; std::vector correct_out, correct_dsrc, correct_dbias; diff --git a/paddle/fluid/operators/fused/fused_dropout_helper.h b/paddle/fluid/operators/fused/fused_dropout_helper.h index 2d1491fefb07e..5d6dd1a5bbf81 100644 --- a/paddle/fluid/operators/fused/fused_dropout_helper.h +++ b/paddle/fluid/operators/fused/fused_dropout_helper.h @@ -38,7 +38,7 @@ struct DropoutParam { bool is_test; bool fix_seed; int increment; - const framework::Tensor* tensor_seed; + const phi::DenseTensor* tensor_seed; int seed_val; DropoutParam() { @@ -56,7 +56,7 @@ struct DropoutParam { bool is_test_, bool is_upscale_in_train_, float dropout_prob_, - const framework::Tensor* tensor_seed_, + const phi::DenseTensor* tensor_seed_, int seed_val_) { fix_seed = fix_seed_; seed = seed_; @@ -95,8 +95,9 @@ struct DropoutParam { } else { str_seed = str_seed + "Seed"; } - tensor_seed = - context.HasInput(str_seed) ? context.Input(str_seed) : nullptr; + tensor_seed = context.HasInput(str_seed) + ? 
context.Input(str_seed) + : nullptr; seed_val = context.Attr(pre_fix + "seed"); } diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc index 8560907680480..8c81a646fdebb 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc @@ -174,8 +174,8 @@ class FusedElemwiseActivationOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - PADDLE_ENFORCE_EQ(ctx.Input("X")->dtype(), - ctx.Input("Y")->dtype(), + PADDLE_ENFORCE_EQ(ctx.Input("X")->dtype(), + ctx.Input("Y")->dtype(), platform::errors::InvalidArgument( "The element's type of input should be the same.")); return framework::OpKernelType( diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h index 5942404a6beb1..0d6a5e3b40da9 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h @@ -49,13 +49,12 @@ template -static void RunBinaryCompoundFunctor( - const framework::ExecutionContext &ctx, - const BinaryFunctor &binary_functor, - const UnaryFunctor &unary_functor, - const framework::Tensor &in_x, - const framework::Tensor &in_y, - std::vector *outputs) { +static void RunBinaryCompoundFunctor(const framework::ExecutionContext &ctx, + const BinaryFunctor &binary_functor, + const UnaryFunctor &unary_functor, + const phi::DenseTensor &in_x, + const phi::DenseTensor &in_y, + std::vector *outputs) { // Z = Binary(X, Unary(Y)) // intermediate_out = Unary(Y) // out = Binary(X, Unary(Y)) @@ -86,13 +85,12 @@ template -static void RunUnaryCompoundFunctors( - const framework::ExecutionContext &ctx, - const UnaryFunctor &unary_functor, - const BinaryFunctor &binary_functor, - const framework::Tensor &in_x, - const framework::Tensor &in_y, - std::vector *outputs) { +static void RunUnaryCompoundFunctors(const framework::ExecutionContext &ctx, + const UnaryFunctor &unary_functor, + const BinaryFunctor &binary_functor, + const phi::DenseTensor &in_x, + const phi::DenseTensor &in_y, + std::vector *outputs) { // Z = Unary(Binary(X, Y)) // intermediate_out = Binary(X, Y) // out = Unary(Binary(X, Y)) @@ -132,14 +130,14 @@ static void RunBinaryCompoundGradFunctors( const BinaryGradFunctor &binary_grad_functor, const UnaryFunctor &unary_functor, const UnaryGradFunctor &unary_grad_functor, - const framework::Tensor *in_x, - const framework::Tensor *in_y, - const framework::Tensor *in_out, - const framework::Tensor *in_intermediate_out, - const framework::Tensor *in_out_grad, - framework::Tensor *x_grad, - framework::Tensor *y_grad, - framework::Tensor *d_intermediate_out) { + const phi::DenseTensor *in_x, + const phi::DenseTensor *in_y, + const phi::DenseTensor *in_out, + const phi::DenseTensor *in_intermediate_out, + const phi::DenseTensor *in_out_grad, + phi::DenseTensor *x_grad, + phi::DenseTensor *y_grad, + phi::DenseTensor *d_intermediate_out) { // Z = Binary(X, Unary(Y)) int axis = ctx.Attr("axis"); @@ -218,14 +216,14 @@ static void RunUnaryCompoundGradFunctors( const UnaryGradFunctor &unary_grad_functor, const BinaryFunctor &binary_functor, const BinaryGradFunctor &binary_grad_functor, - const framework::Tensor *in_x, - const framework::Tensor *in_y, - const framework::Tensor *in_out, - const framework::Tensor 
*in_intermediate_out, - const framework::Tensor *in_out_grad, - framework::Tensor *x_grad, - framework::Tensor *y_grad, - framework::Tensor *d_intermediate_out) { + const phi::DenseTensor *in_x, + const phi::DenseTensor *in_y, + const phi::DenseTensor *in_out, + const phi::DenseTensor *in_intermediate_out, + const phi::DenseTensor *in_out_grad, + phi::DenseTensor *x_grad, + phi::DenseTensor *y_grad, + phi::DenseTensor *d_intermediate_out) { // Z = Unary(Binary(X, Y)) int axis = ctx.Attr("axis"); @@ -298,9 +296,9 @@ static void RunUnaryCompoundGradFunctors( template static void RunFunctors(const framework::ExecutionContext &ctx, - const framework::Tensor &in_x, - const framework::Tensor &in_y, - std::vector *outputs) { + const phi::DenseTensor &in_x, + const phi::DenseTensor &in_y, + std::vector *outputs) { auto &functors = ctx.Attr>("functor_list"); // TODO(zcd): The following code can be refined. @@ -424,14 +422,14 @@ static void RunFunctors(const framework::ExecutionContext &ctx, template static void RunGradFunctors(const framework::ExecutionContext &ctx, - const framework::Tensor *in_x, - const framework::Tensor *in_y, - const framework::Tensor *in_out, - const framework::Tensor *in_intermediate_out, - const framework::Tensor *in_out_grad, - framework::Tensor *x_grad, - framework::Tensor *y_grad, - framework::Tensor *d_intermediate_out) { + const phi::DenseTensor *in_x, + const phi::DenseTensor *in_y, + const phi::DenseTensor *in_out, + const phi::DenseTensor *in_intermediate_out, + const phi::DenseTensor *in_out_grad, + phi::DenseTensor *x_grad, + phi::DenseTensor *y_grad, + phi::DenseTensor *d_intermediate_out) { auto &functors = ctx.Attr>("functor_list"); auto funcs_str = functors[0] + "," + functors[1]; @@ -622,11 +620,11 @@ template class FusedElemwiseActivationKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto &in_x = GET_DATA_SAFELY(ctx.Input("X"), + auto &in_x = GET_DATA_SAFELY(ctx.Input("X"), "Input", "X", "FusedElemwiseActivation"); - auto &in_y = GET_DATA_SAFELY(ctx.Input("Y"), + auto &in_y = GET_DATA_SAFELY(ctx.Input("Y"), "Input", "Y", "FusedElemwiseActivation"); @@ -635,9 +633,9 @@ class FusedElemwiseActivationKernel : public framework::OpKernel { true, platform::errors::InvalidArgument( "The output(Out) should not be empty")); - auto output = ctx.Output("Out"); + auto output = ctx.Output("Out"); - std::vector outputs; + std::vector outputs; outputs.emplace_back(output); if (ctx.Attr("save_intermediate_out")) { @@ -647,7 +645,7 @@ class FusedElemwiseActivationKernel : public framework::OpKernel { "The save_intermediate_out is enable, so the " "IntermediateOut should not be empty.")); - auto intermediate_out = ctx.Output("IntermediateOut"); + auto intermediate_out = ctx.Output("IntermediateOut"); outputs.emplace_back(intermediate_out); } else { outputs.emplace_back(nullptr); @@ -661,42 +659,42 @@ template class FusedElemwiseActivationGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto in_y = ctx.Input("Y"); + auto in_y = ctx.Input("Y"); PADDLE_ENFORCE_NE( in_y, nullptr, platform::errors::InvalidArgument("Input(Y) should not be nullptr.")); - auto in_out = ctx.Input("Out"); + auto in_out = ctx.Input("Out"); PADDLE_ENFORCE_NE( in_out, nullptr, platform::errors::InvalidArgument("Input(Out) should not be nullptr.")); auto in_out_grad = - ctx.Input(framework::GradVarName("Out")); + ctx.Input(framework::GradVarName("Out")); 
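The Z = Binary(X, Unary(Y)) and Z = Unary(Binary(X, Y)) comments above are the whole contract of these helpers. A scalar-level illustration, purely explanatory and not code from the patch, using relu and add as stand-in functors:

#include <algorithm>

// Explanatory sketch: the two compound shapes the Run*CompoundFunctor(s)
// helpers above evaluate element-wise over the input tensors.
inline float BinaryCompound(float x, float y) {
  auto unary = [](float v) { return std::max(v, 0.0f); };  // stand-in: relu
  auto binary = [](float a, float b) { return a + b; };    // stand-in: add
  // Z = Binary(X, Unary(Y)); intermediate_out = Unary(Y)
  return binary(x, unary(y));
}

inline float UnaryCompound(float x, float y) {
  auto unary = [](float v) { return std::max(v, 0.0f); };
  auto binary = [](float a, float b) { return a + b; };
  // Z = Unary(Binary(X, Y)); intermediate_out = Binary(X, Y)
  return unary(binary(x, y));
}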
PADDLE_ENFORCE_NE(in_out_grad, nullptr, platform::errors::InvalidArgument( "Input(Out@Grad) should not be nullptr.")); - framework::Tensor *in_x = - const_cast(ctx.Input("X")); - framework::Tensor *x_grad = - ctx.Output(framework::GradVarName("X")); - framework::Tensor *y_grad = - ctx.Output(framework::GradVarName("Y")); - framework::Tensor *d_intermediate_out = ctx.Output( - framework::GradVarName("IntermediateOut")); + phi::DenseTensor *in_x = + const_cast(ctx.Input("X")); + phi::DenseTensor *x_grad = + ctx.Output(framework::GradVarName("X")); + phi::DenseTensor *y_grad = + ctx.Output(framework::GradVarName("Y")); + phi::DenseTensor *d_intermediate_out = + ctx.Output(framework::GradVarName("IntermediateOut")); auto functor_list = ctx.Attr>("functor_list"); // Get intermediate_out - framework::Tensor *in_intermediate_out = nullptr; + phi::DenseTensor *in_intermediate_out = nullptr; if (ctx.Attr("save_intermediate_out")) { // if save_intermediate_out is true, for Unary(Binary(x, y)) and // Binary(x, Unary(y)), the Binary(x, y) and Unary(y) not need to // recompute. - in_intermediate_out = const_cast( - ctx.Input("IntermediateOut")); + in_intermediate_out = const_cast( + ctx.Input("IntermediateOut")); PADDLE_ENFORCE_NE(in_intermediate_out, nullptr, platform::errors::InvalidArgument( @@ -725,7 +723,7 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { platform::errors::InvalidArgument( "Only when the compoundfunctor contains " "elementwise_add_grad, the 'X' could be absent.")); - in_x = const_cast(in_out_grad); + in_x = const_cast(in_out_grad); } bool has_in_place = HasInPlaceUnary(functor_list); diff --git a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc index 1b291cfa018ad..4f8c4d12d6b58 100644 --- a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc @@ -105,7 +105,7 @@ class EmbeddingEltWiseLayerNormOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - auto inputs = ctx.MultiInput("Embs"); + auto inputs = ctx.MultiInput("Embs"); auto input_data_type = framework::proto::VarType::Type(0); bool flag = 0; for (auto* input : inputs) { diff --git a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu index 75e131b2deb34..8360f07a5f3e7 100644 --- a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu +++ b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu @@ -33,13 +33,13 @@ template class EmbeddingEltWiseLayerNormKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; auto &device_ctx = context.template device_context(); - auto ids = context.MultiInput("Ids"); - auto embs = context.MultiInput("Embs"); + auto ids = context.MultiInput("Ids"); + auto embs = context.MultiInput("Embs"); int input_num = static_cast(ids.size()); - framework::Tensor in_ids_( + phi::DenseTensor in_ids_( framework::TransToPhiDataType(framework::proto::VarType::INT64)), in_embs_( framework::TransToPhiDataType(framework::proto::VarType::INT64)); @@ -90,9 +90,9 @@ class EmbeddingEltWiseLayerNormKernel : public framework::OpKernel { device_ctx.stream()); #endif - 
auto *bias = context.Input("Bias"); - auto *scale = context.Input("Scale"); - auto *out = context.Output("Out"); + auto *bias = context.Input("Bias"); + auto *scale = context.Input("Scale"); + auto *out = context.Output("Out"); // should be (B * S * hidden) auto id0_dims = ids[0]->dims(); diff --git a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc index 8f413f34242a8..af75fa6112e3a 100644 --- a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc @@ -285,17 +285,17 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel { act_cand = act_functor(act_cand_str); \ } -#define INIT_BASE_INPUT_OUTPUT \ - auto* ids = ctx.Input("Ids"); \ - auto* h0 = ctx.Input("H0"); \ - auto* c0 = ctx.Input("C0"); \ - auto* embeddings = ctx.Input("Embeddings"); \ - auto* wh = ctx.Input("WeightH"); \ - auto* bias = ctx.Input("Bias"); \ - auto* xx = ctx.Output("XX"); \ - auto* hidden_out = ctx.Output("Hidden"); \ - auto* cell_out = ctx.Output("Cell"); \ - bool is_reverse = ctx.Attr("is_reverse"); \ +#define INIT_BASE_INPUT_OUTPUT \ + auto* ids = ctx.Input("Ids"); \ + auto* h0 = ctx.Input("H0"); \ + auto* c0 = ctx.Input("C0"); \ + auto* embeddings = ctx.Input("Embeddings"); \ + auto* wh = ctx.Input("WeightH"); \ + auto* bias = ctx.Input("Bias"); \ + auto* xx = ctx.Output("XX"); \ + auto* hidden_out = ctx.Output("Hidden"); \ + auto* cell_out = ctx.Output("Cell"); \ + bool is_reverse = ctx.Attr("is_reverse"); \ bool use_peepholes = ctx.Attr("use_peepholes"); #define INIT_BASE_SIZES \ @@ -506,8 +506,8 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel { INIT_VEC_FUNC INIT_BASE_INPUT_DATAS - auto* reordered_h0 = ctx.Output("ReorderedH0"); - auto* reordered_c0 = ctx.Output("ReorderedC0"); + auto* reordered_h0 = ctx.Output("ReorderedH0"); + auto* reordered_c0 = ctx.Output("ReorderedC0"); auto* batched_input = ctx.Output("BatchedInput"); auto* batched_c_out = ctx.Output("BatchedCell"); auto* batched_h_out = ctx.Output("BatchedHidden"); diff --git a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h index 2775b2ac04d28..129123fc52cd7 100644 --- a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h +++ b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusedEmbeddingFCLSTMOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h index c593c65618d78..ced30ccc50ec5 100644 --- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h +++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h @@ -28,7 +28,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using SelectedRows = phi::SelectedRows; using DDim = framework::DDim; diff --git a/paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu b/paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu index 758fb8a23f8f9..74ba0b54afd45 100644 --- a/paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu +++ b/paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu @@ -384,9 +384,9 @@ template class FusedFCElementwiseLayerNormOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* w = ctx.Input("W"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* w = ctx.Input("W"); + auto* out = ctx.Output("Out"); auto w_dims = w->dims(); int N = w_dims[1]; @@ -413,18 +413,18 @@ class FusedFCElementwiseLayerNormOpKernel : public framework::OpKernel { static_cast(0.0), out_data, N); - auto* y = ctx.Input("Y"); - auto* bias_0 = ctx.Input("Bias0"); - auto* bias_1 = ctx.Input("Bias1"); - auto* scale = ctx.Input("Scale"); + auto* y = ctx.Input("Y"); + auto* bias_0 = ctx.Input("Bias0"); + auto* bias_1 = ctx.Input("Bias1"); + auto* scale = ctx.Input("Scale"); const T* y_data = y->data(); const T* bias_0_data = bias_0 ? bias_0->data() : nullptr; const T* bias_1_data = bias_1 ? bias_1->data() : nullptr; const T* scale_data = scale ? scale->data() : nullptr; - auto* mean = ctx.Output("Mean"); - auto* variance = ctx.Output("Variance"); + auto* mean = ctx.Output("Mean"); + auto* variance = ctx.Output("Variance"); T* mean_data = mean ? dev_ctx.template Alloc(mean, mean->numel() * sizeof(T)) diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cc b/paddle/fluid/operators/fused/fused_feedforward_op.cc index 9b8b256a9ee54..71fe468f780b2 100644 --- a/paddle/fluid/operators/fused/fused_feedforward_op.cc +++ b/paddle/fluid/operators/fused/fused_feedforward_op.cc @@ -23,7 +23,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusedFeedForwardOp : public framework::OperatorWithKernel { public: @@ -345,7 +345,7 @@ class FusedFeedForwardOpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - auto input = ctx.Input("X"); + auto input = ctx.Input("X"); auto input_data_type = framework::TransToProtoVarType(input->dtype()); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cu b/paddle/fluid/operators/fused/fused_feedforward_op.cu index 33d1e89bf28fe..6084b1f61f80c 100644 --- a/paddle/fluid/operators/fused/fused_feedforward_op.cu +++ b/paddle/fluid/operators/fused/fused_feedforward_op.cu @@ -31,10 +31,10 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template -static void AllReduce(framework::Tensor& tensor, // NOLINT +static void AllReduce(phi::DenseTensor& tensor, // NOLINT const int ring_id, const phi::GPUContext& ctx) { if (ring_id == -1) return; @@ -74,9 +74,9 @@ template class FusedFeedForwardKernel : public framework::OpKernel { public: void MatMul(const phi::GPUContext& ctx, - const framework::Tensor& a, - const framework::Tensor& b, - framework::Tensor* c) const { + const phi::DenseTensor& a, + const phi::DenseTensor& b, + phi::DenseTensor* c) const { auto blas = phi::funcs::GetBlas(ctx); auto a_2d = FoldInitDims(a); auto b_2d = FoldInitDims(b); @@ -87,26 +87,26 @@ class FusedFeedForwardKernel : public framework::OpKernel { } void FFN(const phi::GPUContext& ctx, - const framework::Tensor& x, - const framework::Tensor& linear1_weight, - const framework::Tensor* linear1_bias, - const framework::Tensor& linear2_weight, - const framework::Tensor* linear2_bias, - const framework::Tensor* ln1_scale, - const framework::Tensor* ln1_bias, - const framework::Tensor* ln2_scale, - const framework::Tensor* ln2_bias, - framework::Tensor* out, - framework::Tensor* dropout1_mask, - framework::Tensor* dropout2_mask, - framework::Tensor* ln1_mean, - framework::Tensor* ln1_variance, - framework::Tensor* ln2_mean, - framework::Tensor* ln2_variance, - framework::Tensor* linear1_out, - framework::Tensor* ln1_out, - framework::Tensor* dropout1_out, - framework::Tensor* dropout2_out, + const phi::DenseTensor& x, + const phi::DenseTensor& linear1_weight, + const phi::DenseTensor* linear1_bias, + const phi::DenseTensor& linear2_weight, + const phi::DenseTensor* linear2_bias, + const phi::DenseTensor* ln1_scale, + const phi::DenseTensor* ln1_bias, + const phi::DenseTensor* ln2_scale, + const phi::DenseTensor* ln2_bias, + phi::DenseTensor* out, + phi::DenseTensor* dropout1_mask, + phi::DenseTensor* dropout2_mask, + phi::DenseTensor* ln1_mean, + phi::DenseTensor* ln1_variance, + phi::DenseTensor* ln2_mean, + phi::DenseTensor* ln2_variance, + phi::DenseTensor* linear1_out, + phi::DenseTensor* ln1_out, + phi::DenseTensor* dropout1_out, + phi::DenseTensor* dropout2_out, const int bsz_seq, const int d_model, const int dim_feedforward, @@ -126,7 +126,7 @@ class FusedFeedForwardKernel : public framework::OpKernel { ctx, bsz_seq, d_model, dropout_param2, epsilon2); using U = LayerNormParamType; - const framework::Tensor* in = &x; + const phi::DenseTensor* in = &x; const U* ln1_scale_ptr = ln1_scale == nullptr ? 
nullptr : ln1_scale->data(); @@ -156,7 +156,7 @@ class FusedFeedForwardKernel : public framework::OpKernel { act_method, dropout1_out->data(), dropout1_mask->data()); - framework::Tensor linear2_out; + phi::DenseTensor linear2_out; linear2_out.Resize({bsz_seq, d_model}); ctx.Alloc(&linear2_out, linear2_out.numel() * sizeof(T)); MatMul(ctx, *dropout1_out, linear2_weight, &linear2_out); @@ -197,43 +197,41 @@ class FusedFeedForwardKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* linear1_weight = context.Input("Linear1Weight"); - auto* linear1_bias = context.Input("Linear1Bias"); - auto* linear2_weight = context.Input("Linear2Weight"); - auto* linear2_bias = context.Input("Linear2Bias"); + auto* x = context.Input("X"); + auto* linear1_weight = context.Input("Linear1Weight"); + auto* linear1_bias = context.Input("Linear1Bias"); + auto* linear2_weight = context.Input("Linear2Weight"); + auto* linear2_bias = context.Input("Linear2Bias"); const bool pre_layer_norm = context.Attr("pre_layer_norm"); auto& dev_ctx = context.template device_context(); auto* ln1_scale = - pre_layer_norm ? context.Input("Ln1Scale") : nullptr; + pre_layer_norm ? context.Input("Ln1Scale") : nullptr; auto* ln1_bias = - pre_layer_norm ? context.Input("Ln1Bias") : nullptr; - auto* ln2_scale = !pre_layer_norm - ? context.Input("Ln2Scale") - : nullptr; + pre_layer_norm ? context.Input("Ln1Bias") : nullptr; + auto* ln2_scale = + !pre_layer_norm ? context.Input("Ln2Scale") : nullptr; auto* ln2_bias = - !pre_layer_norm ? context.Input("Ln2Bias") : nullptr; + !pre_layer_norm ? context.Input("Ln2Bias") : nullptr; auto* ln1_mean = - pre_layer_norm ? context.Output("Ln1Mean") : nullptr; + pre_layer_norm ? context.Output("Ln1Mean") : nullptr; auto* ln1_variance = pre_layer_norm - ? context.Output("Ln1Variance") + ? context.Output("Ln1Variance") : nullptr; - auto* ln2_mean = !pre_layer_norm - ? context.Output("Ln2Mean") - : nullptr; + auto* ln2_mean = + !pre_layer_norm ? context.Output("Ln2Mean") : nullptr; auto* ln2_variance = !pre_layer_norm - ? context.Output("Ln2Variance") + ? context.Output("Ln2Variance") : nullptr; - auto* out = context.Output("Out"); - auto* dropout1_mask = context.Output("Dropout1Mask"); - auto* dropout2_mask = context.Output("Dropout2Mask"); - auto* linear1_out = context.Output("Linear1Out"); + auto* out = context.Output("Out"); + auto* dropout1_mask = context.Output("Dropout1Mask"); + auto* dropout2_mask = context.Output("Dropout2Mask"); + auto* linear1_out = context.Output("Linear1Out"); auto* ln1_out = - pre_layer_norm ? context.Output("Ln1Out") : nullptr; - auto* dropout1_out = context.Output("Dropout1Out"); - auto* dropout2_out = context.Output("Dropout2Out"); + pre_layer_norm ? 
context.Output("Ln1Out") : nullptr; + auto* dropout1_out = context.Output("Dropout1Out"); + auto* dropout2_out = context.Output("Dropout2Out"); const std::string act_method = context.Attr("act_method"); @@ -312,11 +310,11 @@ template class FusedFeedForwardGradKernel : public framework::OpKernel { public: void MatMulGrad(const phi::GPUContext& ctx, - const framework::Tensor& d_out, - const framework::Tensor& a, - const framework::Tensor& b, - framework::Tensor* d_a, - framework::Tensor* d_b) const { + const phi::DenseTensor& d_out, + const phi::DenseTensor& a, + const phi::DenseTensor& b, + phi::DenseTensor* d_a, + phi::DenseTensor* d_b) const { auto blas = phi::funcs::GetBlas(ctx); auto a_2d = FoldInitDims(a); auto b_2d = FoldInitDims(b); @@ -330,34 +328,34 @@ class FusedFeedForwardGradKernel : public framework::OpKernel { } void FFNGrad(const phi::GPUContext& ctx, - const framework::Tensor& d_out, - const framework::Tensor& x, - const framework::Tensor& dropout1_mask, - const framework::Tensor& dropout2_mask, - const framework::Tensor& linear1_out, - const framework::Tensor* ln1_out, - const framework::Tensor& dropout1_out, - const framework::Tensor& dropout2_out, - const framework::Tensor& linear1_weight, - const framework::Tensor* linear1_bias, - const framework::Tensor& linear2_weight, - const framework::Tensor* ln1_gamma, - const framework::Tensor* ln1_beta, - const framework::Tensor* ln1_mean, - const framework::Tensor* ln1_variance, - const framework::Tensor* ln2_gamma, - const framework::Tensor* ln2_beta, - const framework::Tensor* ln2_mean, - const framework::Tensor* ln2_variance, - framework::Tensor* d_x, - framework::Tensor* d_linear1_weight, - framework::Tensor* d_linear1_bias, - framework::Tensor* d_linear2_weight, - framework::Tensor* d_linear2_bias, - framework::Tensor* d_ln1_gamma, - framework::Tensor* d_ln1_beta, - framework::Tensor* d_ln2_gamma, - framework::Tensor* d_ln2_beta, + const phi::DenseTensor& d_out, + const phi::DenseTensor& x, + const phi::DenseTensor& dropout1_mask, + const phi::DenseTensor& dropout2_mask, + const phi::DenseTensor& linear1_out, + const phi::DenseTensor* ln1_out, + const phi::DenseTensor& dropout1_out, + const phi::DenseTensor& dropout2_out, + const phi::DenseTensor& linear1_weight, + const phi::DenseTensor* linear1_bias, + const phi::DenseTensor& linear2_weight, + const phi::DenseTensor* ln1_gamma, + const phi::DenseTensor* ln1_beta, + const phi::DenseTensor* ln1_mean, + const phi::DenseTensor* ln1_variance, + const phi::DenseTensor* ln2_gamma, + const phi::DenseTensor* ln2_beta, + const phi::DenseTensor* ln2_mean, + const phi::DenseTensor* ln2_variance, + phi::DenseTensor* d_x, + phi::DenseTensor* d_linear1_weight, + phi::DenseTensor* d_linear1_bias, + phi::DenseTensor* d_linear2_weight, + phi::DenseTensor* d_linear2_bias, + phi::DenseTensor* d_ln1_gamma, + phi::DenseTensor* d_ln1_beta, + phi::DenseTensor* d_ln2_gamma, + phi::DenseTensor* d_ln2_beta, const int bsz_seq, const int d_model, const int dim_feedforward, @@ -396,7 +394,7 @@ class FusedFeedForwardGradKernel : public framework::OpKernel { d_ln2_gamma == nullptr ? nullptr : d_ln2_gamma->data(); U* d_ln2_beta_ptr = d_ln2_beta == nullptr ? 
nullptr : d_ln2_beta->data(); - framework::Tensor d_linear2_out, d_dropout2_out, d_residual; + phi::DenseTensor d_linear2_out, d_dropout2_out, d_residual; d_linear2_out.Resize({bsz_seq, d_model}); ctx.Alloc(&d_linear2_out, d_linear2_out.numel() * sizeof(T)); d_dropout2_out.Resize({bsz_seq, d_model}); @@ -433,7 +431,7 @@ class FusedFeedForwardGradKernel : public framework::OpKernel { d_residual_ptr); } - framework::Tensor d_dropout1_out; + phi::DenseTensor d_dropout1_out; d_dropout1_out.Resize({bsz_seq, dim_feedforward}); ctx.Alloc(&d_dropout1_out, d_dropout1_out.numel() * sizeof(T)); MatMulGrad(ctx, @@ -443,7 +441,7 @@ class FusedFeedForwardGradKernel : public framework::OpKernel { &d_dropout1_out, d_linear2_weight); - framework::Tensor d_linear1_out; + phi::DenseTensor d_linear1_out; d_linear1_out.Resize({bsz_seq, dim_feedforward}); ctx.Alloc(&d_linear1_out, d_linear1_out.numel() * sizeof(T)); fused_act_dropout_helper.DropoutActBiasGrad(ctx, @@ -456,7 +454,7 @@ class FusedFeedForwardGradKernel : public framework::OpKernel { act_method); if (pre_layer_norm) { - framework::Tensor d_ln1_out; + phi::DenseTensor d_ln1_out; d_ln1_out.Resize({bsz_seq, d_model}); ctx.Alloc(&d_ln1_out, d_ln1_out.numel() * sizeof(T)); MatMulGrad(ctx, @@ -484,8 +482,8 @@ class FusedFeedForwardGradKernel : public framework::OpKernel { if (add_residual) { // gradient accumulation - std::vector ins = {&d_residual, d_x}; - std::vector outs = {d_x}; + std::vector ins = {&d_residual, d_x}; + std::vector outs = {d_x}; phi::funcs::ElementwiseKernel( ctx, ins, &outs, phi::funcs::AddFunctor()); } @@ -495,61 +493,60 @@ class FusedFeedForwardGradKernel : public framework::OpKernel { using U = LayerNormParamType; auto& dev_ctx = context.template device_context(); auto d_out = - *context.Input(framework::GradVarName("Out")); - auto x = *context.Input("X"); + *context.Input(framework::GradVarName("Out")); + auto x = *context.Input("X"); const bool pre_layer_norm = context.Attr("pre_layer_norm"); - auto dropout1_mask = *context.Input("Dropout1Mask"); - auto dropout2_mask = *context.Input("Dropout2Mask"); - auto linear1_out = *context.Input("Linear1Out"); + auto dropout1_mask = *context.Input("Dropout1Mask"); + auto dropout2_mask = *context.Input("Dropout2Mask"); + auto linear1_out = *context.Input("Linear1Out"); auto* ln1_out = - pre_layer_norm ? context.Input("Ln1Out") : nullptr; - auto dropout1_out = *context.Input("Dropout1Out"); - auto dropout2_out = *context.Input("Dropout2Out"); - auto linear1_weight = *context.Input("Linear1Weight"); - auto* linear1_bias = context.Input("Linear1Bias"); - auto linear2_weight = *context.Input("Linear2Weight"); + pre_layer_norm ? context.Input("Ln1Out") : nullptr; + auto dropout1_out = *context.Input("Dropout1Out"); + auto dropout2_out = *context.Input("Dropout2Out"); + auto linear1_weight = *context.Input("Linear1Weight"); + auto* linear1_bias = context.Input("Linear1Bias"); + auto linear2_weight = *context.Input("Linear2Weight"); auto* ln1_mean = - pre_layer_norm ? context.Input("Ln1Mean") : nullptr; + pre_layer_norm ? context.Input("Ln1Mean") : nullptr; auto* ln1_variance = pre_layer_norm - ? context.Input("Ln1Variance") + ? context.Input("Ln1Variance") : nullptr; auto* ln1_scale = - pre_layer_norm ? context.Input("Ln1Scale") : nullptr; + pre_layer_norm ? context.Input("Ln1Scale") : nullptr; auto* ln1_bias = - pre_layer_norm ? context.Input("Ln1Bias") : nullptr; + pre_layer_norm ? context.Input("Ln1Bias") : nullptr; auto* ln2_mean = - !pre_layer_norm ? 
context.Input("Ln2Mean") : nullptr; + !pre_layer_norm ? context.Input("Ln2Mean") : nullptr; auto* ln2_variance = !pre_layer_norm - ? context.Input("Ln2Variance") + ? context.Input("Ln2Variance") : nullptr; - auto* ln2_scale = !pre_layer_norm - ? context.Input("Ln2Scale") - : nullptr; + auto* ln2_scale = + !pre_layer_norm ? context.Input("Ln2Scale") : nullptr; auto* ln2_bias = - !pre_layer_norm ? context.Input("Ln2Bias") : nullptr; + !pre_layer_norm ? context.Input("Ln2Bias") : nullptr; - auto* d_x = context.Output(framework::GradVarName("X")); - auto* d_ln1_scale = pre_layer_norm ? context.Output( + auto* d_x = context.Output(framework::GradVarName("X")); + auto* d_ln1_scale = pre_layer_norm ? context.Output( framework::GradVarName("Ln1Scale")) : nullptr; - auto* d_ln1_bias = pre_layer_norm ? context.Output( + auto* d_ln1_bias = pre_layer_norm ? context.Output( framework::GradVarName("Ln1Bias")) : nullptr; auto* d_ln2_scale = pre_layer_norm ? nullptr - : context.Output( + : context.Output( framework::GradVarName("Ln2Scale")); auto* d_ln2_bias = pre_layer_norm ? nullptr - : context.Output( + : context.Output( framework::GradVarName("Ln2Bias")); - auto* d_linear1_weight = context.Output( + auto* d_linear1_weight = context.Output( framework::GradVarName("Linear1Weight")); - auto* d_linear1_bias = context.Output( - framework::GradVarName("Linear1Bias")); - auto* d_linear2_weight = context.Output( + auto* d_linear1_bias = + context.Output(framework::GradVarName("Linear1Bias")); + auto* d_linear2_weight = context.Output( framework::GradVarName("Linear2Weight")); - auto* d_linear2_bias = context.Output( - framework::GradVarName("Linear2Bias")); + auto* d_linear2_bias = + context.Output(framework::GradVarName("Linear2Bias")); const float epsilon1 = context.Attr("ln1_epsilon"); const float epsilon2 = context.Attr("ln2_epsilon"); diff --git a/paddle/fluid/operators/fused/fused_gate_attention.h b/paddle/fluid/operators/fused/fused_gate_attention.h index 12db3e6e0d63d..e50cc24d88adf 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention.h +++ b/paddle/fluid/operators/fused/fused_gate_attention.h @@ -24,9 +24,9 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; -inline std::string MemoryDebugString(const Tensor& t) { +inline std::string MemoryDebugString(const phi::DenseTensor& t) { int device_id = platform::GetCurrentDeviceId(); int64_t allocated = memory::DeviceMemoryStatCurrentValue("Allocated", device_id); @@ -46,7 +46,7 @@ inline std::string MemoryDebugString(const Tensor& t) { template void AllocWithDebugInfo(const phi::GPUContext& dev_ctx, const std::string& info, - Tensor* t) { + phi::DenseTensor* t) { dev_ctx.Alloc(t, t->numel() * sizeof(T)); VLOG(4) << info << ": " << MemoryDebugString(*t); } @@ -87,10 +87,10 @@ struct GateAttentionConfig { phi::DDim gate_out_dims; GateAttentionConfig(const phi::GPUContext& dev_ctx, - const Tensor* query, - const Tensor* key, - const Tensor* query_weight, - const Tensor* qkv_weight, + const phi::DenseTensor* query, + const phi::DenseTensor* key, + const phi::DenseTensor* query_weight, + const phi::DenseTensor* qkv_weight, bool merge_qkv, bool has_gating) : dev_ctx(dev_ctx), merge_qkv(merge_qkv), has_gating(has_gating) { @@ -152,7 +152,7 @@ struct GateAttentionConfig { return batch_size * seq_len_m * seq_len_r * num_heads * head_dim; } - Tensor* GetQKVOut() { + phi::DenseTensor* GetQKVOut() { if (!qkv_out.IsInitialized()) { qkv_out.Resize(qkv_out_dims); AllocWithDebugInfo(dev_ctx, "qkv_out", &qkv_out); @@ -160,7 +160,7 @@ struct GateAttentionConfig { return &qkv_out; } - Tensor* GetQueryOut() { + phi::DenseTensor* GetQueryOut() { if (!query_out.IsInitialized()) { query_out.Resize(q_out_dims); AllocWithDebugInfo(dev_ctx, "query_out", &query_out); @@ -168,7 +168,7 @@ struct GateAttentionConfig { return &query_out; } - Tensor* GetKeyOut() { + phi::DenseTensor* GetKeyOut() { if (!key_out.IsInitialized()) { key_out.Resize(kv_out_dims); AllocWithDebugInfo(dev_ctx, "key_out", &key_out); @@ -176,7 +176,7 @@ struct GateAttentionConfig { return &key_out; } - Tensor* GetValueOut() { + phi::DenseTensor* GetValueOut() { if (!value_out.IsInitialized()) { value_out.Resize(kv_out_dims); AllocWithDebugInfo(dev_ctx, "value_out", &value_out); @@ -184,7 +184,7 @@ struct GateAttentionConfig { return &value_out; } - Tensor* GetQKOut(Tensor* softmax_out) { + phi::DenseTensor* GetQKOut(phi::DenseTensor* softmax_out) { // softmax_dim = qk_out_dim[-1] = qk_out_dim[rank - 1] int softmax_dim = m_size; if (!softmax_out || phi::UseCudnnSoftmax(dev_ctx, softmax_dim, true)) { @@ -200,7 +200,7 @@ struct GateAttentionConfig { } } - Tensor* GetQKTVOut(Tensor* gate_out) { + phi::DenseTensor* GetQKTVOut(phi::DenseTensor* gate_out) { if (has_gating && gate_out) { // Reuse gate_out. 
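The Get*Out() accessors in GateAttentionConfig above share one lazy-allocation shape. A sketch of GetQKVOut with its template argument filled in, assumed to be T to match AllocWithDebugInfo's other uses in this header:

// Sketch of the lazy-allocation accessor pattern used by the Get*Out()
// methods above; the buffer is resized and allocated only on first use.
phi::DenseTensor* GetQKVOut() {
  if (!qkv_out.IsInitialized()) {
    qkv_out.Resize(qkv_out_dims);
    AllocWithDebugInfo<T>(dev_ctx, "qkv_out", &qkv_out);
  }
  return &qkv_out;
}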
gate_out->Resize(qktv_out_dims); @@ -250,10 +250,10 @@ template struct GateAttentionGradConfig : public GateAttentionConfig { public: GateAttentionGradConfig(const phi::GPUContext& dev_ctx, - const Tensor* query, - const Tensor* key, - const Tensor* query_weight, - const Tensor* qkv_weight, + const phi::DenseTensor* query, + const phi::DenseTensor* key, + const phi::DenseTensor* query_weight, + const phi::DenseTensor* qkv_weight, bool merge_qkv, bool has_gating) : GateAttentionConfig(dev_ctx, @@ -264,7 +264,7 @@ struct GateAttentionGradConfig : public GateAttentionConfig { merge_qkv, has_gating) {} - Tensor* GetQKVOutGrad() { + phi::DenseTensor* GetQKVOutGrad() { if (!qkv_out_grad.IsInitialized()) { qkv_out_grad.Resize(this->qkv_out_dims); AllocWithDebugInfo(this->dev_ctx, "qkv_out_grad", &qkv_out_grad); @@ -272,7 +272,7 @@ struct GateAttentionGradConfig : public GateAttentionConfig { return &qkv_out_grad; } - Tensor* GetQueryOutGrad() { + phi::DenseTensor* GetQueryOutGrad() { if (!query_out_grad.IsInitialized()) { query_out_grad.Resize(this->q_out_dims); AllocWithDebugInfo(this->dev_ctx, "query_out_grad", &query_out_grad); @@ -280,7 +280,7 @@ struct GateAttentionGradConfig : public GateAttentionConfig { return &query_out_grad; } - Tensor* GetKeyOutGrad() { + phi::DenseTensor* GetKeyOutGrad() { if (!key_out_grad.IsInitialized()) { key_out_grad.Resize(this->kv_out_dims); AllocWithDebugInfo(this->dev_ctx, "key_out_grad", &key_out_grad); @@ -288,7 +288,7 @@ struct GateAttentionGradConfig : public GateAttentionConfig { return &key_out_grad; } - Tensor* GetValueOutGrad() { + phi::DenseTensor* GetValueOutGrad() { if (!value_out_grad.IsInitialized()) { value_out_grad.Resize(this->kv_out_dims); AllocWithDebugInfo(this->dev_ctx, "value_out_grad", &value_out_grad); @@ -296,7 +296,7 @@ struct GateAttentionGradConfig : public GateAttentionConfig { return &value_out_grad; } - Tensor* GetQKOutGrad(Tensor* softmax_out_grad) { + phi::DenseTensor* GetQKOutGrad(phi::DenseTensor* softmax_out_grad) { // softmax_dim = qk_out_dim[-1] = qk_out_dim[rank - 1] int softmax_dim = this->m_size; if (!softmax_out_grad || @@ -325,15 +325,15 @@ class FMHAGateRef { FMHAGateRef(const phi::GPUContext& dev_ctx, bool merge_qkv) : dev_ctx_(dev_ctx), merge_qkv_(merge_qkv) {} - void ComputeForward(const Tensor* nonbatched_bias, - const Tensor* src_mask, - Tensor* q_transpose_out, - Tensor* k_transpose_out, - Tensor* v_transpose_out, - Tensor* qkv_transpose_out, - Tensor* softmax_out, - Tensor* fmha_out, - Tensor* gate_out, + void ComputeForward(const phi::DenseTensor* nonbatched_bias, + const phi::DenseTensor* src_mask, + phi::DenseTensor* q_transpose_out, + phi::DenseTensor* k_transpose_out, + phi::DenseTensor* v_transpose_out, + phi::DenseTensor* qkv_transpose_out, + phi::DenseTensor* softmax_out, + phi::DenseTensor* fmha_out, + phi::DenseTensor* gate_out, GateAttentionConfig* config) { T* q_ptr = nullptr; T* k_ptr = nullptr; @@ -345,7 +345,7 @@ class FMHAGateRef { platform::errors::NotFound("The input qkv_transpose_out can not be " "nullptr when merge_qkv is true.")); - Tensor* qkv_out = config->GetQKVOut(); + phi::DenseTensor* qkv_out = config->GetQKVOut(); ComputeQKVTransposeForward(*qkv_out, qkv_transpose_out); config->ClearQKVOut(); @@ -368,9 +368,9 @@ class FMHAGateRef { platform::errors::NotFound("The input v_transpose_out can not be " "nullptr when merge_qkv is false.")); - Tensor* query_out = config->GetQueryOut(); - Tensor* key_out = config->GetKeyOut(); - Tensor* value_out = config->GetValueOut(); + phi::DenseTensor* 
query_out = config->GetQueryOut(); + phi::DenseTensor* key_out = config->GetKeyOut(); + phi::DenseTensor* value_out = config->GetValueOut(); ComputeQKVTransposeForward(*query_out, *key_out, *value_out, @@ -388,7 +388,7 @@ class FMHAGateRef { // [batch_size, seq_len_m, num_heads, seq_len_r, head_dim] * // [batch_size, seq_len_m, num_heads, m_size, head_dim] // -> [batch_size, seq_len_m, num_heads, seq_len_r, m_size] - Tensor* qk_out = config->GetQKOut(softmax_out); + phi::DenseTensor* qk_out = config->GetQKOut(softmax_out); T* qk_out_ptr = qk_out->data(); int64_t gemm_batch_size = @@ -418,7 +418,7 @@ class FMHAGateRef { // [batch_size, seq_len_m, num_heads, seq_len_r, m_size] * // [batch_size, seq_len_m, num_heads, m_size, head_dim] // -> [batch_size, seq_len_m, num_heads, seq_len_r, head_dim] - Tensor* qktv_out = config->GetQKTVOut(gate_out); + phi::DenseTensor* qktv_out = config->GetQKTVOut(gate_out); T* qktv_out_ptr = qktv_out->data(); gemm_m = config->seq_len_r; @@ -444,14 +444,14 @@ class FMHAGateRef { } } - void ComputeBackward(const Tensor* q_transpose_out, - const Tensor* k_transpose_out, - const Tensor* v_transpose_out, - const Tensor* qkv_transpose_out, - const Tensor* softmax_out, - const Tensor* fmha_out_grad, - Tensor* src_mask_grad, - Tensor* nonbatched_bias_grad, + void ComputeBackward(const phi::DenseTensor* q_transpose_out, + const phi::DenseTensor* k_transpose_out, + const phi::DenseTensor* v_transpose_out, + const phi::DenseTensor* qkv_transpose_out, + const phi::DenseTensor* softmax_out, + const phi::DenseTensor* fmha_out_grad, + phi::DenseTensor* src_mask_grad, + phi::DenseTensor* nonbatched_bias_grad, GateAttentionGradConfig* config) { const T* q_ptr = nullptr; const T* k_ptr = nullptr; @@ -562,7 +562,7 @@ class FMHAGateRef { gemm_batch_size); } - Tensor* qk_out_grad = config->GetQKOutGrad(&softmax_out_grad); + phi::DenseTensor* qk_out_grad = config->GetQKOutGrad(&softmax_out_grad); ComputeBiasMaskSoftmaxBackward(&softmax_out_grad, softmax_out, src_mask_grad, @@ -604,12 +604,12 @@ class FMHAGateRef { alpha); if (merge_qkv_) { - Tensor* qkv_out_grad = config->GetQKVOutGrad(); + phi::DenseTensor* qkv_out_grad = config->GetQKVOutGrad(); ComputeQKVTransposeBackward(qkv_transpose_out_grad, qkv_out_grad); } else { - Tensor* q_out_grad = config->GetQueryOutGrad(); - Tensor* k_out_grad = config->GetKeyOutGrad(); - Tensor* v_out_grad = config->GetValueOutGrad(); + phi::DenseTensor* q_out_grad = config->GetQueryOutGrad(); + phi::DenseTensor* k_out_grad = config->GetKeyOutGrad(); + phi::DenseTensor* v_out_grad = config->GetValueOutGrad(); ComputeQKVTransposeBackward(q_transpose_out_grad, k_transpose_out_grad, v_transpose_out_grad, @@ -619,24 +619,24 @@ class FMHAGateRef { } } - void ComputeQKVTransposeForward(const Tensor& q_out, - const Tensor& k_out, - const Tensor& v_out, - Tensor* q_transpose_out, - Tensor* k_transpose_out, - Tensor* v_transpose_out) { + void ComputeQKVTransposeForward(const phi::DenseTensor& q_out, + const phi::DenseTensor& k_out, + const phi::DenseTensor& v_out, + phi::DenseTensor* q_transpose_out, + phi::DenseTensor* k_transpose_out, + phi::DenseTensor* v_transpose_out) { std::vector perm = {0, 1, 3, 2, 4}; TransposeGPUKernelDriver(dev_ctx_, q_out, perm, q_transpose_out); TransposeGPUKernelDriver(dev_ctx_, k_out, perm, k_transpose_out); TransposeGPUKernelDriver(dev_ctx_, v_out, perm, v_transpose_out); } - void ComputeQKVTransposeBackward(const Tensor& q_transpose_out_grad, - const Tensor& k_transpose_out_grad, - const Tensor& v_transpose_out_grad, - 
Tensor* q_out_grad, - Tensor* k_out_grad, - Tensor* v_out_grad) { + void ComputeQKVTransposeBackward(const phi::DenseTensor& q_transpose_out_grad, + const phi::DenseTensor& k_transpose_out_grad, + const phi::DenseTensor& v_transpose_out_grad, + phi::DenseTensor* q_out_grad, + phi::DenseTensor* k_out_grad, + phi::DenseTensor* v_out_grad) { std::vector perm = {0, 1, 3, 2, 4}; TransposeGPUKernelDriver( dev_ctx_, q_transpose_out_grad, perm, q_out_grad); @@ -648,14 +648,15 @@ class FMHAGateRef { // [batch_size, seq_len_m, seq_len_r, 3, num_heads, head_dim] -> // [3, batch_size, seq_len_m, num_heads, seq_len_r, head_dim] - void ComputeQKVTransposeForward(const Tensor& qkv_out, - Tensor* qkv_transpose_out) { + void ComputeQKVTransposeForward(const phi::DenseTensor& qkv_out, + phi::DenseTensor* qkv_transpose_out) { std::vector perm = {3, 0, 1, 4, 2, 5}; TransposeGPUKernelDriver(dev_ctx_, qkv_out, perm, qkv_transpose_out); } - void ComputeQKVTransposeBackward(const Tensor& qkv_transpose_out_grad, - Tensor* qkv_out_grad) { + void ComputeQKVTransposeBackward( + const phi::DenseTensor& qkv_transpose_out_grad, + phi::DenseTensor* qkv_out_grad) { std::vector perm = {1, 2, 4, 0, 3, 5}; TransposeGPUKernelDriver( dev_ctx_, qkv_transpose_out_grad, perm, qkv_out_grad); @@ -663,31 +664,33 @@ class FMHAGateRef { // [batch_size, seq_len_m, num_head, seq_len_r, c] -> // [batch_size, seq_len_m, seq_len_r, num_head, c] - void ComputeQKTVTransposeForward(const Tensor& qktv_out, Tensor* fmha_out) { + void ComputeQKTVTransposeForward(const phi::DenseTensor& qktv_out, + phi::DenseTensor* fmha_out) { std::vector perm = {0, 1, 3, 2, 4}; TransposeGPUKernelDriver(dev_ctx_, qktv_out, perm, fmha_out); } - void ComputeQKTVTransposeBackward(const Tensor& fmha_out_grad, - Tensor* qktv_out_grad) { + void ComputeQKTVTransposeBackward(const phi::DenseTensor& fmha_out_grad, + phi::DenseTensor* qktv_out_grad) { std::vector perm = {0, 1, 3, 2, 4}; TransposeGPUKernelDriver(dev_ctx_, fmha_out_grad, perm, qktv_out_grad); } // qk_out = qk_out + nonbatched_bias + src_mask // softmax_out = softmax(src_mask_out) - void ComputeBiasMaskSoftmaxForward(const Tensor* nonbatched_bias, - const Tensor* src_mask, - Tensor* qk_out, - Tensor* softmax_out) { + void ComputeBiasMaskSoftmaxForward(const phi::DenseTensor* nonbatched_bias, + const phi::DenseTensor* src_mask, + phi::DenseTensor* qk_out, + phi::DenseTensor* softmax_out) { if (nonbatched_bias) { - std::vector ins = {qk_out, src_mask, nonbatched_bias}; - std::vector outs = {qk_out}; + std::vector ins = { + qk_out, src_mask, nonbatched_bias}; + std::vector outs = {qk_out}; phi::funcs::BroadcastKernel( dev_ctx_, ins, &outs, -1, TernaryAddFunctor()); } else { - std::vector ins = {qk_out, src_mask}; - std::vector outs = {qk_out}; + std::vector ins = {qk_out, src_mask}; + std::vector outs = {qk_out}; phi::funcs::BroadcastKernel( dev_ctx_, ins, &outs, -1, phi::funcs::AddFunctor()); } @@ -696,11 +699,11 @@ class FMHAGateRef { // src_mask_out = qk_out + nonbatched_bias + src_mask // softmax_out = softmax(src_mask_out) - void ComputeBiasMaskSoftmaxBackward(const Tensor* softmax_out_grad, - const Tensor* softmax_out, - Tensor* src_mask_grad, - Tensor* qk_out_grad, - Tensor* nonbatched_bias_grad) { + void ComputeBiasMaskSoftmaxBackward(const phi::DenseTensor* softmax_out_grad, + const phi::DenseTensor* softmax_out, + phi::DenseTensor* src_mask_grad, + phi::DenseTensor* qk_out_grad, + phi::DenseTensor* nonbatched_bias_grad) { PADDLE_ENFORCE_NOT_NULL( qk_out_grad, platform::errors::NotFound("The 
qk_out_grad can not be nullptr.")); diff --git a/paddle/fluid/operators/fused/fused_gate_attention_op.cc b/paddle/fluid/operators/fused/fused_gate_attention_op.cc index 0823f391fd086..ce7929c39ffa8 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention_op.cc +++ b/paddle/fluid/operators/fused/fused_gate_attention_op.cc @@ -20,7 +20,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DDim = framework::DDim; class FusedGateAttentionOp : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/fused/fused_gate_attention_op.cu b/paddle/fluid/operators/fused/fused_gate_attention_op.cu index 413dc41dbd17c..8f13424ce49b5 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention_op.cu +++ b/paddle/fluid/operators/fused/fused_gate_attention_op.cu @@ -22,7 +22,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template struct SigmoidMultiplyFunctor { @@ -69,7 +69,7 @@ void ComputeMergedQKVMatmulForward(const framework::ExecutionContext &ctx, // query: shape=[batch_size, seq_len_m, seq_len_r, qkv_dim] // qkv_weight: shape=[3, num_heads, head_dim, qkv_dim] // qkv_out: shape=[batch_size, seq_len_m, seq_len_r, 3, num_heads, head_dim] - auto *qkv_weight = ctx.Input("QKVWeight"); + auto *qkv_weight = ctx.Input("QKVWeight"); // qkv_out = GEMM(query, qkv_weight^T) int m = config.batch_size * config.seq_len_m * config.seq_len_r; @@ -87,9 +87,9 @@ void ComputeMergedQKVMatmulBackward(const framework::ExecutionContext &ctx, const Tensor *qkv_out_grad, Tensor *query_grad, bool use_addto) { - auto *qkv_weight = ctx.Input("QKVWeight"); + auto *qkv_weight = ctx.Input("QKVWeight"); auto *qkv_weight_grad = - ctx.Output(framework::GradVarName("QKVWeight")); + ctx.Output(framework::GradVarName("QKVWeight")); auto &dev_ctx = ctx.template device_context(); dev_ctx.Alloc(qkv_weight_grad, qkv_weight_grad->numel() * sizeof(T)); @@ -116,9 +116,9 @@ void ComputeSeparatedQKVMatmulForward(const framework::ExecutionContext &ctx, Tensor *query_out, Tensor *key_out, Tensor *value_out) { - auto *query_weight = ctx.Input("QueryWeight"); - auto *key_weight = ctx.Input("KeyWeight"); - auto *value_weight = ctx.Input("ValueWeight"); + auto *query_weight = ctx.Input("QueryWeight"); + auto *key_weight = ctx.Input("KeyWeight"); + auto *value_weight = ctx.Input("ValueWeight"); // query_out = GEMM(query, query_weight) // query: shape=[batch_size, seq_len_m, seq_len_r, q_dim] @@ -158,9 +158,9 @@ void ComputeSeparatedQKVMatmulBackward(const framework::ExecutionContext &ctx, Tensor *key_grad, bool use_addto) { // Gradient of GEMM(key, k_weight) - const auto *key_weight = ctx.Input("KeyWeight"); + const auto *key_weight = ctx.Input("KeyWeight"); auto *key_weight_grad = - ctx.Output(framework::GradVarName("KeyWeight")); + ctx.Output(framework::GradVarName("KeyWeight")); auto &dev_ctx = ctx.template device_context(); dev_ctx.Alloc(key_weight_grad, key_weight_grad->numel() * sizeof(T)); @@ -173,9 +173,9 @@ void ComputeSeparatedQKVMatmulBackward(const framework::ExecutionContext &ctx, key, key_weight, key_out_grad, key_grad, key_weight_grad, nullptr, false); // Gradient of GEMM(value, v_weight) - auto *value_weight = ctx.Input("ValueWeight"); + auto *value_weight = ctx.Input("ValueWeight"); auto *value_weight_grad = - ctx.Output(framework::GradVarName("ValueWeight")); + ctx.Output(framework::GradVarName("ValueWeight")); 
dev_ctx.Alloc(value_weight_grad, value_weight_grad->numel() * sizeof(T)); kv_compute.ComputeBackward(key, @@ -187,9 +187,9 @@ void ComputeSeparatedQKVMatmulBackward(const framework::ExecutionContext &ctx, true); // Gradient of GEMM(query, query_weight) - const auto *query_weight = ctx.Input("QueryWeight"); + const auto *query_weight = ctx.Input("QueryWeight"); auto *query_weight_grad = - ctx.Output(framework::GradVarName("QueryWeight")); + ctx.Output(framework::GradVarName("QueryWeight")); dev_ctx.Alloc(query_weight_grad, query_weight_grad->numel() * sizeof(T)); int q_m = config.batch_size * config.seq_len_m * config.seq_len_r; @@ -212,8 +212,8 @@ void ComputeGatingLinearForward(const framework::ExecutionContext &ctx, const Tensor *query, const Tensor *fmha_out, Tensor *gate_out) { - auto *gate_weight = ctx.Input("GateWeight"); - auto *gate_bias = ctx.Input("GateBias"); + auto *gate_weight = ctx.Input("GateWeight"); + auto *gate_bias = ctx.Input("GateBias"); // The first gate_bias_out stores the result of the multiplication, // and the second gate_bias_out stores the result of the multiplication + @@ -242,8 +242,8 @@ void ComputeGatingLinearBackward(const framework::ExecutionContext &ctx, const Tensor *gate_out_grad, Tensor *query_grad, Tensor *fmha_out_grad) { - const auto *gate_weight = ctx.Input("GateWeight"); - const auto *gate_bias = ctx.Input("GateBias"); + const auto *gate_weight = ctx.Input("GateWeight"); + const auto *gate_bias = ctx.Input("GateBias"); auto &dev_ctx = ctx.template device_context(); // Re-compute gate_bias_out Tensor gate_bias_out; @@ -267,8 +267,9 @@ void ComputeGatingLinearBackward(const framework::ExecutionContext &ctx, // Gradient of GEMM(query, gate_weight) + gate_bias auto *gate_weight_grad = - ctx.Output(framework::GradVarName("GateWeight")); - auto *gate_bias_grad = ctx.Output(framework::GradVarName("GateBias")); + ctx.Output(framework::GradVarName("GateWeight")); + auto *gate_bias_grad = + ctx.Output(framework::GradVarName("GateBias")); dev_ctx.Alloc(gate_weight_grad, gate_weight_grad->numel() * sizeof(T)); dev_ctx.Alloc(gate_bias_grad, gate_bias_grad->numel() * sizeof(T)); @@ -285,8 +286,9 @@ void ComputeOutputLinearForward(const framework::ExecutionContext &ctx, const GateAttentionConfig &config, const Tensor *fmha_or_gate_out, Tensor *out) { - const auto *out_linear_weight = ctx.Input("OutLinearWeight"); - const auto *out_linear_bias = ctx.Input("OutLinearBias"); + const auto *out_linear_weight = + ctx.Input("OutLinearWeight"); + const auto *out_linear_bias = ctx.Input("OutLinearBias"); // out = GEMM(fmha_or_gate_out, out_linear_weight) + out_linear_bias int m = config.batch_size * config.seq_len_m * config.seq_len_r; @@ -304,13 +306,15 @@ void ComputeOutputLinearBackward(const framework::ExecutionContext &ctx, const Tensor *input, Tensor *input_grad) { auto &dev_ctx = ctx.template device_context(); - const auto *out_grad = ctx.Input(framework::GradVarName("Out")); - const auto *out_linear_weight = ctx.Input("OutLinearWeight"); + const auto *out_grad = + ctx.Input(framework::GradVarName("Out")); + const auto *out_linear_weight = + ctx.Input("OutLinearWeight"); auto *out_linear_weight_grad = - ctx.Output(framework::GradVarName("OutLinearWeight")); + ctx.Output(framework::GradVarName("OutLinearWeight")); auto *out_linear_bias_grad = - ctx.Output(framework::GradVarName("OutLinearBias")); + ctx.Output(framework::GradVarName("OutLinearBias")); dev_ctx.Alloc(out_linear_weight_grad, out_linear_weight_grad->numel() * sizeof(T)); @@ -334,23 +338,23 @@ template 
class FusedGateAttentionOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - const auto *query = ctx.Input("Query"); - const auto *key = ctx.Input("Key"); - const auto *query_weight = ctx.Input("QueryWeight"); - const auto *qkv_weight = ctx.Input("QKVWeight"); + const auto *query = ctx.Input("Query"); + const auto *key = ctx.Input("Key"); + const auto *query_weight = ctx.Input("QueryWeight"); + const auto *qkv_weight = ctx.Input("QKVWeight"); - const auto *src_mask = ctx.Input("SrcMask"); - const auto *nonbatched_bias = ctx.Input("NonbatchedBias"); + const auto *src_mask = ctx.Input("SrcMask"); + const auto *nonbatched_bias = ctx.Input("NonbatchedBias"); - auto *q_transpose_out = ctx.Output("QueryTransposeOut"); - auto *k_transpose_out = ctx.Output("KeyTransposeOut"); - auto *v_transpose_out = ctx.Output("ValueTransposeOut"); - auto *qkv_transpose_out = ctx.Output("QKVTransposeOut"); + auto *q_transpose_out = ctx.Output("QueryTransposeOut"); + auto *k_transpose_out = ctx.Output("KeyTransposeOut"); + auto *v_transpose_out = ctx.Output("ValueTransposeOut"); + auto *qkv_transpose_out = ctx.Output("QKVTransposeOut"); - auto *softmax_out = ctx.Output("SoftmaxOut"); - auto *fmha_out = ctx.Output("FMHAOut"); - auto *gate_out = ctx.Output("GateOut"); - auto *out = ctx.Output("Out"); + auto *softmax_out = ctx.Output("SoftmaxOut"); + auto *fmha_out = ctx.Output("FMHAOut"); + auto *gate_out = ctx.Output("GateOut"); + auto *out = ctx.Output("Out"); const bool merge_qkv = ctx.Attr("merge_qkv"); const bool has_gating = ctx.Attr("has_gating"); @@ -424,24 +428,29 @@ class FusedGateAttentionGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { // forward input - const auto *query = ctx.Input("Query"); - const auto *key = ctx.Input("Key"); - const auto *query_weight = ctx.Input("QueryWeight"); - const auto *qkv_weight = ctx.Input("QKVWeight"); + const auto *query = ctx.Input("Query"); + const auto *key = ctx.Input("Key"); + const auto *query_weight = ctx.Input("QueryWeight"); + const auto *qkv_weight = ctx.Input("QKVWeight"); // forward output, backward input - const auto *q_transpose_out = ctx.Input("QueryTransposeOut"); - const auto *k_transpose_out = ctx.Input("KeyTransposeOut"); - const auto *v_transpose_out = ctx.Input("ValueTransposeOut"); - const auto *qkv_transpose_out = ctx.Input("QKVTransposeOut"); - const auto *softmax_out = ctx.Input("SoftmaxOut"); - const auto *fmha_out = ctx.Input("FMHAOut"); - const auto *gate_out = ctx.Input("GateOut"); + const auto *q_transpose_out = + ctx.Input("QueryTransposeOut"); + const auto *k_transpose_out = + ctx.Input("KeyTransposeOut"); + const auto *v_transpose_out = + ctx.Input("ValueTransposeOut"); + const auto *qkv_transpose_out = + ctx.Input("QKVTransposeOut"); + const auto *softmax_out = ctx.Input("SoftmaxOut"); + const auto *fmha_out = ctx.Input("FMHAOut"); + const auto *gate_out = ctx.Input("GateOut"); // backward output - auto *query_grad = ctx.Output(framework::GradVarName("Query")); + auto *query_grad = + ctx.Output(framework::GradVarName("Query")); auto *nonbatched_bias_grad = - ctx.Output(framework::GradVarName("NonbatchedBias")); + ctx.Output(framework::GradVarName("NonbatchedBias")); bool has_gating = ctx.Attr("has_gating"); bool merge_qkv = ctx.Attr("merge_qkv"); @@ -501,7 +510,8 @@ class FusedGateAttentionGradKernel : public framework::OpKernel { ctx, config, query, qkv_out_grad, query_grad, use_addto); } else { // 
4. Gradient of Separated QKV Matmul - auto *key_grad = ctx.Output(framework::GradVarName("Key")); + auto *key_grad = + ctx.Output(framework::GradVarName("Key")); if (key_grad) { AllocWithDebugInfo(dev_ctx, "key_grad", key_grad); } diff --git a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc index d14e30a5f7f2a..e5f80e2511e2d 100644 --- a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc +++ b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc @@ -20,7 +20,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusedGemmEpilogueOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu index 22340210b5715..5f3c60df9a080 100644 --- a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu +++ b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cu @@ -23,7 +23,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class FusedGemmEpilogueKernel : public framework::OpKernel { @@ -31,12 +31,13 @@ class FusedGemmEpilogueKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - const Tensor* x = ctx.Input("X"); - const Tensor* y = ctx.Input("Y"); - const Tensor* bias = ctx.Input("Bias"); + const phi::DenseTensor* x = ctx.Input("X"); + const phi::DenseTensor* y = ctx.Input("Y"); + const phi::DenseTensor* bias = ctx.Input("Bias"); - Tensor* out = ctx.Output("Out"); - Tensor* reserve_space = ctx.Output("ReserveSpace"); + phi::DenseTensor* out = ctx.Output("Out"); + phi::DenseTensor* reserve_space = + ctx.Output("ReserveSpace"); bool trans_x = ctx.Attr("trans_x"); bool trans_y = ctx.Attr("trans_y"); @@ -322,14 +323,15 @@ class FusedGemmEpilogueGradKernel : public framework::OpKernel { static void ComputeImpl(const framework::ExecutionContext& ctx) { using Trait = FusedGEMMGradTrait; auto& dev_ctx = ctx.template device_context(); - const Tensor* dout = ctx.Input("DOut"); - const Tensor* x = ctx.Input("X"); - const Tensor* y = ctx.Input("Y"); - const Tensor* reserve_space = ctx.Input("ReserveSpace"); - - Tensor* dx = ctx.Output("DX"); - Tensor* dy = ctx.Output("DY"); - Tensor* dbias = ctx.Output("DBias"); + const phi::DenseTensor* dout = ctx.Input("DOut"); + const phi::DenseTensor* x = ctx.Input("X"); + const phi::DenseTensor* y = ctx.Input("Y"); + const phi::DenseTensor* reserve_space = + ctx.Input("ReserveSpace"); + + phi::DenseTensor* dx = ctx.Output("DX"); + phi::DenseTensor* dy = ctx.Output("DY"); + phi::DenseTensor* dbias = ctx.Output("DBias"); std::string activation_grad = ctx.Attr("activation_grad"); diff --git a/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc b/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc index 2b4b03e32cd8e..b1707ff55950d 100644 --- a/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc +++ b/paddle/fluid/operators/fused/fused_gemm_epilogue_op_xpu.cc @@ -22,7 +22,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class FusedGemmEpilogueXPUKernel : public framework::OpKernel { @@ -32,12 +32,13 @@ class FusedGemmEpilogueXPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - const Tensor* x = ctx.Input("X"); - const Tensor* y = ctx.Input("Y"); - const Tensor* bias = ctx.Input("Bias"); + const phi::DenseTensor* x = ctx.Input("X"); + const phi::DenseTensor* y = ctx.Input("Y"); + const phi::DenseTensor* bias = ctx.Input("Bias"); - Tensor* out = ctx.Output("Out"); - Tensor* reserve_space = ctx.Output("ReserveSpace"); + phi::DenseTensor* out = ctx.Output("Out"); + phi::DenseTensor* reserve_space = + ctx.Output("ReserveSpace"); bool trans_x = ctx.Attr("trans_x"); bool trans_y = ctx.Attr("trans_y"); @@ -112,15 +113,16 @@ class FusedGemmEpilogueXPUGradKernel : public framework::OpKernel { bool trans_x = ctx.Attr("trans_x"); bool trans_y = ctx.Attr("trans_y"); auto& dev_ctx = ctx.template device_context(); - const Tensor* dout = ctx.Input("DOut"); - const Tensor* x = ctx.Input("X"); - const Tensor* y = ctx.Input("Y"); + const phi::DenseTensor* dout = ctx.Input("DOut"); + const phi::DenseTensor* x = ctx.Input("X"); + const phi::DenseTensor* y = ctx.Input("Y"); - const Tensor* reserve_space = ctx.Input("ReserveSpace"); + const phi::DenseTensor* reserve_space = + ctx.Input("ReserveSpace"); - Tensor* dx = ctx.Output("DX"); - Tensor* dy = ctx.Output("DY"); - Tensor* dbias = ctx.Output("DBias"); + phi::DenseTensor* dx = ctx.Output("DX"); + phi::DenseTensor* dy = ctx.Output("DY"); + phi::DenseTensor* dbias = ctx.Output("DBias"); std::string activation = "none"; if (ctx.HasAttr("activation")) { diff --git a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias_test.cu b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias_test.cu index d3c6cca95efb0..f383d6846f946 100644 --- a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias_test.cu +++ b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias_test.cu @@ -41,9 +41,9 @@ struct TestFusedLayernormResidualDropoutBias { bool has_bias = true; bool has_scale = true; bool has_layernorm_bias = true; - framework::Tensor src, residual, bias, out, mask, scale, layernorm_bias, + phi::DenseTensor src, residual, bias, out, mask, scale, layernorm_bias, layernorm_out, means, vars; - framework::Tensor dsrc, dbias; + phi::DenseTensor dsrc, dbias; std::vector src_vec, residual_vec, bias_vec; std::vector> means_vec, vars_vec, scale_vec, diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc index 9572a87aba21d..2a2d1f27edd9c 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc +++ b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cc @@ -21,7 +21,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusedMultiTransformerINT8Op : public framework::OperatorWithKernel { private: diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu index 8e200275f8171..fe1ee3449a102 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu +++ b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu @@ -25,9 +25,9 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { using U = LayerNormParamType; auto &dev_ctx = ctx.cuda_device_context(); - auto *time_step = ctx.Input("TimeStep"); + auto *time_step = ctx.Input("TimeStep"); // 0. input - auto *input_x = ctx.Input("X"); + auto *input_x = ctx.Input("X"); const auto input_x_dims = input_x->dims(); int bsz = input_x_dims[0]; int seq_len = input_x_dims[1]; @@ -48,10 +48,11 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { // dequant output scales, tensor, size = [num_layers, n], n is gemm output // size - auto *qkv_out_scale = ctx.Input("QKVOutScale"); - auto *out_linear_out_scale = ctx.Input("OutLinearOutScale"); - auto *ffn1_out_scale = ctx.Input("FFN1OutScale"); - auto *ffn2_out_scale = ctx.Input("FFN2OutScale"); + auto *qkv_out_scale = ctx.Input("QKVOutScale"); + auto *out_linear_out_scale = + ctx.Input("OutLinearOutScale"); + auto *ffn1_out_scale = ctx.Input("FFN1OutScale"); + auto *ffn2_out_scale = ctx.Input("FFN2OutScale"); int qkv_out_scale_n = qkv_out_scale->dims()[1]; int out_linear_out_scale_n = out_linear_out_scale->dims()[1]; @@ -61,8 +62,8 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { // 1. layer norm const auto pre_layer_norm = ctx.Attr("pre_layer_norm"); const float epsilon = ctx.Attr("epsilon"); - auto ln_scales = ctx.MultiInput("LnScale"); - auto ln_biases = ctx.MultiInput("LnBias"); + auto ln_scales = ctx.MultiInput("LnScale"); + auto ln_biases = ctx.MultiInput("LnBias"); auto ln_compute = AttnLayerNorm(dev_ctx, epsilon, bsz_seq, dim_embed); @@ -76,8 +77,8 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { // 2. qkv // x: qkv's input [batch_size, seq_len, dim_embed] // y: qkv's weight: [3, num_head, dim_head, dim_embed] - auto qkv_weights = ctx.MultiInput("QKVW"); - auto qkv_biases = ctx.MultiInput("QKVBias"); + auto qkv_weights = ctx.MultiInput("QKVW"); + auto qkv_biases = ctx.MultiInput("QKVBias"); const bool trans_qkvw = ctx.Attr("trans_qkvw"); const auto qkv_w_dims = qkv_weights[0]->dims(); int num_head = trans_qkvw ? qkv_w_dims[1] : qkv_w_dims[2]; @@ -100,10 +101,10 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { true, "upscale_in_train", 0.0, true, true, 0, nullptr); auto fmha_compute = FMHARef(dev_ctx, bsz, seq_len, num_head, dim_head, attn_param); - auto *src_mask = ctx.Input("SrcMask"); - auto cache_kvs = ctx.MultiInput("CacheKV"); - auto cache_kv_outs = ctx.MultiOutput("CacheKVOut"); - // auto *time_step = ctx.Input("TimeStep"); + auto *src_mask = ctx.Input("SrcMask"); + auto cache_kvs = ctx.MultiInput("CacheKV"); + auto cache_kv_outs = ctx.MultiOutput("CacheKVOut"); + // auto *time_step = ctx.Input("TimeStep"); auto out_seq_len = seq_len; if (time_step) { @@ -156,8 +157,8 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { dev_ctx.Alloc(&fmha_out, fmha_out.numel() * sizeof(T)); // 4. 
out_linear - auto out_linear_weights = ctx.MultiInput("OutLinearW"); - auto out_linear_biases = ctx.MultiInput("OutLinearBias"); + auto out_linear_weights = ctx.MultiInput("OutLinearW"); + auto out_linear_biases = ctx.MultiInput("OutLinearBias"); int ring_id = ctx.Attr("ring_id"); // (transA, transB, compute_bias) = (false, false, false) AttnMatmulINT8 out_linear_compute( @@ -171,8 +172,8 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { FusedDropoutLayerNormHelper fused_dropout_layernorm_helper_for_post_layernorm( dev_ctx, bsz_seq, dim_embed, dropout_param2, epsilon); - auto ffn_ln_scales = ctx.MultiInput("FFNLnScale"); - auto ffn_ln_biases = ctx.MultiInput("FFNLnBias"); + auto ffn_ln_scales = ctx.MultiInput("FFNLnScale"); + auto ffn_ln_biases = ctx.MultiInput("FFNLnBias"); Tensor bias_dropout_residual_out, dropout_mask_out; T *bias_dropout_residual_out_data = nullptr; if (pre_layer_norm) { @@ -186,8 +187,8 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { &dropout_mask_out, dropout_mask_out.numel() * sizeof(uint8_t)); // 6. ffn matmul1 - auto ffn1_weights = ctx.MultiInput("FFN1Weight"); - auto ffn1_biases = ctx.MultiInput("FFN1Bias"); + auto ffn1_weights = ctx.MultiInput("FFN1Weight"); + auto ffn1_biases = ctx.MultiInput("FFN1Bias"); auto ffn1_weight_dim = ffn1_weights[0]->dims(); int dim_ffn = ffn1_weight_dim[0]; @@ -213,8 +214,8 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { &ffn1_dropout_mask, ffn1_dropout_mask.numel() * sizeof(uint8_t)); // 8. ffn2 matmul - auto ffn2_weights = ctx.MultiInput("FFN2Weight"); - auto ffn2_biases = ctx.MultiInput("FFN2Bias"); + auto ffn2_weights = ctx.MultiInput("FFN2Weight"); + auto ffn2_biases = ctx.MultiInput("FFN2Bias"); AttnMatmulINT8 ffn2_linear_compute( dev_ctx, bsz_seq, dim_embed, dim_ffn, false); @@ -245,7 +246,7 @@ class FusedMultiTransformerINT8OpKernel : public framework::OpKernel { output_workspace.numel() * sizeof(int32_t)); // calc - auto *out = ctx.Output("Out"); + auto *out = ctx.Output("Out"); auto *from_data = dev_ctx.Alloc(out, out->numel() * sizeof(T)); Tensor *from_tensor = out; Tensor tmp_out; diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc index 86de140b9cde8..ede6300decbe5 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/fused/fused_multi_transformer_op.h" + #include #include @@ -21,7 +23,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusedMultiTransformerOp : public framework::OperatorWithKernel { private: diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu index 5cf22885aabba..b70f0c7ea1965 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu @@ -21,9 +21,9 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { using U = LayerNormParamType; auto &dev_ctx = ctx.cuda_device_context(); - auto *time_step = ctx.Input("TimeStep"); + auto *time_step = ctx.Input("TimeStep"); // 0. 
input - auto *input_x = ctx.Input("X"); + auto *input_x = ctx.Input("X"); const auto input_x_dims = input_x->dims(); int bsz = input_x_dims[0]; int seq_len = input_x_dims[1]; @@ -33,8 +33,8 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { // 1. layer norm const auto pre_layer_norm = ctx.Attr("pre_layer_norm"); const float epsilon = ctx.Attr("epsilon"); - auto ln_scales = ctx.MultiInput("LnScale"); - auto ln_biases = ctx.MultiInput("LnBias"); + auto ln_scales = ctx.MultiInput("LnScale"); + auto ln_biases = ctx.MultiInput("LnBias"); auto ln_compute = AttnLayerNorm(dev_ctx, epsilon, bsz_seq, dim_embed); Tensor ln_mean, ln_var; @@ -47,8 +47,8 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { // 2. qkv // x: qkv's input [batch_size, seq_len, dim_embed] // y: qkv's weight: [3, num_head, dim_head, dim_embed] - auto qkv_weights = ctx.MultiInput("QKVW"); - auto qkv_biases = ctx.MultiInput("QKVBias"); + auto qkv_weights = ctx.MultiInput("QKVW"); + auto qkv_biases = ctx.MultiInput("QKVBias"); const bool trans_qkvw = ctx.Attr("trans_qkvw"); const auto qkv_w_dims = qkv_weights[0]->dims(); int num_head = trans_qkvw ? qkv_w_dims[1] : qkv_w_dims[2]; @@ -76,10 +76,10 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { true, "upscale_in_train", 0.0, true, true, 0, nullptr); auto fmha_compute = FMHARef(dev_ctx, bsz, seq_len, num_head, dim_head, attn_param); - auto *src_mask = ctx.Input("SrcMask"); - auto cache_kvs = ctx.MultiInput("CacheKV"); - auto cache_kv_outs = ctx.MultiOutput("CacheKVOut"); - // auto *time_step = ctx.Input("TimeStep"); + auto *src_mask = ctx.Input("SrcMask"); + auto cache_kvs = ctx.MultiInput("CacheKV"); + auto cache_kv_outs = ctx.MultiOutput("CacheKVOut"); + // auto *time_step = ctx.Input("TimeStep"); auto out_seq_len = seq_len; if (time_step) { @@ -132,8 +132,8 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { dev_ctx.Alloc(&fmha_out, fmha_out.numel() * sizeof(T)); // 4. out_linear - auto out_linear_weights = ctx.MultiInput("OutLinearW"); - auto out_linear_biases = ctx.MultiInput("OutLinearBias"); + auto out_linear_weights = ctx.MultiInput("OutLinearW"); + auto out_linear_biases = ctx.MultiInput("OutLinearBias"); int ring_id = ctx.Attr("ring_id"); // (transA, transB, compute_bias) = (false, false, false) auto out_linear_compute = AttnMatMul( @@ -143,8 +143,8 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { DropoutParam dropout_param2(true, 0, true, true, 0.0, nullptr, 0); FusedDropoutLayerNormHelper fused_dropout_layernorm_helper( dev_ctx, bsz_seq, dim_embed, dropout_param2, epsilon); - auto ffn_ln_scales = ctx.MultiInput("FFNLnScale"); - auto ffn_ln_biases = ctx.MultiInput("FFNLnBias"); + auto ffn_ln_scales = ctx.MultiInput("FFNLnScale"); + auto ffn_ln_biases = ctx.MultiInput("FFNLnBias"); Tensor bias_dropout_residual_out, dropout_mask_out; T *bias_dropout_residual_out_data = nullptr; if (pre_layer_norm) { @@ -158,8 +158,8 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { &dropout_mask_out, dropout_mask_out.numel() * sizeof(uint8_t)); // 6. 
ffn matmul1 - auto ffn1_weights = ctx.MultiInput("FFN1Weight"); - auto ffn1_biases = ctx.MultiInput("FFN1Bias"); + auto ffn1_weights = ctx.MultiInput("FFN1Weight"); + auto ffn1_biases = ctx.MultiInput("FFN1Bias"); auto ffn1_weight_dim = ffn1_weights[0]->dims(); int dim_ffn = ffn1_weight_dim[1]; @@ -183,8 +183,8 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { &ffn1_dropout_mask, ffn1_dropout_mask.numel() * sizeof(uint8_t)); // 8. ffn2 matmul - auto ffn2_weights = ctx.MultiInput("FFN2Weight"); - auto ffn2_biases = ctx.MultiInput("FFN2Bias"); + auto ffn2_weights = ctx.MultiInput("FFN2Weight"); + auto ffn2_biases = ctx.MultiInput("FFN2Bias"); auto ffn2_linear_compute = AttnMatMul( dev_ctx, false, false, bsz_seq, dim_embed, dim_ffn, false); @@ -194,7 +194,7 @@ class FusedMultiTransformerOpKernel : public framework::OpKernel { dev_ctx, bsz_seq, dim_embed, ffn2_dropout_param, epsilon); // calc - auto *out = ctx.Output("Out"); + auto *out = ctx.Output("Out"); auto *from_data = dev_ctx.Alloc(out, out->numel() * sizeof(T)); Tensor *from_tensor = out; Tensor tmp_out; diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.h b/paddle/fluid/operators/fused/fused_multi_transformer_op.h index 761a31ce094d1..e0795616fd951 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.h +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.h @@ -41,13 +41,13 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; // for debug // #define _DEBUG_FUSED_MULTI_TRANSFORMER template -static void AllReduce(framework::Tensor &tensor, // NOLINT +static void AllReduce(phi::DenseTensor &tensor, // NOLINT const int ring_id, const int count, const phi::GPUContext &ctx) { diff --git a/paddle/fluid/operators/fused/fused_residual_dropout_bias_test.cu b/paddle/fluid/operators/fused/fused_residual_dropout_bias_test.cu index ba0652339e96e..79eb5f64cf0ec 100644 --- a/paddle/fluid/operators/fused/fused_residual_dropout_bias_test.cu +++ b/paddle/fluid/operators/fused/fused_residual_dropout_bias_test.cu @@ -50,8 +50,8 @@ struct FusedResidualDropoutBiasTester { bool has_bias = true; bool add_residual = true; - framework::Tensor src, residual, bias, out, mask; - framework::Tensor dsrc, dbias; + phi::DenseTensor src, residual, bias, out, mask; + phi::DenseTensor dsrc, dbias; std::vector src_vec, residual_vec, bias_vec; std::vector correct_out, correct_dsrc, correct_dbias; diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu index dbfabe07f474f..a58a5ea01d02e 100644 --- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu +++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu @@ -425,7 +425,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto inputs = ctx.MultiInput("X"); - auto outputs = ctx.MultiOutput("Out"); + auto outputs = ctx.MultiOutput("Out"); auto &dev_ctx = ctx.template device_context(); const auto slot_size = inputs.size(); std::vector input_data(slot_size); diff --git a/paddle/fluid/operators/fused/fused_softmax_mask.cu.h b/paddle/fluid/operators/fused/fused_softmax_mask.cu.h index 009a9253ab351..60723c6cb5d17 100644 --- a/paddle/fluid/operators/fused/fused_softmax_mask.cu.h +++ b/paddle/fluid/operators/fused/fused_softmax_mask.cu.h @@ -20,8 +20,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::Tensor; - namespace plat = paddle::platform; #define FINAL_MASK 0xffffffff diff --git a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu b/paddle/fluid/operators/fused/fusion_conv_inception_op.cu index 194d171c46e7a..9eee08600ae0e 100644 --- a/paddle/fluid/operators/fused/fusion_conv_inception_op.cu +++ b/paddle/fluid/operators/fused/fusion_conv_inception_op.cu @@ -20,7 +20,7 @@ namespace paddle { namespace operators { #if CUDNN_VERSION >= 7100 -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using ScopedTensorDescriptor = platform::ScopedTensorDescriptor; using ScopedFilterDescriptor = platform::ScopedFilterDescriptor; using ScopedConvolutionDescriptor = platform::ScopedConvolutionDescriptor; @@ -40,12 +40,12 @@ class CUDNNConvInceptionFusionOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* input = ctx.Input("Input"); - auto filters = ctx.MultiInput("Filter"); - auto bias = ctx.MultiInput("Bias"); + auto* input = ctx.Input("Input"); + auto filters = ctx.MultiInput("Filter"); + auto bias = ctx.MultiInput("Bias"); - auto* output = ctx.Output("Output"); - auto temp_outs = ctx.MultiOutput("TempOutput"); + auto* output = ctx.Output("Output"); + auto temp_outs = ctx.MultiOutput("TempOutput"); const std::string pool_type = ctx.Attr("pooling_type"); const std::string activation = ctx.Attr("activation"); diff --git a/paddle/fluid/operators/fused/fusion_group_op_test.cc b/paddle/fluid/operators/fused/fusion_group_op_test.cc index e0ee074e7f2a7..6688501fe9ac5 100644 --- a/paddle/fluid/operators/fused/fusion_group_op_test.cc +++ b/paddle/fluid/operators/fused/fusion_group_op_test.cc @@ -26,10 +26,10 @@ namespace operators { using CPUKernelFunc = std::function args)>; template -framework::Tensor* CreateTensor(framework::Scope* scope, - const platform::Place& place, - const std::string& name, - const std::vector& shape) { +phi::DenseTensor* CreateTensor(framework::Scope* scope, + const platform::Place& place, + const std::string& name, + const std::vector& shape) { auto* var = scope->Var(name); auto* tensor = var->GetMutable(); if (shape.size() > 0) { @@ -39,7 +39,7 @@ framework::Tensor* CreateTensor(framework::Scope* scope, } template -void SetupRandomCPUTensor(framework::Tensor* tensor, +void SetupRandomCPUTensor(phi::DenseTensor* tensor, const std::vector& shape) { static unsigned int seed = 100; std::mt19937 rng(seed++); @@ -104,10 +104,10 @@ void PrepareDeviceCode(platform::Place place, void CheckOutputs(framework::Scope* scope, const std::vector& output_names, - std::vector* cpu_tensors, + std::vector* cpu_tensors, size_t num_inputs, CPUKernelFunc cpu_kernel_func) { - std::vector cpu_outputs; + std::vector cpu_outputs; cpu_outputs.resize(output_names.size()); for (size_t j = 0; j < output_names.size(); ++j) { auto* var = scope->Var(output_names[j]); @@ -158,11 +158,11 @@ void TestMain(const std::vector& input_names, framework::Scope scope; // Prepare input tensors. 
- std::vector cpu_tensors; + std::vector cpu_tensors; cpu_tensors.resize(input_names.size() + output_names.size()); for (size_t i = 0; i < input_names.size(); ++i) { SetupRandomCPUTensor(&(cpu_tensors[i]), input_shapes[i]); - framework::Tensor* dev_tensor = + phi::DenseTensor* dev_tensor = CreateTensor(&scope, place, input_names[i], input_shapes[i]); paddle::framework::TensorCopySync(cpu_tensors[i], place, dev_tensor); } diff --git a/paddle/fluid/operators/fused/fusion_gru_op.cc b/paddle/fluid/operators/fused/fusion_gru_op.cc index e2d2cf071caba..a8ad8c9cbf9ba 100644 --- a/paddle/fluid/operators/fused/fusion_gru_op.cc +++ b/paddle/fluid/operators/fused/fusion_gru_op.cc @@ -269,7 +269,7 @@ class FusionGRUKernel : public framework::OpKernel { #define INIT_BASE_DEFINES \ auto* x = ctx.Input("X"); \ - auto* wh = ctx.Input("WeightH"); \ + auto* wh = ctx.Input("WeightH"); \ auto* xx = ctx.Output("XX"); \ auto x_lod = x->lod(); \ auto x_dims = x->dims(); /* T x M*/ \ @@ -281,9 +281,9 @@ class FusionGRUKernel : public framework::OpKernel { const int D3 = wh_dims[1] #define INIT_OTHER_DEFINES \ - auto* h0 = ctx.Input("H0"); \ - auto* wx = ctx.Input("WeightX"); \ - auto* bias = ctx.Input("Bias"); \ + auto* h0 = ctx.Input("H0"); \ + auto* wx = ctx.Input("WeightX"); \ + auto* bias = ctx.Input("Bias"); \ auto* hidden_out = ctx.Output("Hidden"); \ bool is_reverse = ctx.Attr("is_reverse"); \ const int M = x_mat_dims[1]; \ @@ -408,7 +408,7 @@ class FusionGRUKernel : public framework::OpKernel { return; } INIT_OTHER_DEFINES; - auto* reordered_h0 = ctx.Output("ReorderedH0"); + auto* reordered_h0 = ctx.Output("ReorderedH0"); auto* batched_input = ctx.Output("BatchedInput"); auto* batched_out = ctx.Output("BatchedOut"); T* batched_input_data = batched_input->mutable_data(place); diff --git a/paddle/fluid/operators/fused/fusion_gru_op.h b/paddle/fluid/operators/fused/fusion_gru_op.h index eaa59cd412f8f..2e57998b71f59 100644 --- a/paddle/fluid/operators/fused/fusion_gru_op.h +++ b/paddle/fluid/operators/fused/fusion_gru_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusionGRUOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.cc b/paddle/fluid/operators/fused/fusion_lstm_op.cc index 5454c90b3c596..69561e0df1ffa 100644 --- a/paddle/fluid/operators/fused/fusion_lstm_op.cc +++ b/paddle/fluid/operators/fused/fusion_lstm_op.cc @@ -309,11 +309,11 @@ class FuisonLSTMKernel : public framework::OpKernel { #define INIT_BASE_DEFINES \ using DeviceContext = phi::CPUContext; \ auto* x = ctx.Input("X"); \ - auto* h0 = ctx.Input("H0"); \ - auto* c0 = ctx.Input("C0"); \ - auto* wx = ctx.Input("WeightX"); \ - auto* wh = ctx.Input("WeightH"); \ - auto* bias = ctx.Input("Bias"); \ + auto* h0 = ctx.Input("H0"); \ + auto* c0 = ctx.Input("C0"); \ + auto* wx = ctx.Input("WeightX"); \ + auto* wh = ctx.Input("WeightH"); \ + auto* bias = ctx.Input("Bias"); \ auto* xx = ctx.Output("XX"); \ auto* hidden_out = ctx.Output("Hidden"); \ auto* cell_out = ctx.Output("Cell"); \ @@ -336,7 +336,7 @@ class FuisonLSTMKernel : public framework::OpKernel { auto place = ctx.GetPlace(); \ if (use_peepholes) { \ /* w_ic * Ct-1, w_fc * Ct-1 ; w_oc * Ct => ih*/ \ - auto* checked_cell = ctx.Output("CheckedCell"); \ + auto* checked_cell = ctx.Output("CheckedCell"); \ checked_cell_data = checked_cell->mutable_data(place); \ } \ const jit::lstm_attr_t attr( \ @@ -448,8 
+448,8 @@ class FuisonLSTMKernel : public framework::OpKernel { } INIT_OTHER_DEFINES; - auto* reordered_h0 = ctx.Output("ReorderedH0"); - auto* reordered_c0 = ctx.Output("ReorderedC0"); + auto* reordered_h0 = ctx.Output("ReorderedH0"); + auto* reordered_c0 = ctx.Output("ReorderedC0"); auto* batched_input = ctx.Output("BatchedInput"); auto* batched_c_out = ctx.Output("BatchedCell"); auto* batched_h_out = ctx.Output("BatchedHidden"); diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.h b/paddle/fluid/operators/fused/fusion_lstm_op.h index 7f79601602348..2d64c592a2f6c 100644 --- a/paddle/fluid/operators/fused/fusion_lstm_op.h +++ b/paddle/fluid/operators/fused/fusion_lstm_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusionLSTMOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc index 983d51241491b..50291ee648141 100644 --- a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc +++ b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc @@ -140,11 +140,11 @@ template class FusionRepeatedFCReluKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto in = ctx.Input("X"); - auto weights = ctx.MultiInput("W"); - auto biases = ctx.MultiInput("Bias"); - auto relus = ctx.MultiOutput("ReluOut"); - auto* out = ctx.Output("Out"); + auto in = ctx.Input("X"); + auto weights = ctx.MultiInput("W"); + auto biases = ctx.MultiInput("Bias"); + auto relus = ctx.MultiOutput("ReluOut"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); int weight_sz = static_cast(weights.size()); diff --git a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h index cdcaf8b483346..383353180eb38 100644 --- a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h +++ b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusionRepeatedFCReluOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc index 2ebac6d7f7124..64cc22224d385 100644 --- a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc @@ -151,10 +151,10 @@ class FusionSeqConvEltAddReluKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { using DeviceContext = phi::CPUContext; auto* x = ctx.Input("X"); - auto* w = ctx.Input("Filter"); - auto* b = ctx.Input("Bias"); + auto* w = ctx.Input("Filter"); + auto* b = ctx.Input("Bias"); auto* y = ctx.Output("Out"); - auto* col = ctx.Output("ColMat"); + auto* col = ctx.Output("ColMat"); auto x_lod = x->lod(); auto x_dims = x->dims(); diff --git a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h index 028d79dc2a1ee..7ce582c398604 100644 --- a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h +++ b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h @@ -19,7 +19,7 @@ namespace paddle 
{ namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusionSeqConvEltAddReluOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc index 6655c6756a5c8..095a1c1deb153 100644 --- a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc @@ -151,10 +151,10 @@ class FusionSeqExpandConcatFCOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { using DeviceContext = phi::CPUContext; auto ins = ctx.MultiInput("X"); - auto* w = ctx.Input("FCWeight"); - auto* b = ctx.Input("FCBias"); + auto* w = ctx.Input("FCWeight"); + auto* b = ctx.Input("FCBias"); auto* out = ctx.Output("Out"); - auto* fc_out = ctx.Output("FCOut"); + auto* fc_out = ctx.Output("FCOut"); auto* ref_in = ins[0]; auto ref_lod = ref_in->lod(); diff --git a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h index f78e820f60335..30170eb17d6da 100644 --- a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h +++ b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusionSeqExpandConcatFCOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h index 9f882a59d351c..47204abb6d718 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h +++ b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusionSeqPoolConcatOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h index 75e8556c31a81..5e3afd4e62b08 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h +++ b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class FusionSeqPoolCVMConcatOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc index 6be6763492345..b7a01b7955887 100644 --- a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc +++ b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc @@ -88,12 +88,12 @@ template class FusionSquaredMatSubKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto x = ctx.Input("X"); - auto y = ctx.Input("Y"); - auto* squared_x = ctx.Output("SquaredX"); - auto* squared_y = ctx.Output("SquaredY"); - auto* squared_xy = ctx.Output("SquaredXY"); - auto* out = ctx.Output("Out"); + auto x = ctx.Input("X"); + auto y = ctx.Input("Y"); + auto* squared_x = ctx.Output("SquaredX"); + auto* squared_y = ctx.Output("SquaredY"); + 
auto* squared_xy = ctx.Output("SquaredXY"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); T scalar = static_cast(ctx.Attr("scalar")); diff --git a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h index 0ab2c2bb10a15..c926613dc29fa 100644 --- a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h +++ b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; // ( (A.^2 * B.^2) - (A * B).^2 ) .* scalar class FusionSquaredMatSubOp : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cc b/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cc index c9900daf4ed06..e7bb037a3f3aa 100644 --- a/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cc +++ b/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cc @@ -22,8 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class TransposeFlattenConcatFusionOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc b/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc index 4d063ba2be7cd..e5d32270bf4ee 100644 --- a/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc +++ b/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc @@ -28,8 +28,8 @@ template class TransposeFlattenConcatFusionKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto ins = ctx.MultiInput("X"); - auto* out = ctx.Output("Out"); + auto ins = ctx.MultiInput("X"); + auto* out = ctx.Output("Out"); auto& dev_ctx = ctx.template device_context(); dev_ctx.Alloc(out, out->numel() * sizeof(T)); auto odims = out->dims(); diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc index ff983684708aa..a040aa3779323 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc @@ -21,7 +21,7 @@ namespace paddle { namespace operators { using paddle::framework::LoDTensor; -using paddle::framework::Tensor; + using paddle::platform::MKLDNNGetDataType; using paddle::platform::MKLDNNMemDesc; using phi::CPUContext; @@ -35,8 +35,8 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler { const dnnl::engine mkldnn_engine, platform::Place cpu_place, const LoDTensor* input, - const Tensor* weight_h, - const Tensor* h0, + const phi::DenseTensor* weight_h, + const phi::DenseTensor* h0, const bool is_reverse, const int64_t N, const int64_t Ti, @@ -116,8 +116,8 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler { } template - std::shared_ptr AcquireWeightXMemory(const Tensor* weight_x, - const bool origin_mode) { + std::shared_ptr AcquireWeightXMemory( + const phi::DenseTensor* weight_x, const bool origin_mode) { const std::string wx_key = this->memory_key_ + "@weight_x"; auto memory_p = std::static_pointer_cast(this->dev_ctx_.GetBlob(wx_key)); @@ -156,8 +156,8 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler { } template - std::shared_ptr AcquireWeightHMemory(const Tensor* weight_h, - const bool 
origin_mode) { + std::shared_ptr AcquireWeightHMemory( + const phi::DenseTensor* weight_h, const bool origin_mode) { const std::string wh_key = this->memory_key_ + "@weight_h"; auto memory_p = std::static_pointer_cast(this->dev_ctx_.GetBlob(wh_key)); @@ -209,7 +209,7 @@ class GRUMKLDNNHandler : public RNNMKLDNNHandler { return memory_p; } - std::shared_ptr AcquireBiasMemory(const Tensor* bias, + std::shared_ptr AcquireBiasMemory(const phi::DenseTensor* bias, const bool origin_mode) { const std::string bias_key = this->memory_key_ + "@bias"; auto memory_p = std::static_pointer_cast( @@ -263,10 +263,10 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel { // Get Tensors const auto* input = ctx.Input("X"); - const auto* h0 = ctx.Input("H0"); - const auto* weight_x = ctx.Input("WeightX"); - const auto* weight_h = ctx.Input("WeightH"); - const auto* bias = ctx.Input("Bias"); + const auto* h0 = ctx.Input("H0"); + const auto* weight_x = ctx.Input("WeightX"); + const auto* weight_h = ctx.Input("WeightH"); + const auto* bias = ctx.Input("Bias"); auto* hidden = ctx.Output("Hidden"); auto x_dims = input->dims(); auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1) diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc index 748de5dae9520..6ecde2fdcf87e 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/fusion_lstm_mkldnn_op.cc @@ -21,7 +21,7 @@ namespace paddle { namespace operators { using paddle::framework::LoDTensor; -using paddle::framework::Tensor; + using paddle::platform::MKLDNNGetDataType; using paddle::platform::MKLDNNMemDesc; using phi::CPUContext; @@ -36,9 +36,9 @@ class LSTMMKLDNNHandler const dnnl::engine mkldnn_engine, platform::Place cpu_place, const LoDTensor* input, - const Tensor* weight_h, - const Tensor* h0, - const Tensor* c0, + const phi::DenseTensor* weight_h, + const phi::DenseTensor* h0, + const phi::DenseTensor* c0, const bool is_reverse, const int64_t N, const int64_t Ti, @@ -168,7 +168,8 @@ class LSTMMKLDNNHandler } template - std::shared_ptr AcquireWeightXMemory(const Tensor* weight_x) { + std::shared_ptr AcquireWeightXMemory( + const phi::DenseTensor* weight_x) { const std::string wx_key = this->memory_key_ + "@weight_x"; auto memory_p = std::static_pointer_cast(this->dev_ctx_.GetBlob(wx_key)); @@ -199,7 +200,8 @@ class LSTMMKLDNNHandler } template - std::shared_ptr AcquireWeightHMemory(const Tensor* weight_h) { + std::shared_ptr AcquireWeightHMemory( + const phi::DenseTensor* weight_h) { const std::string wh_key = this->memory_key_ + "@weight_h"; auto memory_p = std::static_pointer_cast(this->dev_ctx_.GetBlob(wh_key)); @@ -229,7 +231,8 @@ class LSTMMKLDNNHandler return memory_p; } - std::shared_ptr AcquireBiasMemory(const Tensor* bias) { + std::shared_ptr AcquireBiasMemory( + const phi::DenseTensor* bias) { const std::string bias_key = this->memory_key_ + "@bias"; auto memory_p = std::static_pointer_cast( this->dev_ctx_.GetBlob(bias_key)); @@ -256,7 +259,8 @@ class LSTMMKLDNNHandler return memory_p; } - std::shared_ptr AcquirePeepholeWeights(const Tensor* bias) { + std::shared_ptr AcquirePeepholeWeights( + const phi::DenseTensor* bias) { const std::string peepholes_key = this->memory_key_ + "@peepholes_weights"; auto memory_p = std::static_pointer_cast( this->dev_ctx_.GetBlob(peepholes_key)); @@ -282,7 +286,7 @@ class LSTMMKLDNNHandler return memory_p; } - std::shared_ptr AcquireC0Memory(const Tensor* c0) { + 
std::shared_ptr AcquireC0Memory(const phi::DenseTensor* c0) { const std::string c0_key = this->memory_key_ + "@c0"; auto memory_p = std::static_pointer_cast(this->dev_ctx_.GetBlob(c0_key)); @@ -340,11 +344,11 @@ class FusionLSTMMKLDNNKernel : public framework::OpKernel { // Get Tensors const auto* input = ctx.Input("X"); - const auto* h0 = ctx.Input("H0"); - const auto* c0 = ctx.Input("C0"); - const auto* weight_x = ctx.Input("WeightX"); - const auto* weight_h = ctx.Input("WeightH"); - const auto* bias = ctx.Input("Bias"); + const auto* h0 = ctx.Input("H0"); + const auto* c0 = ctx.Input("C0"); + const auto* weight_x = ctx.Input("WeightX"); + const auto* weight_h = ctx.Input("WeightH"); + const auto* bias = ctx.Input("Bias"); auto* hidden = ctx.Output("Hidden"); auto* cell = ctx.Output("Cell"); cell = cell; diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h b/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h index a357a59a09420..f4ae023f85e43 100644 --- a/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h +++ b/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h @@ -20,7 +20,7 @@ namespace paddle { namespace operators { using paddle::framework::LoDTensor; -using paddle::framework::Tensor; + using paddle::platform::CreateKey; using paddle::platform::MKLDNNGetDataType; using paddle::platform::MKLDNNMemDesc; @@ -35,8 +35,8 @@ class RNNMKLDNNHandler : public platform::MKLDNNHandlerT { const dnnl::engine mkldnn_engine, platform::Place cpu_place, const LoDTensor* input, - const Tensor* weight_h, - const Tensor* h0, + const phi::DenseTensor* weight_h, + const phi::DenseTensor* h0, const bool is_reverse, const int64_t N, const int64_t Ti, @@ -201,7 +201,7 @@ class RNNMKLDNNHandler : public platform::MKLDNNHandlerT { // TODO(jczaja) H0 should be updated each iter and of T type (Fusion pass does // not support in yet) template - std::shared_ptr AcquireH0Memory(const Tensor* h0) { + std::shared_ptr AcquireH0Memory(const phi::DenseTensor* h0) { const std::string h0_key = memory_key_ + "@h0"; auto memory_p = std::static_pointer_cast(this->dev_ctx_.GetBlob(h0_key)); diff --git a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc index c59e7d661607c..372137511a2e6 100644 --- a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include #include -#include "dnnl.hpp" +#include "dnnl.hpp" // NOLINT #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/fused/multi_gru_op.h" @@ -27,7 +27,7 @@ namespace paddle { namespace operators { using paddle::framework::LoDTensor; -using paddle::framework::Tensor; + using paddle::platform::CreateKey; using paddle::platform::MKLDNNGetDataType; using paddle::platform::MKLDNNMemDesc; @@ -64,9 +64,9 @@ class MultiGRUHandler { layers_(ctx.Attr("layers")), concat_pds_(layers_, std::shared_ptr()), x_(ctx.Input("X")), - weights_x_(ctx.MultiInput("WeightX")), - weights_h_(ctx.MultiInput("WeightH")), - biases_(ctx.MultiInput("Bias")), + weights_x_(ctx.MultiInput("WeightX")), + weights_h_(ctx.MultiInput("WeightH")), + biases_(ctx.MultiInput("Bias")), hidden_(ctx.Output("Hidden")), x_lod_(x_->lod()[0]) { PADDLE_ENFORCE_EQ( @@ -672,9 +672,9 @@ class MultiGRUHandler { std::string memory_key_; const LoDTensor* x_; - const std::vector weights_x_; - const std::vector weights_h_; - const std::vector biases_; + const std::vector weights_x_; + const std::vector weights_h_; + const std::vector biases_; LoDTensor* hidden_; std::vector attrs_; const paddle::framework::Vector& x_lod_; diff --git a/paddle/fluid/operators/fused/multi_gru_op.h b/paddle/fluid/operators/fused/multi_gru_op.h index 8b064c8754f5e..ba239d20eb28f 100644 --- a/paddle/fluid/operators/fused/multi_gru_op.h +++ b/paddle/fluid/operators/fused/multi_gru_op.h @@ -21,7 +21,6 @@ namespace operators { using framework::ExecutionContext; using framework::LoDTensor; -using framework::Tensor; class MultiGRUOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/multihead_matmul_op.cu b/paddle/fluid/operators/fused/multihead_matmul_op.cu index c2e2754830bbd..a258c0107859c 100644 --- a/paddle/fluid/operators/fused/multihead_matmul_op.cu +++ b/paddle/fluid/operators/fused/multihead_matmul_op.cu @@ -260,11 +260,11 @@ template class MultiHeadMatMulV2Kernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - using Tensor = framework::Tensor; - auto *input = context.Input("Input"); - auto *w = context.Input("W"); - auto *bias = context.Input("Bias"); - auto *bias_qk = context.Input("BiasQK"); + using Tensor = phi::DenseTensor; + auto *input = context.Input("Input"); + auto *w = context.Input("W"); + auto *bias = context.Input("Bias"); + auto *bias_qk = context.Input("BiasQK"); auto *input_d = input->data(); auto *w_d = w->data(); @@ -310,7 +310,7 @@ class MultiHeadMatMulV2Kernel : public framework::OpKernel { int all_head_size = w_dims[2]; int head_size = all_head_size / head_number; - auto *out = context.Output("Out"); + auto *out = context.Output("Out"); out->Resize({batch, seq_len, all_head_size}); auto *output_d = device_ctx.template Alloc(out, out->numel() * sizeof(T)); diff --git a/paddle/fluid/operators/fused/resnet_basic_block_op.cc b/paddle/fluid/operators/fused/resnet_basic_block_op.cc index af5b76911692d..0f501368e73f0 100644 --- a/paddle/fluid/operators/fused/resnet_basic_block_op.cc +++ b/paddle/fluid/operators/fused/resnet_basic_block_op.cc @@ -17,7 +17,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class ResNetBasicBlockOp : public framework::OperatorWithKernel { public: @@ -227,26 +227,26 @@ class ResNetBasicBlockOp : public framework::OperatorWithKernel { // By default, the type of the scale, bias, mean, // and var tensors should be float when input tensor's dtype is float16. auto bn_param_type = framework::proto::VarType::FP32; - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("Scale1")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("Bias1")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("Scale2")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("Bias2")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale1")->dtype()), + platform::errors::InvalidArgument( + "Scale input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias1")->dtype()), + platform::errors::InvalidArgument( + "Bias input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale2")->dtype()), + platform::errors::InvalidArgument( + "Scale input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias2")->dtype()), + platform::errors::InvalidArgument( + "Bias input should be of float type")); framework::LibraryType library = framework::LibraryType::kPlain; framework::DataLayout layout = framework::DataLayout::kAnyLayout; diff --git a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc b/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc index 429e644da4006..8310116849611 100644 --- a/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc +++ b/paddle/fluid/operators/fused/resnet_basic_block_op_xpu.cc @@ -21,7 +21,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class ResnetBasicBlockAttr { public: @@ -49,11 +49,11 @@ class ResnetBasicBlockAttr { global_stats = test_mode || use_global_stats; // init shape - auto input1 = ctx.Input("X"); - auto filter1 = ctx.Input("Filter1"); - auto conv1_out = ctx.Output("Conv1"); - auto filter2 = ctx.Input("Filter2"); - auto conv2_out = ctx.Output("Conv2"); + auto input1 = ctx.Input("X"); + auto filter1 = ctx.Input("Filter1"); + auto conv1_out = ctx.Output("Conv1"); + auto filter2 = ctx.Input("Filter2"); + auto conv2_out = ctx.Output("Conv2"); conv1_input_shape = phi::vectorize(input1->dims()); conv1_output_shape = phi::vectorize(conv1_out->dims()); conv1_filter_shape = phi::vectorize(filter1->dims()); @@ -69,8 +69,8 @@ class ResnetBasicBlockAttr { conv2_output_numel = conv2_out->numel(); if (has_shortcut) { - auto filter3 = ctx.Input("Filter3"); - auto conv3_out = ctx.Output("Conv3"); + auto filter3 = ctx.Input("Filter3"); + auto conv3_out = ctx.Output("Conv3"); conv3_input_shape = phi::vectorize(input1->dims()); conv3_output_shape = phi::vectorize(conv3_out->dims()); conv3_filter_shape = 
phi::vectorize(filter3->dims()); @@ -137,11 +137,11 @@ class ResnetBasicBlockGradAttr { find_max = ctx.Attr("find_conv_input_max"); // init shape - auto input1 = ctx.Input("X"); - auto filter1 = ctx.Input("Filter1"); - auto conv1_out = ctx.Input("Conv1"); - auto filter2 = ctx.Input("Filter2"); - auto conv2_out = ctx.Input("Conv2"); + auto input1 = ctx.Input("X"); + auto filter1 = ctx.Input("Filter1"); + auto conv1_out = ctx.Input("Conv1"); + auto filter2 = ctx.Input("Filter2"); + auto conv2_out = ctx.Input("Conv2"); conv1_input_shape = phi::vectorize(input1->dims()); conv1_output_shape = phi::vectorize(conv1_out->dims()); conv1_filter_shape = phi::vectorize(filter1->dims()); @@ -157,8 +157,8 @@ class ResnetBasicBlockGradAttr { conv2_output_numel = conv2_out->numel(); if (has_shortcut) { - auto filter3 = ctx.Input("Filter3"); - auto conv3_out = ctx.Input("Conv3"); + auto filter3 = ctx.Input("Filter3"); + auto conv3_out = ctx.Input("Conv3"); conv3_input_shape = phi::vectorize(input1->dims()); conv3_output_shape = phi::vectorize(conv3_out->dims()); conv3_filter_shape = phi::vectorize(filter3->dims()); @@ -307,19 +307,19 @@ class ResNetBasicBlockXPUKernel : public framework::OpKernel { platform::errors::PreconditionNotMet("It must use XPUPlace.")); // input - const Tensor* x = ctx.Input("X"); - const Tensor* filter1 = ctx.Input("Filter1"); - const Tensor* scale1 = ctx.Input("Scale1"); - const Tensor* bias1 = ctx.Input("Bias1"); - const Tensor* filter2 = ctx.Input("Filter2"); - const Tensor* scale2 = ctx.Input("Scale2"); - const Tensor* bias2 = ctx.Input("Bias2"); + const phi::DenseTensor* x = ctx.Input("X"); + const phi::DenseTensor* filter1 = ctx.Input("Filter1"); + const phi::DenseTensor* scale1 = ctx.Input("Scale1"); + const phi::DenseTensor* bias1 = ctx.Input("Bias1"); + const phi::DenseTensor* filter2 = ctx.Input("Filter2"); + const phi::DenseTensor* scale2 = ctx.Input("Scale2"); + const phi::DenseTensor* bias2 = ctx.Input("Bias2"); // output - Tensor* conv1_output = ctx.Output("Conv1"); - Tensor* conv2_output = ctx.Output("Conv2"); - Tensor* conv2_input = ctx.Output("Conv2Input"); - Tensor* output = ctx.Output("Y"); + phi::DenseTensor* conv1_output = ctx.Output("Conv1"); + phi::DenseTensor* conv2_output = ctx.Output("Conv2"); + phi::DenseTensor* conv2_input = ctx.Output("Conv2Input"); + phi::DenseTensor* output = ctx.Output("Y"); auto place = ctx.GetPlace(); auto x_data = reinterpret_cast(x->data()); @@ -348,19 +348,23 @@ class ResNetBasicBlockXPUKernel : public framework::OpKernel { // init find max if (attr.find_max) { - Tensor* max_input1 = ctx.Output("MaxInput1"); - Tensor* max_filter1 = ctx.Output("MaxFilter1"); + phi::DenseTensor* max_input1 = ctx.Output("MaxInput1"); + phi::DenseTensor* max_filter1 = + ctx.Output("MaxFilter1"); conv1_input_max_data = max_input1->mutable_data(place); conv1_filter_max_data = max_filter1->mutable_data(place); - Tensor* max_input2 = ctx.Output("MaxInput2"); - Tensor* max_filter2 = ctx.Output("MaxFilter2"); + phi::DenseTensor* max_input2 = ctx.Output("MaxInput2"); + phi::DenseTensor* max_filter2 = + ctx.Output("MaxFilter2"); conv2_input_max_data = max_input2->mutable_data(place); conv2_filter_max_data = max_filter2->mutable_data(place); if (attr.has_shortcut) { - Tensor* max_input3 = ctx.Output("MaxInput3"); - Tensor* max_filter3 = ctx.Output("MaxFilter3"); + phi::DenseTensor* max_input3 = + ctx.Output("MaxInput3"); + phi::DenseTensor* max_filter3 = + ctx.Output("MaxFilter3"); conv3_input_max_data = max_input3->mutable_data(place); 
conv3_filter_max_data = max_filter3->mutable_data(place); } @@ -373,8 +377,8 @@ class ResNetBasicBlockXPUKernel : public framework::OpKernel { // 1. short const XPUT* z_out_data = nullptr; if (attr.has_shortcut) { - Tensor* conv3_out = ctx.Output("Conv3"); - const Tensor* filter3 = ctx.Input("Filter3"); + phi::DenseTensor* conv3_out = ctx.Output("Conv3"); + const phi::DenseTensor* filter3 = ctx.Input("Filter3"); auto conv3_filter_data = reinterpret_cast(filter3->data()); auto conv3_output_data = @@ -414,8 +418,8 @@ class ResNetBasicBlockXPUKernel : public framework::OpKernel { attr.group); // bn3 - const Tensor* scale3 = ctx.Input("Scale3"); - const Tensor* bias3 = ctx.Input("Bias3"); + const phi::DenseTensor* scale3 = ctx.Input("Scale3"); + const phi::DenseTensor* bias3 = ctx.Input("Bias3"); auto bias3_data = bias3->data(); auto scale3_data = scale3->data(); @@ -423,10 +427,14 @@ class ResNetBasicBlockXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_XDNN_NOT_NULL(bn3_output_data); if (!attr.global_stats) { - Tensor* saved_mean3 = ctx.Output("SavedMean3"); - Tensor* saved_invstd3 = ctx.Output("SavedInvstd3"); - Tensor* running_mean3 = ctx.Output("Mean3Out"); - Tensor* running_var3 = ctx.Output("Var3Out"); + phi::DenseTensor* saved_mean3 = + ctx.Output("SavedMean3"); + phi::DenseTensor* saved_invstd3 = + ctx.Output("SavedInvstd3"); + phi::DenseTensor* running_mean3 = + ctx.Output("Mean3Out"); + phi::DenseTensor* running_var3 = + ctx.Output("Var3Out"); auto saved_mean3_data = saved_mean3->mutable_data(place); auto saved_invstd3_data = saved_invstd3->mutable_data(place); @@ -455,8 +463,8 @@ class ResNetBasicBlockXPUKernel : public framework::OpKernel { 0); PADDLE_ENFORCE_XDNN_SUCCESS(r, "batch_norm_fusion"); } else { - const auto* mean3 = ctx.Input("Mean3"); - const auto* var3 = ctx.Input("Var3"); + const auto* mean3 = ctx.Input("Mean3"); + const auto* var3 = ctx.Input("Var3"); const auto* mean3_data = mean3->data(); const auto* variance3_data = var3->data(); r = xpu::batch_norm_infer(dev_ctx.x_context(), @@ -513,10 +521,13 @@ class ResNetBasicBlockXPUKernel : public framework::OpKernel { // 3. 
bn1 + relu if (!attr.global_stats) { - Tensor* saved_mean1 = ctx.Output("SavedMean1"); - Tensor* saved_invstd1 = ctx.Output("SavedInvstd1"); - Tensor* running_mean1 = ctx.Output("Mean1Out"); - Tensor* running_var1 = ctx.Output("Var1Out"); + phi::DenseTensor* saved_mean1 = + ctx.Output("SavedMean1"); + phi::DenseTensor* saved_invstd1 = + ctx.Output("SavedInvstd1"); + phi::DenseTensor* running_mean1 = + ctx.Output("Mean1Out"); + phi::DenseTensor* running_var1 = ctx.Output("Var1Out"); auto saved_mean1_data = saved_mean1->mutable_data(place); auto saved_invstd1_data = saved_invstd1->mutable_data(place); @@ -549,8 +560,8 @@ class ResNetBasicBlockXPUKernel : public framework::OpKernel { auto bn1_output_data = RAII_GUARD.alloc(attr.conv1_output_numel); PADDLE_ENFORCE_XDNN_NOT_NULL(bn1_output_data); - const auto* mean1 = ctx.Input("Mean1"); - const auto* var1 = ctx.Input("Var1"); + const auto* mean1 = ctx.Input("Mean1"); + const auto* var1 = ctx.Input("Var1"); const auto* mean_data = mean1->data(); const auto* variance_data = var1->data(); r = xpu::batch_norm_infer(dev_ctx.x_context(), @@ -580,8 +591,9 @@ class ResNetBasicBlockXPUKernel : public framework::OpKernel { XPUT* conv2_filter_l3_data = RAII_GUARD.alloc_l3(attr.conv2_filter_numel); if (attr.find_max) { - Tensor* max_input2 = ctx.Output("MaxInput2"); - Tensor* max_filter2 = ctx.Output("MaxFilter2"); + phi::DenseTensor* max_input2 = ctx.Output("MaxInput2"); + phi::DenseTensor* max_filter2 = + ctx.Output("MaxFilter2"); conv2_input_max_data = max_input2->mutable_data(place); conv2_filter_max_data = max_filter2->mutable_data(place); @@ -615,10 +627,13 @@ class ResNetBasicBlockXPUKernel : public framework::OpKernel { // 5. bn2 if (!attr.global_stats) { - Tensor* saved_mean2 = ctx.Output("SavedMean2"); - Tensor* saved_var2 = ctx.Output("SavedInvstd2"); - Tensor* running_mean2 = ctx.Output("Mean2Out"); - Tensor* running_var2 = ctx.Output("Var2Out"); + phi::DenseTensor* saved_mean2 = + ctx.Output("SavedMean2"); + phi::DenseTensor* saved_var2 = + ctx.Output("SavedInvstd2"); + phi::DenseTensor* running_mean2 = + ctx.Output("Mean2Out"); + phi::DenseTensor* running_var2 = ctx.Output("Var2Out"); auto saved_mean2_data = saved_mean2->mutable_data(place); auto saved_var2_data = saved_var2->mutable_data(place); @@ -650,8 +665,8 @@ class ResNetBasicBlockXPUKernel : public framework::OpKernel { auto bn2_out_data = RAII_GUARD.alloc(attr.conv2_output_numel); PADDLE_ENFORCE_XDNN_NOT_NULL(bn2_out_data); - const auto* mean2 = ctx.Input("Mean2"); - const auto* var2 = ctx.Input("Var2"); + const auto* mean2 = ctx.Input("Mean2"); + const auto* var2 = ctx.Input("Var2"); const auto* mean_data = mean2->data(); const auto* variance_data = var2->data(); r = xpu::batch_norm_infer(dev_ctx.x_context(), @@ -694,48 +709,69 @@ class ResNetBasicBlockGradXPUKernel : public framework::OpKernel { true, platform::errors::PreconditionNotMet("It must use XPUPlace.")); - const Tensor* y_grad = ctx.Input(framework::GradVarName("Y")); - const Tensor* y = ctx.Input("Y"); - - const Tensor* x = ctx.Input("X"); - const Tensor* filter1 = ctx.Input("Filter1"); - const Tensor* scale1 = ctx.Input("Scale1"); - const Tensor* filter2 = ctx.Input("Filter2"); - const Tensor* scale2 = ctx.Input("Scale2"); - const Tensor* saved_mean1 = ctx.Input("SavedMean1"); - const Tensor* saved_invstd1 = ctx.Input("SavedInvstd1"); - const Tensor* saved_mean2 = ctx.Input("SavedMean2"); - const Tensor* saved_invstd2 = ctx.Input("SavedInvstd2"); - const Tensor* conv1_out = ctx.Input("Conv1"); - const Tensor* conv2_out 
= ctx.Input("Conv2"); - const Tensor* conv2_input = ctx.Input("Conv2Input"); - - const Tensor* filter3 = ctx.Input("Filter3"); - const Tensor* conv3_out = ctx.Input("Conv3"); - const Tensor* scale3 = ctx.Input("Scale3"); - const Tensor* saved_mean3 = ctx.Input("SavedMean3"); - const Tensor* saved_invstd3 = ctx.Input("SavedInvstd3"); - - const Tensor* conv1_input_max = ctx.Input("MaxInput1"); - const Tensor* conv1_filter_max = ctx.Input("MaxFilter1"); - const Tensor* conv2_input_max = ctx.Input("MaxInput2"); - const Tensor* conv2_filter_max = ctx.Input("MaxFilter2"); - const Tensor* conv3_input_max = ctx.Input("MaxInput3"); - const Tensor* conv3_filter_max = ctx.Input("MaxFilter3"); - - Tensor* x_grad = ctx.Output(framework::GradVarName("X")); - Tensor* filter1_grad = - ctx.Output(framework::GradVarName("Filter1")); - Tensor* scale1_grad = ctx.Output(framework::GradVarName("Scale1")); - Tensor* bias1_grad = ctx.Output(framework::GradVarName("Bias1")); - Tensor* filter2_grad = - ctx.Output(framework::GradVarName("Filter2")); - Tensor* scale2_grad = ctx.Output(framework::GradVarName("Scale2")); - Tensor* bias2_grad = ctx.Output(framework::GradVarName("Bias2")); - Tensor* filter3_grad = - ctx.Output(framework::GradVarName("Filter3")); - Tensor* scale3_grad = ctx.Output(framework::GradVarName("Scale3")); - Tensor* bias3_grad = ctx.Output(framework::GradVarName("Bias3")); + const phi::DenseTensor* y_grad = + ctx.Input(framework::GradVarName("Y")); + const phi::DenseTensor* y = ctx.Input("Y"); + + const phi::DenseTensor* x = ctx.Input("X"); + const phi::DenseTensor* filter1 = ctx.Input("Filter1"); + const phi::DenseTensor* scale1 = ctx.Input("Scale1"); + const phi::DenseTensor* filter2 = ctx.Input("Filter2"); + const phi::DenseTensor* scale2 = ctx.Input("Scale2"); + const phi::DenseTensor* saved_mean1 = + ctx.Input("SavedMean1"); + const phi::DenseTensor* saved_invstd1 = + ctx.Input("SavedInvstd1"); + const phi::DenseTensor* saved_mean2 = + ctx.Input("SavedMean2"); + const phi::DenseTensor* saved_invstd2 = + ctx.Input("SavedInvstd2"); + const phi::DenseTensor* conv1_out = ctx.Input("Conv1"); + const phi::DenseTensor* conv2_out = ctx.Input("Conv2"); + const phi::DenseTensor* conv2_input = + ctx.Input("Conv2Input"); + + const phi::DenseTensor* filter3 = ctx.Input("Filter3"); + const phi::DenseTensor* conv3_out = ctx.Input("Conv3"); + const phi::DenseTensor* scale3 = ctx.Input("Scale3"); + const phi::DenseTensor* saved_mean3 = + ctx.Input("SavedMean3"); + const phi::DenseTensor* saved_invstd3 = + ctx.Input("SavedInvstd3"); + + const phi::DenseTensor* conv1_input_max = + ctx.Input("MaxInput1"); + const phi::DenseTensor* conv1_filter_max = + ctx.Input("MaxFilter1"); + const phi::DenseTensor* conv2_input_max = + ctx.Input("MaxInput2"); + const phi::DenseTensor* conv2_filter_max = + ctx.Input("MaxFilter2"); + const phi::DenseTensor* conv3_input_max = + ctx.Input("MaxInput3"); + const phi::DenseTensor* conv3_filter_max = + ctx.Input("MaxFilter3"); + + phi::DenseTensor* x_grad = + ctx.Output(framework::GradVarName("X")); + phi::DenseTensor* filter1_grad = + ctx.Output(framework::GradVarName("Filter1")); + phi::DenseTensor* scale1_grad = + ctx.Output(framework::GradVarName("Scale1")); + phi::DenseTensor* bias1_grad = + ctx.Output(framework::GradVarName("Bias1")); + phi::DenseTensor* filter2_grad = + ctx.Output(framework::GradVarName("Filter2")); + phi::DenseTensor* scale2_grad = + ctx.Output(framework::GradVarName("Scale2")); + phi::DenseTensor* bias2_grad = + ctx.Output(framework::GradVarName("Bias2")); + 
phi::DenseTensor* filter3_grad = + ctx.Output(framework::GradVarName("Filter3")); + phi::DenseTensor* scale3_grad = + ctx.Output(framework::GradVarName("Scale3")); + phi::DenseTensor* bias3_grad = + ctx.Output(framework::GradVarName("Bias3")); // attrs ResnetBasicBlockGradAttr attr(ctx); diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cc b/paddle/fluid/operators/fused/resnet_unit_op.cc index 779e28c85b72a..61c8d9813ea29 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op.cc @@ -18,7 +18,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; // Shape of bitmask static framework::DDim GetBitmaskDims(std::vector out_shape) { @@ -209,16 +209,16 @@ class ResNetUnitOp : public framework::OperatorWithKernel { // and var tensors should be float when input tensor's dtype is float16. auto bn_param_type = framework::proto::VarType::FP32; - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("ScaleX")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("BiasX")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("ScaleX")->dtype()), + platform::errors::InvalidArgument( + "Scale input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("BiasX")->dtype()), + platform::errors::InvalidArgument( + "Bias input should be of float type")); framework::LibraryType library = framework::LibraryType::kPlain; framework::DataLayout layout = framework::DataLayout::kAnyLayout; return framework::OpKernelType( diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cu b/paddle/fluid/operators/fused/resnet_unit_op.cu index d0a8788e0db2f..02bde0ef04ff2 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cu +++ b/paddle/fluid/operators/fused/resnet_unit_op.cu @@ -23,7 +23,7 @@ limitations under the License. 
*/
 namespace paddle {
 namespace operators {
 
-using Tensor = framework::Tensor;
+using Tensor = phi::DenseTensor;
 
 template <typename T>
 class ResNetUnitKernel : public framework::OpKernel<T> {
@@ -39,20 +39,20 @@ class ResNetUnitKernel : public framework::OpKernel<T> {
                           "ResNetUnitOp only supports float16 for now."));
 
     // input x
-    const Tensor *input_x = ctx.Input<Tensor>("X");
-    const Tensor *filter_x = ctx.Input<Tensor>("FilterX");
-    const Tensor *scale_x = ctx.Input<Tensor>("ScaleX");
-    const Tensor *bias_x = ctx.Input<Tensor>("BiasX");
+    const Tensor *input_x = ctx.Input<phi::DenseTensor>("X");
+    const Tensor *filter_x = ctx.Input<phi::DenseTensor>("FilterX");
+    const Tensor *scale_x = ctx.Input<phi::DenseTensor>("ScaleX");
+    const Tensor *bias_x = ctx.Input<phi::DenseTensor>("BiasX");
     // norm conv
-    Tensor *conv_out_x = ctx.Output<Tensor>("ConvX");
+    Tensor *conv_out_x = ctx.Output<phi::DenseTensor>("ConvX");
     // bn finalize
-    Tensor *saved_mean_x = ctx.Output<Tensor>("SavedMeanX");
-    Tensor *saved_invstd_x = ctx.Output<Tensor>("SavedInvstdX");
-    Tensor *running_mean_x = ctx.Output<Tensor>("RunningMeanX");
-    Tensor *running_var_x = ctx.Output<Tensor>("RunningVarX");
+    Tensor *saved_mean_x = ctx.Output<phi::DenseTensor>("SavedMeanX");
+    Tensor *saved_invstd_x = ctx.Output<phi::DenseTensor>("SavedInvstdX");
+    Tensor *running_mean_x = ctx.Output<phi::DenseTensor>("RunningMeanX");
+    Tensor *running_var_x = ctx.Output<phi::DenseTensor>("RunningVarX");
     // sbar
-    Tensor *output = ctx.Output<Tensor>("Y");
-    Tensor *bitmask = ctx.Output<Tensor>("BitMask");
+    Tensor *output = ctx.Output<phi::DenseTensor>("Y");
+    Tensor *bitmask = ctx.Output<phi::DenseTensor>("BitMask");
     // attrs
     int padding = ctx.Attr<int>("padding");
     int stride = ctx.Attr<int>("stride");
@@ -140,17 +140,17 @@ class ResNetUnitKernel : public framework::OpKernel<T> {
                            bitmask_shape);
     if (has_shortcut) {
       // input z
-      const Tensor *input_z = ctx.Input<Tensor>("Z");
-      const Tensor *filter_z = ctx.Input<Tensor>("FilterZ");
-      const Tensor *scale_z = ctx.Input<Tensor>("ScaleZ");
-      const Tensor *bias_z = ctx.Input<Tensor>("BiasZ");
+      const Tensor *input_z = ctx.Input<phi::DenseTensor>("Z");
+      const Tensor *filter_z = ctx.Input<phi::DenseTensor>("FilterZ");
+      const Tensor *scale_z = ctx.Input<phi::DenseTensor>("ScaleZ");
+      const Tensor *bias_z = ctx.Input<phi::DenseTensor>("BiasZ");
       // norm conv
-      Tensor *conv_out_z = ctx.Output<Tensor>("ConvZ");
+      Tensor *conv_out_z = ctx.Output<phi::DenseTensor>("ConvZ");
       // bn finalize
-      Tensor *saved_mean_z = ctx.Output<Tensor>("SavedMeanZ");
-      Tensor *saved_invstd_z = ctx.Output<Tensor>("SavedInvstdZ");
-      Tensor *running_mean_z = ctx.Output<Tensor>("RunningMeanZ");
-      Tensor *running_var_z = ctx.Output<Tensor>("RunningVarZ");
+      Tensor *saved_mean_z = ctx.Output<phi::DenseTensor>("SavedMeanZ");
+      Tensor *saved_invstd_z = ctx.Output<phi::DenseTensor>("SavedInvstdZ");
+      Tensor *running_mean_z = ctx.Output<phi::DenseTensor>("RunningMeanZ");
+      Tensor *running_var_z = ctx.Output<phi::DenseTensor>("RunningVarZ");
 
       auto input_z_shape = phi::vectorize<int>(input_z->dims());
       auto filter_z_shape = phi::vectorize<int>(filter_z->dims());
@@ -203,7 +203,8 @@ class ResNetUnitKernel : public framework::OpKernel<T> {
                      output,
                      bitmask);
     } else {
-      const Tensor *input_z = fuse_add ? ctx.Input<Tensor>("Z") : nullptr;
+      const Tensor *input_z =
+          fuse_add ?
ctx.Input("Z") : nullptr; sbar_op.Forward(dev_ctx, *conv_out_x, equiv_scale_x, @@ -230,24 +231,27 @@ class ResNetUnitGradKernel : public framework::OpKernel { platform::errors::Unavailable( "ResNetUnitOp only supports float16 for now.")); - const Tensor *y_grad = ctx.Input(framework::GradVarName("Y")); + const Tensor *y_grad = + ctx.Input(framework::GradVarName("Y")); - const Tensor *x = ctx.Input("X"); - const Tensor *filter_x = ctx.Input("FilterX"); - const Tensor *scale_x = ctx.Input("ScaleX"); - const Tensor *bias_x = ctx.Input("BiasX"); - const Tensor *saved_mean_x = ctx.Input("SavedMeanX"); - const Tensor *saved_invstd_x = ctx.Input("SavedInvstdX"); + const Tensor *x = ctx.Input("X"); + const Tensor *filter_x = ctx.Input("FilterX"); + const Tensor *scale_x = ctx.Input("ScaleX"); + const Tensor *bias_x = ctx.Input("BiasX"); + const Tensor *saved_mean_x = ctx.Input("SavedMeanX"); + const Tensor *saved_invstd_x = ctx.Input("SavedInvstdX"); - const Tensor *conv_out_x = ctx.Input("ConvX"); - const Tensor *output = ctx.Input("Y"); - const Tensor *bitmask = ctx.Input("BitMask"); + const Tensor *conv_out_x = ctx.Input("ConvX"); + const Tensor *output = ctx.Input("Y"); + const Tensor *bitmask = ctx.Input("BitMask"); - Tensor *x_grad = ctx.Output(framework::GradVarName("X")); + Tensor *x_grad = ctx.Output(framework::GradVarName("X")); Tensor *filter_x_grad = - ctx.Output(framework::GradVarName("FilterX")); - Tensor *scale_x_grad = ctx.Output(framework::GradVarName("ScaleX")); - Tensor *bias_x_grad = ctx.Output(framework::GradVarName("BiasX")); + ctx.Output(framework::GradVarName("FilterX")); + Tensor *scale_x_grad = + ctx.Output(framework::GradVarName("ScaleX")); + Tensor *bias_x_grad = + ctx.Output(framework::GradVarName("BiasX")); int padding = ctx.Attr("padding"); int stride = ctx.Attr("stride"); @@ -291,20 +295,23 @@ class ResNetUnitGradKernel : public framework::OpKernel { // ScaleBiasAddRelu // | // Y - const Tensor *z = ctx.Input("Z"); - const Tensor *filter_z = ctx.Input("FilterZ"); - const Tensor *scale_z = ctx.Input("ScaleZ"); - const Tensor *bias_z = ctx.Input("BiasZ"); - const Tensor *saved_mean_z = ctx.Input("SavedMeanZ"); - const Tensor *saved_invstd_z = ctx.Input("SavedInvstdZ"); - const Tensor *conv_out_z = ctx.Input("ConvZ"); - - Tensor *z_grad = ctx.Output(framework::GradVarName("Z")); + const Tensor *z = ctx.Input("Z"); + const Tensor *filter_z = ctx.Input("FilterZ"); + const Tensor *scale_z = ctx.Input("ScaleZ"); + const Tensor *bias_z = ctx.Input("BiasZ"); + const Tensor *saved_mean_z = ctx.Input("SavedMeanZ"); + const Tensor *saved_invstd_z = + ctx.Input("SavedInvstdZ"); + const Tensor *conv_out_z = ctx.Input("ConvZ"); + + Tensor *z_grad = + ctx.Output(framework::GradVarName("Z")); Tensor *filter_z_grad = - ctx.Output(framework::GradVarName("FilterZ")); + ctx.Output(framework::GradVarName("FilterZ")); Tensor *scale_z_grad = - ctx.Output(framework::GradVarName("ScaleZ")); - Tensor *bias_z_grad = ctx.Output(framework::GradVarName("BiasZ")); + ctx.Output(framework::GradVarName("ScaleZ")); + Tensor *bias_z_grad = + ctx.Output(framework::GradVarName("BiasZ")); // 1.1 Backward of BN + Add (+ Relu) for x, get conv_out_x_grad, // scale_x_grad, bias_x_grad and z_grad_temp @@ -360,7 +367,8 @@ class ResNetUnitGradKernel : public framework::OpKernel { // 1.1 Backward of BN (+ Add + Relu) for x, get conv_out_x_grad, // scale_x_grad, bias_x_grad (and z_grad) Tensor *z_grad = - fuse_add ? ctx.Output(framework::GradVarName("Z")) : nullptr; + fuse_add ? 
ctx.Output(framework::GradVarName("Z")) + : nullptr; sbar_x_op.Backward(dev_ctx, *y_grad, *conv_out_x, diff --git a/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc b/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc index e9ad179960628..80986761c7cba 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op_xpu.cc @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ResNetUnitXPUKernel : public framework::OpKernel { @@ -35,19 +35,19 @@ class ResNetUnitXPUKernel : public framework::OpKernel { bool is_nchw = (ctx.Attr("data_format") == "NCHW"); // input x - const Tensor *input_x = ctx.Input("X"); - const Tensor *filter_x = ctx.Input("FilterX"); - const Tensor *scale_x = ctx.Input("ScaleX"); - const Tensor *bias_x = ctx.Input("BiasX"); + const Tensor *input_x = ctx.Input("X"); + const Tensor *filter_x = ctx.Input("FilterX"); + const Tensor *scale_x = ctx.Input("ScaleX"); + const Tensor *bias_x = ctx.Input("BiasX"); // output x - Tensor *conv_out_x = ctx.Output("ConvX"); - Tensor *saved_mean_x = ctx.Output("SavedMeanX"); - Tensor *saved_invstd_x = ctx.Output("SavedInvstdX"); - Tensor *running_mean_x = ctx.Output("RunningMeanX"); - Tensor *running_var_x = ctx.Output("RunningVarX"); + Tensor *conv_out_x = ctx.Output("ConvX"); + Tensor *saved_mean_x = ctx.Output("SavedMeanX"); + Tensor *saved_invstd_x = ctx.Output("SavedInvstdX"); + Tensor *running_mean_x = ctx.Output("RunningMeanX"); + Tensor *running_var_x = ctx.Output("RunningVarX"); - Tensor *output = ctx.Output("Y"); + Tensor *output = ctx.Output("Y"); // attrs int padding = ctx.Attr("padding"); @@ -101,16 +101,16 @@ class ResNetUnitXPUKernel : public framework::OpKernel { std::vector w_maxlist = {nullptr}; if (has_shortcut) { // input z - const Tensor *input_z = ctx.Input("Z"); - const Tensor *filter_z = ctx.Input("FilterZ"); - const Tensor *scale_z = ctx.Input("ScaleZ"); - const Tensor *bias_z = ctx.Input("BiasZ"); + const Tensor *input_z = ctx.Input("Z"); + const Tensor *filter_z = ctx.Input("FilterZ"); + const Tensor *scale_z = ctx.Input("ScaleZ"); + const Tensor *bias_z = ctx.Input("BiasZ"); - Tensor *conv_out_z = ctx.Output("ConvZ"); - Tensor *saved_mean_z = ctx.Output("SavedMeanZ"); - Tensor *saved_invstd_z = ctx.Output("SavedInvstdZ"); - Tensor *running_mean_z = ctx.Output("RunningMeanZ"); - Tensor *running_var_z = ctx.Output("RunningVarZ"); + Tensor *conv_out_z = ctx.Output("ConvZ"); + Tensor *saved_mean_z = ctx.Output("SavedMeanZ"); + Tensor *saved_invstd_z = ctx.Output("SavedInvstdZ"); + Tensor *running_mean_z = ctx.Output("RunningMeanZ"); + Tensor *running_var_z = ctx.Output("RunningVarZ"); x_list.push_back(reinterpret_cast(input_z->data())); w_list.push_back(reinterpret_cast(filter_z->data())); @@ -137,7 +137,7 @@ class ResNetUnitXPUKernel : public framework::OpKernel { w_maxlist.push_back(nullptr); } else { if (fuse_add) { - const Tensor *input_z = ctx.Input("Z"); + const Tensor *input_z = ctx.Input("Z"); auto input_z_shape = phi::vectorize(input_z->dims()); x_list.push_back(reinterpret_cast(input_z->data())); x_shape_list.push_back(input_z_shape); @@ -189,20 +189,23 @@ class ResNetUnitGradXPUKernel : public framework::OpKernel { platform::errors::PreconditionNotMet("It must use XPUPlace.")); bool is_nchw = (ctx.Attr("data_format") == "NCHW"); - const Tensor *y_grad = ctx.Input(framework::GradVarName("Y")); - const Tensor *x = ctx.Input("X"); - 
const Tensor *filter_x = ctx.Input("FilterX"); - const Tensor *scale_x = ctx.Input("ScaleX"); - const Tensor *saved_mean_x = ctx.Input("SavedMeanX"); - const Tensor *saved_invstd_x = ctx.Input("SavedInvstdX"); - const Tensor *conv_out_x = ctx.Input("ConvX"); - const Tensor *output = ctx.Input("Y"); - - Tensor *x_grad = ctx.Output(framework::GradVarName("X")); + const Tensor *y_grad = + ctx.Input(framework::GradVarName("Y")); + const Tensor *x = ctx.Input("X"); + const Tensor *filter_x = ctx.Input("FilterX"); + const Tensor *scale_x = ctx.Input("ScaleX"); + const Tensor *saved_mean_x = ctx.Input("SavedMeanX"); + const Tensor *saved_invstd_x = ctx.Input("SavedInvstdX"); + const Tensor *conv_out_x = ctx.Input("ConvX"); + const Tensor *output = ctx.Input("Y"); + + Tensor *x_grad = ctx.Output(framework::GradVarName("X")); Tensor *filter_x_grad = - ctx.Output(framework::GradVarName("FilterX")); - Tensor *scale_x_grad = ctx.Output(framework::GradVarName("ScaleX")); - Tensor *bias_x_grad = ctx.Output(framework::GradVarName("BiasX")); + ctx.Output(framework::GradVarName("FilterX")); + Tensor *scale_x_grad = + ctx.Output(framework::GradVarName("ScaleX")); + Tensor *bias_x_grad = + ctx.Output(framework::GradVarName("BiasX")); int padding = ctx.Attr("padding"); int stride = ctx.Attr("stride"); @@ -262,19 +265,22 @@ class ResNetUnitGradXPUKernel : public framework::OpKernel { // ScaleBiasAddRelu // | // Y - const Tensor *z = ctx.Input("Z"); - const Tensor *filter_z = ctx.Input("FilterZ"); - const Tensor *scale_z = ctx.Input("ScaleZ"); - const Tensor *saved_mean_z = ctx.Input("SavedMeanZ"); - const Tensor *saved_invstd_z = ctx.Input("SavedInvstdZ"); - const Tensor *conv_out_z = ctx.Input("ConvZ"); - - Tensor *z_grad = ctx.Output(framework::GradVarName("Z")); + const Tensor *z = ctx.Input("Z"); + const Tensor *filter_z = ctx.Input("FilterZ"); + const Tensor *scale_z = ctx.Input("ScaleZ"); + const Tensor *saved_mean_z = ctx.Input("SavedMeanZ"); + const Tensor *saved_invstd_z = + ctx.Input("SavedInvstdZ"); + const Tensor *conv_out_z = ctx.Input("ConvZ"); + + Tensor *z_grad = + ctx.Output(framework::GradVarName("Z")); Tensor *filter_z_grad = - ctx.Output(framework::GradVarName("FilterZ")); + ctx.Output(framework::GradVarName("FilterZ")); Tensor *scale_z_grad = - ctx.Output(framework::GradVarName("ScaleZ")); - Tensor *bias_z_grad = ctx.Output(framework::GradVarName("BiasZ")); + ctx.Output(framework::GradVarName("ScaleZ")); + Tensor *bias_z_grad = + ctx.Output(framework::GradVarName("BiasZ")); x_list.push_back(reinterpret_cast(z->data())); w_list.push_back(reinterpret_cast(filter_z->data())); conv_y_list.push_back( @@ -303,7 +309,7 @@ class ResNetUnitGradXPUKernel : public framework::OpKernel { dbias_list.push_back(bias_z_grad->mutable_data(place)); } else { if (fuse_add) { - auto z_grad = ctx.Output(framework::GradVarName("Z")); + auto z_grad = ctx.Output(framework::GradVarName("Z")); dx_list.push_back( reinterpret_cast(z_grad->mutable_data(place))); } diff --git a/paddle/fluid/operators/fused/skip_layernorm_op.cu b/paddle/fluid/operators/fused/skip_layernorm_op.cu index 307d61b31ad38..96646071567d5 100644 --- a/paddle/fluid/operators/fused/skip_layernorm_op.cu +++ b/paddle/fluid/operators/fused/skip_layernorm_op.cu @@ -29,11 +29,11 @@ template class SkipLayerNormKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - using Tensor = framework::Tensor; - auto *X = context.Input("X"); - auto *Y = context.Input("Y"); - auto *scale = 
context.Input("Scale"); - auto *bias = context.Input("Bias"); + using Tensor = phi::DenseTensor; + auto *X = context.Input("X"); + auto *Y = context.Input("Y"); + auto *scale = context.Input("Scale"); + auto *bias = context.Input("Bias"); auto *X_d = X->data(); auto *Y_d = Y->data(); @@ -42,7 +42,7 @@ class SkipLayerNormKernel : public framework::OpKernel { float epsilon = context.Attr("epsilon"); int begin_norm_axis = context.Attr("begin_norm_axis"); - auto *out = context.Output("Out"); + auto *out = context.Output("Out"); out->Resize(X->dims()); auto &dev_ctx = context.template device_context(); auto *output_d = dev_ctx.Alloc(out, out->numel() * sizeof(T)); diff --git a/paddle/fluid/operators/fused/yolo_box_head_op.cu b/paddle/fluid/operators/fused/yolo_box_head_op.cu index f932b13d993fa..696cab20db714 100644 --- a/paddle/fluid/operators/fused/yolo_box_head_op.cu +++ b/paddle/fluid/operators/fused/yolo_box_head_op.cu @@ -67,9 +67,9 @@ template class YoloBoxHeadKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - using Tensor = framework::Tensor; - auto* x = context.Input("X"); - auto* out = context.Output("Out"); + using Tensor = phi::DenseTensor; + auto* x = context.Input("X"); + auto* out = context.Output("Out"); auto anchors = context.Attr>("anchors"); auto class_num = context.Attr("class_num"); auto& device_ctx = context.template device_context(); diff --git a/paddle/fluid/operators/fused/yolo_box_post_op.cu b/paddle/fluid/operators/fused/yolo_box_post_op.cu index 4d53cccf97685..072f0374c5b82 100644 --- a/paddle/fluid/operators/fused/yolo_box_post_op.cu +++ b/paddle/fluid/operators/fused/yolo_box_post_op.cu @@ -319,13 +319,13 @@ template class YoloBoxPostKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; // prepare inputs std::vector boxes_input(3); std::vector> boxes_input_dims(3); for (int i = 0; i < 3; i++) { auto* boxes_tensor = - context.Input("Boxes" + std::to_string(i)); + context.Input("Boxes" + std::to_string(i)); boxes_input[i] = boxes_tensor->data(); auto dims = boxes_tensor->dims(); for (int j = 0; j < dims.size(); j++) { @@ -333,13 +333,13 @@ class YoloBoxPostKernel : public framework::OpKernel { } } const float* image_shape_data = - context.Input("ImageShape")->data(); + context.Input("ImageShape")->data(); const float* image_scale_data = - context.Input("ImageScale")->data(); + context.Input("ImageScale")->data(); // prepare outputs - auto* boxes_scores_tensor = context.Output("Out"); - auto* boxes_num_tensor = context.Output("NmsRoisNum"); + auto* boxes_scores_tensor = context.Output("Out"); + auto* boxes_num_tensor = context.Output("NmsRoisNum"); // prepare anchors std::vector anchors; @@ -382,7 +382,7 @@ class YoloBoxPostKernel : public framework::OpKernel { // clip_bbox and scale_x_y is not used now! 
     float nms_threshold = context.Attr<float>("nms_threshold");
-    int batch = context.Input<Tensor>("ImageShape")->dims()[0];
+    int batch = context.Input<phi::DenseTensor>("ImageShape")->dims()[0];
     TensorInfo* ts_info = new TensorInfo[batch * boxes_input.size()];
     for (int i = 0; i < batch * static_cast<int>(boxes_input.size()); i++) {
 #ifdef PADDLE_WITH_HIP
diff --git a/paddle/fluid/operators/fused_softmax_mask_op.cc b/paddle/fluid/operators/fused_softmax_mask_op.cc
index 604eaaaf3fc7c..3c8db22f52617 100644
--- a/paddle/fluid/operators/fused_softmax_mask_op.cc
+++ b/paddle/fluid/operators/fused_softmax_mask_op.cc
@@ -22,8 +22,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using framework::Tensor;
-
 class SoftmaxMaskFuseOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
diff --git a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc
index 5992fa2dfc6e4..5d1e4089a753d 100644
--- a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc
+++ b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc
@@ -17,8 +17,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using framework::Tensor;
-
 class SoftmaxMaskFuseUpperTriangleOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
diff --git a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu
index 54db576d3171b..4a59250847444 100644
--- a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu
+++ b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu
@@ -51,7 +51,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using framework::Tensor;
 
 #ifdef PADDLE_WITH_HIP
 #define WARP_SIZE 64
@@ -348,8 +347,8 @@ template
 class SoftmaxMaskFuseUpperTriangleKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* x = context.Input<Tensor>("X");
-    auto* y = context.Output<Tensor>("Out");
+    auto* x = context.Input<phi::DenseTensor>("X");
+    auto* y = context.Output<phi::DenseTensor>("Out");
 
     auto* x_data = x->data<T>();
     auto* y_data = y->mutable_data<T>(context.GetPlace());
@@ -458,9 +457,11 @@ template
 class SoftmaxMaskFuseUpperTriangleGradKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    auto* grad_x = context.Output<Tensor>(framework::GradVarName("X"));
-    auto* grad_y = context.Input<Tensor>(framework::GradVarName("Out"));
-    auto* softmax_rst = context.Input<Tensor>("Softmax");
+    auto* grad_x =
+        context.Output<phi::DenseTensor>(framework::GradVarName("X"));
+    auto* grad_y =
+        context.Input<phi::DenseTensor>(framework::GradVarName("Out"));
+    auto* softmax_rst = context.Input<phi::DenseTensor>("Softmax");
 
     auto* grad_x_data = grad_x->mutable_data<T>(context.GetPlace());
     auto* grad_y_data = grad_y->data<T>();
diff --git a/paddle/fluid/operators/fused_token_prune_op.cc b/paddle/fluid/operators/fused_token_prune_op.cc
index da43ab7588647..2fb5435bdcbe6 100644
--- a/paddle/fluid/operators/fused_token_prune_op.cc
+++ b/paddle/fluid/operators/fused_token_prune_op.cc
@@ -15,8 +15,6 @@ limitations under the License.
*/ namespace paddle { namespace operators { -using framework::Tensor; - class FusedTokenPruneOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { diff --git a/paddle/fluid/operators/fused_token_prune_op.cu b/paddle/fluid/operators/fused_token_prune_op.cu index 90044f30d8a6e..acf589ef186eb 100644 --- a/paddle/fluid/operators/fused_token_prune_op.cu +++ b/paddle/fluid/operators/fused_token_prune_op.cu @@ -28,8 +28,6 @@ namespace cub = hipcub; namespace paddle { namespace operators { -using framework::Tensor; - template struct AttnMaskFunctor { inline HOSTDEVICE T operator()(const T a, const T b) const { @@ -87,10 +85,11 @@ class FusedTokenPruneOpCUDAKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto& dev_ctx = context.cuda_device_context(); // Inouts - const Tensor* attn = context.Input("Attn"); - const Tensor* x = context.Input("X"); - const Tensor* mask = context.Input("Mask"); - const Tensor* new_mask = context.Input("NewMask"); + const phi::DenseTensor* attn = context.Input("Attn"); + const phi::DenseTensor* x = context.Input("X"); + const phi::DenseTensor* mask = context.Input("Mask"); + const phi::DenseTensor* new_mask = + context.Input("NewMask"); // Input dims auto attn_dims = attn->dims(); @@ -108,35 +107,37 @@ class FusedTokenPruneOpCUDAKernel : public framework::OpKernel { const bool keep_order = context.Attr("keep_order"); // Outputs - Tensor* out_slimmed_x = context.Output("SlimmedX"); - Tensor* slimmed_indices = context.Output("CLSInds"); + phi::DenseTensor* out_slimmed_x = + context.Output("SlimmedX"); + phi::DenseTensor* slimmed_indices = + context.Output("CLSInds"); auto* out_slimmed_x_data = out_slimmed_x->mutable_data(context.GetPlace()); auto* slimmed_indices_data = slimmed_indices->mutable_data(context.GetPlace()); // Intermediate variable - Tensor attn_tmp; + phi::DenseTensor attn_tmp; auto* attn_tmp_data = attn_tmp.mutable_data(attn_dims, context.GetPlace()); - Tensor attn_accu; + phi::DenseTensor attn_accu; auto* attn_accu_data = attn_accu.mutable_data({bsz, max_seq_len}, context.GetPlace()); - Tensor attn_accu_indices; + phi::DenseTensor attn_accu_indices; auto* attn_accu_indices_data = attn_accu_indices.mutable_data( {bsz, max_seq_len}, context.GetPlace()); - Tensor sort_attn_accu; + phi::DenseTensor sort_attn_accu; auto* sort_attn_accu_data = sort_attn_accu.mutable_data({bsz, max_seq_len}, context.GetPlace()); - Tensor sort_attn_accu_indices; + phi::DenseTensor sort_attn_accu_indices; auto* sort_attn_accu_indices_data = sort_attn_accu_indices.mutable_data({bsz, max_seq_len}, context.GetPlace()); - Tensor temp_storage; + phi::DenseTensor temp_storage; // 1. 
Filter attn by mask - std::vector ins; - std::vector outs; + std::vector ins; + std::vector outs; ins.emplace_back(attn); ins.emplace_back(mask); outs.emplace_back(&attn_tmp); diff --git a/paddle/fluid/operators/gather_nd_op.cc b/paddle/fluid/operators/gather_nd_op.cc index 59648bc7d17eb..3198e35b8a438 100644 --- a/paddle/fluid/operators/gather_nd_op.cc +++ b/paddle/fluid/operators/gather_nd_op.cc @@ -27,7 +27,7 @@ class GatherNdOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); const auto& x_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); return framework::OpKernelType( x_type, diff --git a/paddle/fluid/operators/gather_nd_op_mlu.cc b/paddle/fluid/operators/gather_nd_op_mlu.cc index aa869f8fa1534..b6c96e3c2edd5 100644 --- a/paddle/fluid/operators/gather_nd_op_mlu.cc +++ b/paddle/fluid/operators/gather_nd_op_mlu.cc @@ -20,15 +20,15 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class GatherNdMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - auto *index = ctx.Input("Index"); - auto *out = ctx.Output("Out"); + auto *x = ctx.Input("X"); + auto *index = ctx.Input("Index"); + auto *out = ctx.Output("Out"); auto place = ctx.GetPlace(); out->template mutable_data(place); @@ -71,10 +71,10 @@ template class GatherNdGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *index = ctx.Input("Index"); - auto *dout = ctx.Input(framework::GradVarName("Out")); - auto *dx = ctx.Output(framework::GradVarName("X")); - auto *x = ctx.Input("X"); + auto *index = ctx.Input("Index"); + auto *dout = ctx.Input(framework::GradVarName("Out")); + auto *dx = ctx.Output(framework::GradVarName("X")); + auto *x = ctx.Input("X"); if (dx->numel() == 0) return; if (index->numel() == 0) { @@ -83,8 +83,8 @@ class GatherNdGradMLUKernel : public framework::OpKernel { return; } - framework::Tensor tmp_tensor(index->type()); - framework::Tensor tmp_tensor2(dout->type()); + phi::DenseTensor tmp_tensor(index->type()); + phi::DenseTensor tmp_tensor2(dout->type()); const auto index_dims = index->dims(); if (index_dims.size() == 1) { tmp_tensor.ShareDataWith(*index); diff --git a/paddle/fluid/operators/gather_nd_op_npu.cc b/paddle/fluid/operators/gather_nd_op_npu.cc index 3e91360fd054a..5cea840b4aec5 100644 --- a/paddle/fluid/operators/gather_nd_op_npu.cc +++ b/paddle/fluid/operators/gather_nd_op_npu.cc @@ -21,16 +21,16 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template class GatherNdNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - auto *index = ctx.Input("Index"); - auto *out = ctx.Output("Out"); + auto *x = ctx.Input("X"); + auto *index = ctx.Input("Index"); + auto *out = ctx.Output("Out"); out->template mutable_data(ctx.GetPlace()); @@ -65,10 +65,10 @@ template class GatherNdGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *index = ctx.Input("Index"); - auto *x = ctx.Input("X"); - auto *dout = ctx.Input(framework::GradVarName("Out")); - auto *dx = ctx.Output(framework::GradVarName("X")); + auto *index = ctx.Input("Index"); + auto *x = ctx.Input("X"); + auto *dout = ctx.Input(framework::GradVarName("Out")); + auto *dx = ctx.Output(framework::GradVarName("X")); auto *p = dx->mutable_data(ctx.GetPlace()); if (dx->numel() == 0) return; @@ -78,8 +78,8 @@ class GatherNdGradNPUKernel : public framework::OpKernel { return; } - framework::Tensor tmp_tensor(index->type()); - framework::Tensor tmp_tensor2(dout->type()); + phi::DenseTensor tmp_tensor(index->type()); + phi::DenseTensor tmp_tensor2(dout->type()); const auto index_dims = index->dims(); if (index_dims.size() == 1) { tmp_tensor.ShareDataWith(*index); diff --git a/paddle/fluid/operators/gather_op.cc b/paddle/fluid/operators/gather_op.cc index 77e4adfeea787..4907153a11874 100644 --- a/paddle/fluid/operators/gather_op.cc +++ b/paddle/fluid/operators/gather_op.cc @@ -40,7 +40,7 @@ class GatherOp : public framework::OperatorWithKernel { } framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "Axis") { return expected_kernel_type; @@ -63,7 +63,7 @@ class GatherGradOp : public framework::OperatorWithKernel { } framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "Axis") { return expected_kernel_type; diff --git a/paddle/fluid/operators/gather_op_mlu.cc b/paddle/fluid/operators/gather_op_mlu.cc index 5162e5838d013..20a108c981d7e 100644 --- a/paddle/fluid/operators/gather_op_mlu.cc +++ b/paddle/fluid/operators/gather_op_mlu.cc @@ -23,8 +23,8 @@ template class GatherOpMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - auto *index = ctx.Input("Index"); + auto *x = ctx.Input("X"); + auto *index = ctx.Input("Index"); auto axis = ctx.Attr("axis"); const auto index_dims = index->dims(); @@ -44,7 +44,7 @@ class GatherOpMLUKernel : public framework::OpKernel { index_dims.size())); } - auto *out = ctx.Output("Out"); + auto *out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc x_desc(*x); @@ -68,9 +68,9 @@ template class GatherGradOpMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *index = ctx.Input("Index"); - auto *dout = ctx.Input(framework::GradVarName("Out")); - auto *dx = ctx.Output(framework::GradVarName("X")); + auto *index 
= ctx.Input("Index"); + auto *dout = ctx.Input(framework::GradVarName("Out")); + auto *dx = ctx.Output(framework::GradVarName("X")); const auto index_dims = index->dims(); if (index_dims.size() == 2) { diff --git a/paddle/fluid/operators/gather_op_npu.cc b/paddle/fluid/operators/gather_op_npu.cc index 8f470b0f664e5..b6c1e3ddc6d21 100644 --- a/paddle/fluid/operators/gather_op_npu.cc +++ b/paddle/fluid/operators/gather_op_npu.cc @@ -28,9 +28,9 @@ template class GatherOpNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - auto *index = ctx.Input("Index"); - auto *out = ctx.Output("Out"); + auto *x = ctx.Input("X"); + auto *index = ctx.Input("Index"); + auto *out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); const auto &runner = NpuOpRunner( @@ -46,14 +46,14 @@ template class GatherGradOpNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *index = ctx.Input("Index"); - auto *x = ctx.Input("X"); - auto *dout = ctx.Input(framework::GradVarName("Out")); - auto *dx = ctx.Output(framework::GradVarName("X")); + auto *index = ctx.Input("Index"); + auto *x = ctx.Input("X"); + auto *dout = ctx.Input(framework::GradVarName("Out")); + auto *dx = ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); // step1: Unsqueeze index - framework::Tensor tmp_tensor(index->type()); + phi::DenseTensor tmp_tensor(index->type()); const auto index_dims = index->dims(); if (index_dims.size() == 1) { tmp_tensor.ShareDataWith(*index); diff --git a/paddle/fluid/operators/gather_scatter_kernel.cc b/paddle/fluid/operators/gather_scatter_kernel.cc index 716e103990e6f..e05a214dcb4c1 100644 --- a/paddle/fluid/operators/gather_scatter_kernel.cc +++ b/paddle/fluid/operators/gather_scatter_kernel.cc @@ -16,7 +16,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class TensorAssign { public: @@ -52,8 +52,8 @@ struct cpu_gather_scatter_functor { template void operator()(Tensor self, int dim, - const Tensor& index, - const Tensor& src, + const phi::DenseTensor& index, + const phi::DenseTensor& src, const std::string& method_name, const func_t& reduce_op, const platform::DeviceContext& ctx) { @@ -120,8 +120,8 @@ struct cpu_gather_scatter_functor { self_idx = is_scatter_like ? replace_index : index_idx; src_idx = is_scatter_like ? 
index_idx : replace_index; - reduce_op((tensor_t*)(self_data + self_idx), - (tensor_t*)(src_data + src_idx)); + reduce_op(static_cast(self_data + self_idx), + static_cast(src_data + src_idx)); index_idx++; } } @@ -132,7 +132,7 @@ struct cpu_gather_scatter_functor { template void cpu_gather_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor result, const platform::DeviceContext& ctx) { cpu_gather_scatter_functor void cpu_scatter_assign_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor src, const platform::DeviceContext& ctx) { cpu_gather_scatter_functor void cpu_scatter_add_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor src, const platform::DeviceContext& ctx) { cpu_gather_scatter_functor void cpu_scatter_mul_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor src, const platform::DeviceContext& ctx) { cpu_gather_scatter_functor void cpu_scatter_input_grad_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor output, const platform::DeviceContext& ctx) { auto* index_data = index.data(); diff --git a/paddle/fluid/operators/gather_scatter_kernel.cu b/paddle/fluid/operators/gather_scatter_kernel.cu index fa28481f4c4b6..80dbce4b24d28 100644 --- a/paddle/fluid/operators/gather_scatter_kernel.cu +++ b/paddle/fluid/operators/gather_scatter_kernel.cu @@ -18,7 +18,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class TensorAssign { public: @@ -98,7 +98,8 @@ __global__ void GatherScatterGPUKernel(tensor_t* self_data, i * outer_dim_size * replaced_select_dim_size; int64_t self_idx = is_scatter_like ? replace_index : tid; int64_t src_idx = is_scatter_like ? 
tid : replace_index; - reduce_op((tensor_t*)(self_data + self_idx), (tensor_t*)(src_data + src_idx)); + reduce_op(static_cast(self_data + self_idx), + static_cast(src_data + src_idx)); } template void operator()(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor src, const std::string& method_name, const func_t& reduce_op, @@ -161,7 +162,7 @@ struct gpu_gather_scatter_functor { template void gpu_gather_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor result, const platform::DeviceContext& ctx) { gpu_gather_scatter_functor void gpu_scatter_assign_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor src, const platform::DeviceContext& ctx) { gpu_gather_scatter_functor void gpu_scatter_add_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor src, const platform::DeviceContext& ctx) { gpu_gather_scatter_functor void gpu_scatter_mul_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor src, const platform::DeviceContext& ctx) { gpu_gather_scatter_functor void gpu_scatter_input_grad_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor grad, const platform::DeviceContext& ctx) { auto* index_data = index.data(); diff --git a/paddle/fluid/operators/gather_scatter_kernel.h b/paddle/fluid/operators/gather_scatter_kernel.h index 6aa6e4ff7b858..b97451b488b92 100644 --- a/paddle/fluid/operators/gather_scatter_kernel.h +++ b/paddle/fluid/operators/gather_scatter_kernel.h @@ -32,84 +32,84 @@ namespace operators { #define Instantiate_Template_Function_index_t(func, tensor_t) \ template void func(Tensor input, \ int dim, \ - const Tensor& index, \ + const phi::DenseTensor& index, \ Tensor result, \ const platform::DeviceContext& ctx); \ template void func(Tensor input, \ int dim, \ - const Tensor& index, \ + const phi::DenseTensor& index, \ Tensor result, \ const platform::DeviceContext& ctx); -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template void cpu_gather_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor result, const platform::DeviceContext& ctx); template void cpu_scatter_assign_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor src, const platform::DeviceContext& ctx); template void cpu_scatter_add_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor src, const platform::DeviceContext& ctx); template void cpu_scatter_mul_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor src, const platform::DeviceContext& ctx); template void cpu_scatter_input_grad_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor result, const platform::DeviceContext& ctx); template void gpu_gather_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor result, const platform::DeviceContext& ctx); template void gpu_scatter_assign_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor src, const platform::DeviceContext& ctx); template void gpu_scatter_add_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor src, const platform::DeviceContext& ctx); template void gpu_scatter_mul_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor 
src, const platform::DeviceContext& ctx); template void gpu_scatter_input_grad_kernel(Tensor self, int dim, - const Tensor& index, + const phi::DenseTensor& index, Tensor result, const platform::DeviceContext& ctx); } // namespace operators diff --git a/paddle/fluid/operators/gather_test.cc b/paddle/fluid/operators/gather_test.cc index 11c46d1772957..ff48ab776a856 100644 --- a/paddle/fluid/operators/gather_test.cc +++ b/paddle/fluid/operators/gather_test.cc @@ -20,9 +20,9 @@ limitations under the License. */ #include "paddle/fluid/platform/place.h" TEST(Gather, GatherData) { - paddle::framework::Tensor* src = new paddle::framework::Tensor(); - paddle::framework::Tensor* index = new paddle::framework::Tensor(); - paddle::framework::Tensor* output = new paddle::framework::Tensor(); + phi::DenseTensor* src = new phi::DenseTensor(); + phi::DenseTensor* index = new phi::DenseTensor(); + phi::DenseTensor* output = new phi::DenseTensor(); int* p_src = nullptr; int* p_index = nullptr; diff --git a/paddle/fluid/operators/gaussian_random_op.cc b/paddle/fluid/operators/gaussian_random_op.cc index b80bc7320c1fd..e2ee27f2561e1 100644 --- a/paddle/fluid/operators/gaussian_random_op.cc +++ b/paddle/fluid/operators/gaussian_random_op.cc @@ -26,7 +26,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class CPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel { @@ -34,7 +34,7 @@ class CPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { float mean = context.Attr("mean"); float std = context.Attr("std"); - auto* tensor = context.Output("Out"); + auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); unsigned int seed = static_cast(context.Attr("seed")); @@ -75,7 +75,7 @@ class GaussianRandomOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "ShapeTensor" || var_name == "ShapeTensorList") { return expected_kernel_type; diff --git a/paddle/fluid/operators/gaussian_random_op.cu b/paddle/fluid/operators/gaussian_random_op.cu index 4df716f79f2af..41d2547cc9ba0 100644 --- a/paddle/fluid/operators/gaussian_random_op.cu +++ b/paddle/fluid/operators/gaussian_random_op.cu @@ -51,7 +51,7 @@ template class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* tensor = context.Output("Out"); + auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); unsigned int seed = static_cast(context.Attr("seed")); T mean = static_cast(context.Attr("mean")); diff --git a/paddle/fluid/operators/gaussian_random_op_mlu.cc b/paddle/fluid/operators/gaussian_random_op_mlu.cc index 4b5229b9e63ea..a70ddc428d840 100644 --- a/paddle/fluid/operators/gaussian_random_op_mlu.cc +++ b/paddle/fluid/operators/gaussian_random_op_mlu.cc @@ -20,14 +20,14 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class MLUGaussianRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { float mean = context.Attr("mean"); float std = context.Attr("std"); - auto* tensor = context.Output("Out"); + auto* tensor = context.Output("Out"); tensor->mutable_data(context.GetPlace()); Tensor cpu_tensor(tensor->type()); diff --git a/paddle/fluid/operators/gaussian_random_op_npu.cc b/paddle/fluid/operators/gaussian_random_op_npu.cc index 8b3af57d923fe..0768f4be5c957 100644 --- a/paddle/fluid/operators/gaussian_random_op_npu.cc +++ b/paddle/fluid/operators/gaussian_random_op_npu.cc @@ -25,14 +25,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class NPUGaussianRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { float mean = context.Attr("mean"); float std = context.Attr("std"); - auto* tensor = context.Output("Out"); + auto* tensor = context.Output("Out"); tensor->mutable_data(context.GetPlace()); Tensor cpu_tensor(tensor->dtype()); diff --git a/paddle/fluid/operators/gelu_op_npu.cc b/paddle/fluid/operators/gelu_op_npu.cc index 15e16420582c3..f462336b412a3 100644 --- a/paddle/fluid/operators/gelu_op_npu.cc +++ b/paddle/fluid/operators/gelu_op_npu.cc @@ -23,15 +23,15 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class GeluNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); @@ -50,10 +50,10 @@ template class GeluGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* x = ctx.Input("X"); + auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/graph_khop_sampler_op.cu b/paddle/fluid/operators/graph_khop_sampler_op.cu index fc8f195fb70a8..c83419f309237 100644 --- a/paddle/fluid/operators/graph_khop_sampler_op.cu +++ b/paddle/fluid/operators/graph_khop_sampler_op.cu @@ -49,7 +49,7 @@ constexpr int WARP_SIZE = 32; namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template struct MaxFunctor { @@ -420,9 +420,9 @@ class GraphKhopSamplerOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { // 1. Get sample neighbors operators' inputs. 
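Most hunks in this section apply one mechanical substitution to operator kernels: inputs and outputs are fetched as phi::DenseTensor instead of the old framework::Tensor alias. The explicit template argument is implied by the + lines; the sketch below is an assumed illustration of the resulting shape, not a literal hunk from this patch (ToyKernel, its "X"/"Out" slots, and the include are hypothetical):

#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

// Hypothetical kernel showing the post-patch accessor pattern.
template <typename T>
class ToyKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // Previously written as Input<Tensor>/Output<Tensor> (or framework::Tensor).
    auto* x = ctx.Input<phi::DenseTensor>("X");
    auto* out = ctx.Output<phi::DenseTensor>("Out");
    out->Resize(x->dims());                 // shape the output like the input
    out->mutable_data<T>(ctx.GetPlace());   // allocate on the kernel's place
    // ... device-specific computation on x / out ...
  }
};

}  // namespace operators
}  // namespace paddle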
- auto* src = ctx.Input("Row"); - auto* dst_count = ctx.Input("Col_Ptr"); - auto* vertices = ctx.Input("X"); + auto* src = ctx.Input("Row"); + auto* dst_count = ctx.Input("Col_Ptr"); + auto* vertices = ctx.Input("X"); std::vector sample_sizes = ctx.Attr>("sample_sizes"); bool return_eids = ctx.Attr("return_eids"); @@ -451,7 +451,7 @@ class GraphKhopSamplerOpCUDAKernel : public framework::OpKernel { bool is_last_layer = false, is_first_layer = true; if (return_eids) { - auto* src_eids = ctx.Input("Eids"); + auto* src_eids = ctx.Input("Eids"); const T* src_eids_data = src_eids->data(); for (int i = 0; i < num_layers; i++) { if (i == num_layers - 1) { @@ -563,7 +563,7 @@ class GraphKhopSamplerOpCUDAKernel : public framework::OpKernel { eids_merge_ptr); } } - auto* out_eids = ctx.Output("Out_Eids"); + auto* out_eids = ctx.Output("Out_Eids"); out_eids->Resize({static_cast(eids_merge.size())}); T* p_out_eids = out_eids->mutable_data(ctx.GetPlace()); thrust::copy(eids_merge.begin(), eids_merge.end(), p_out_eids); @@ -592,11 +592,11 @@ class GraphKhopSamplerOpCUDAKernel : public framework::OpKernel { &orig_nodes, &reindex_nodes, bs); - auto* reindex_x = ctx.Output("Reindex_X"); + auto* reindex_x = ctx.Output("Reindex_X"); T* p_reindex_x = reindex_x->mutable_data(ctx.GetPlace()); thrust::copy(reindex_nodes.begin(), reindex_nodes.end(), p_reindex_x); - auto* sample_index = ctx.Output("Sample_Index"); + auto* sample_index = ctx.Output("Sample_Index"); sample_index->Resize({static_cast(subset.size())}); T* p_sample_index = sample_index->mutable_data(ctx.GetPlace()); thrust::copy(subset.begin(), subset.end(), p_sample_index); // Done! @@ -628,8 +628,8 @@ class GraphKhopSamplerOpCUDAKernel : public framework::OpKernel { thrust::raw_pointer_cast(dst_merge.data())); // 8. Give operator's outputs. - auto* out_src = ctx.Output("Out_Src"); - auto* out_dst = ctx.Output("Out_Dst"); + auto* out_src = ctx.Output("Out_Src"); + auto* out_dst = ctx.Output("Out_Dst"); out_src->Resize({static_cast(src_merge.size()), 1}); out_dst->Resize({static_cast(src_merge.size()), 1}); T* p_out_src = out_src->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/graph_khop_sampler_op.h b/paddle/fluid/operators/graph_khop_sampler_op.h index 1b08acbbedd23..278bbd5efd723 100644 --- a/paddle/fluid/operators/graph_khop_sampler_op.h +++ b/paddle/fluid/operators/graph_khop_sampler_op.h @@ -28,7 +28,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template void SampleUniqueNeighbors(bidiiter begin, bidiiter end, int num_samples) { @@ -198,9 +198,9 @@ class GraphKhopSamplerOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { // 1. Get sample neighbors operators' inputs. 
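Where a hunk only rewrites the file-local alias (-using Tensor = framework::Tensor; / +using Tensor = phi::DenseTensor;), the change is behaviour-preserving, since framework::Tensor is at this point itself an alias of phi::DenseTensor; keeping the alias lets untouched local uses such as Tensor cpu_tensor(tensor->dtype()) compile unchanged while explicit call sites migrate. A minimal sketch under that assumption (MakeTemp and the exact include are illustrative only):

#include "paddle/phi/core/dense_tensor.h"

namespace paddle {
namespace operators {

// The alias now points at phi::DenseTensor directly; nothing else changes,
// because framework::Tensor already named the same type.
using Tensor = phi::DenseTensor;

// Illustrative helper: local temporaries may keep using the short alias.
inline void MakeTemp(const phi::DenseTensor& src) {
  Tensor tmp(src.dtype());   // dtype-only constructor, as used elsewhere in this patch
  tmp.Resize(src.dims());    // phi::DenseTensor API is available directly
}

}  // namespace operators
}  // namespace paddle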
- auto* src = ctx.Input("Row"); - auto* dst_count = ctx.Input("Col_Ptr"); - auto* vertices = ctx.Input("X"); + auto* src = ctx.Input("Row"); + auto* dst_count = ctx.Input("Col_Ptr"); + auto* vertices = ctx.Input("X"); std::vector sample_sizes = ctx.Attr>("sample_sizes"); bool return_eids = ctx.Attr("return_eids"); @@ -229,7 +229,7 @@ class GraphKhopSamplerOpKernel : public framework::OpKernel { bool is_last_layer = false, is_first_layer = true; if (return_eids) { - auto* src_eids = ctx.Input("Eids"); + auto* src_eids = ctx.Input("Eids"); const T* src_eids_data = src_eids->data(); for (size_t i = 0; i < num_layers; i++) { if (i == num_layers - 1) { @@ -336,7 +336,7 @@ class GraphKhopSamplerOpKernel : public framework::OpKernel { eids_merge_ptr); } } - auto* out_eids = ctx.Output("Out_Eids"); + auto* out_eids = ctx.Output("Out_Eids"); out_eids->Resize({static_cast(eids_merge.size())}); T* p_out_eids = out_eids->mutable_data(ctx.GetPlace()); std::copy(eids_merge.begin(), eids_merge.end(), p_out_eids); @@ -377,16 +377,16 @@ class GraphKhopSamplerOpKernel : public framework::OpKernel { } // 7. Get Reindex_X for input nodes. - auto* reindex_x = ctx.Output("Reindex_X"); + auto* reindex_x = ctx.Output("Reindex_X"); T* p_reindex_x = reindex_x->mutable_data(ctx.GetPlace()); for (size_t i = 0; i < bs; i++) { p_reindex_x[i] = node_map[p_vertices[i]]; } // 8. Get operator's outputs. - auto* sample_index = ctx.Output("Sample_Index"); - auto* out_src = ctx.Output("Out_Src"); - auto* out_dst = ctx.Output("Out_Dst"); + auto* sample_index = ctx.Output("Sample_Index"); + auto* out_src = ctx.Output("Out_Src"); + auto* out_dst = ctx.Output("Out_Dst"); sample_index->Resize({static_cast(unique_nodes.size())}); out_src->Resize({static_cast(src_merge.size()), 1}); out_dst->Resize({static_cast(src_merge.size()), 1}); diff --git a/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc b/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc index da9ccdf627f44..9230e114bd3bb 100644 --- a/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc +++ b/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc @@ -25,7 +25,6 @@ class DenseTensor; namespace paddle { namespace operators { -using framework::Tensor; using ScopedTensorDescriptor = platform::ScopedTensorDescriptor; using DataLayout = platform::DataLayout; using ScopedSpatialTransformerDescriptor = @@ -43,9 +42,9 @@ class CUDNNGridSampleOpKernel : public framework::OpKernel { "It must use CUDAPlace when using CUDA Kernel")); auto& dev_ctx = ctx.template device_context(); auto handle = dev_ctx.cudnn_handle(); - auto* input = ctx.Input("X"); - auto* grid = ctx.Input("Grid"); - auto* output = ctx.Output("Output"); + auto* input = ctx.Input("X"); + auto* grid = ctx.Input("Grid"); + auto* output = ctx.Output("Output"); int n = input->dims()[0]; int c = input->dims()[1]; @@ -92,11 +91,14 @@ class CUDNNGridSampleGradOpKernel : public framework::OpKernel { "It must use CUDAPlace when using CUDA Kernel")); auto& dev_ctx = ctx.template device_context(); auto handle = dev_ctx.cudnn_handle(); - auto* input = ctx.Input("X"); - auto* grid = ctx.Input("Grid"); - auto* output_grad = ctx.Input(framework::GradVarName("Output")); - auto* input_grad = ctx.Output(framework::GradVarName("X")); - auto* grid_grad = ctx.Output(framework::GradVarName("Grid")); + auto* input = ctx.Input("X"); + auto* grid = ctx.Input("Grid"); + auto* output_grad = + ctx.Input(framework::GradVarName("Output")); + auto* input_grad = + ctx.Output(framework::GradVarName("X")); + auto* grid_grad = + 
ctx.Output(framework::GradVarName("Grid")); auto output_grad_dims = output_grad->dims(); const int n = output_grad_dims[0]; diff --git a/paddle/fluid/operators/grid_sampler_op.cc b/paddle/fluid/operators/grid_sampler_op.cc index 12b18bc55e2eb..5d63f6b9a500f 100644 --- a/paddle/fluid/operators/grid_sampler_op.cc +++ b/paddle/fluid/operators/grid_sampler_op.cc @@ -26,7 +26,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class GridSampleOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/grid_sampler_op_mlu.cc b/paddle/fluid/operators/grid_sampler_op_mlu.cc index 8327eaad14425..b62cc6b555c55 100644 --- a/paddle/fluid/operators/grid_sampler_op_mlu.cc +++ b/paddle/fluid/operators/grid_sampler_op_mlu.cc @@ -18,7 +18,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class GridSamplerMLUKernel : public framework::OpKernel { @@ -30,9 +30,9 @@ class GridSamplerMLUKernel : public framework::OpKernel { platform::errors::Unavailable("This kernel only runs on MLU.")); // input and output data - const Tensor* input = ctx.Input("X"); - const Tensor* grid = ctx.Input("Grid"); - Tensor* output = ctx.Output("Output"); + const phi::DenseTensor* input = ctx.Input("X"); + const phi::DenseTensor* grid = ctx.Input("Grid"); + phi::DenseTensor* output = ctx.Output("Output"); int n = input->dims()[0]; int c = input->dims()[1]; diff --git a/paddle/fluid/operators/group_norm_op.cc b/paddle/fluid/operators/group_norm_op.cc index 1ac0093735925..7f9c2cea9bb45 100644 --- a/paddle/fluid/operators/group_norm_op.cc +++ b/paddle/fluid/operators/group_norm_op.cc @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/group_norm_op.h" + #include #include #include @@ -26,7 +28,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using DataLayout = framework::DataLayout; diff --git a/paddle/fluid/operators/group_norm_op.cu b/paddle/fluid/operators/group_norm_op.cu index 105d4d6c75efe..bda0124ffa72b 100644 --- a/paddle/fluid/operators/group_norm_op.cu +++ b/paddle/fluid/operators/group_norm_op.cu @@ -268,13 +268,13 @@ class GroupNormKernel : public framework::OpKernel { const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); const float epsilon = ctx.Attr("epsilon"); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); - auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* x = ctx.Input("X"); - auto* y = ctx.Output("Y"); - auto* mean = ctx.Output("Mean"); - auto* var = ctx.Output("Variance"); + auto* y = ctx.Output("Y"); + auto* mean = ctx.Output("Mean"); + auto* var = ctx.Output("Variance"); const auto groups = ctx.Attr("groups"); const auto x_dims = x->dims(); @@ -616,19 +616,20 @@ class GroupNormGradKernel : public framework::OpKernel { const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); const float epsilon = ctx.Attr("epsilon"); - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* mean = ctx.Input("Mean"); - auto* var = ctx.Input("Variance"); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); - auto* d_y = ctx.Input(framework::GradVarName("Y")); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* mean = ctx.Input("Mean"); + auto* var = ctx.Input("Variance"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* d_y = ctx.Input(framework::GradVarName("Y")); const auto groups = ctx.Attr("groups"); // init output - auto* d_x = ctx.Output(framework::GradVarName("X")); - auto* d_scale = ctx.Output(framework::GradVarName("Scale")); - auto* d_bias = ctx.Output(framework::GradVarName("Bias")); + auto* d_x = ctx.Output(framework::GradVarName("X")); + auto* d_scale = + ctx.Output(framework::GradVarName("Scale")); + auto* d_bias = ctx.Output(framework::GradVarName("Bias")); const auto& x_dims = x->dims(); const int C = diff --git a/paddle/fluid/operators/group_norm_op.h b/paddle/fluid/operators/group_norm_op.h index 28a3ad2a8e1ee..97ea7ce8f5d39 100644 --- a/paddle/fluid/operators/group_norm_op.h +++ b/paddle/fluid/operators/group_norm_op.h @@ -28,7 +28,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using DataLayout = framework::DataLayout; @@ -40,13 +40,13 @@ class GroupNormKernel : public framework::OpKernel { const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); const float epsilon = ctx.Attr("epsilon"); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); - auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* x = ctx.Input("X"); - auto* y = ctx.Output("Y"); - auto* mean = ctx.Output("Mean"); - auto* var = ctx.Output("Variance"); + auto* y = ctx.Output("Y"); + auto* mean = ctx.Output("Mean"); + auto* var = ctx.Output("Variance"); const auto groups = ctx.Attr("groups"); const auto x_dims = x->dims(); @@ -221,17 +221,18 @@ class GroupNormGradKernel : public framework::OpKernel { const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); const float epsilon = ctx.Attr("epsilon"); - auto* x = ctx.Input("Y"); - auto* var = ctx.Input("Variance"); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); - auto* d_y = ctx.Input(framework::GradVarName("Y")); + auto* x = ctx.Input("Y"); + auto* var = ctx.Input("Variance"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* d_y = ctx.Input(framework::GradVarName("Y")); const auto groups = ctx.Attr("groups"); // init output - auto* d_x = ctx.Output(framework::GradVarName("X")); - auto* d_scale = ctx.Output(framework::GradVarName("Scale")); - auto* d_bias = ctx.Output(framework::GradVarName("Bias")); + auto* d_x = ctx.Output(framework::GradVarName("X")); + auto* d_scale = + ctx.Output(framework::GradVarName("Scale")); + auto* d_bias = ctx.Output(framework::GradVarName("Bias")); const auto& x_dims = x->dims(); const int C = diff --git a/paddle/fluid/operators/group_norm_op_npu.cc b/paddle/fluid/operators/group_norm_op_npu.cc index a39c44768c224..0e817515f915c 100644 --- a/paddle/fluid/operators/group_norm_op_npu.cc +++ b/paddle/fluid/operators/group_norm_op_npu.cc @@ -21,7 +21,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template struct GroupNormFunction { @@ -32,8 +32,8 @@ struct GroupNormFunction { stream = ctx.template device_context() .stream(); } - void ReduceMean(const Tensor* x, - Tensor* y, + void ReduceMean(const phi::DenseTensor* x, + phi::DenseTensor* y, const std::vector& dim, bool keep_dims = true) { // y should be init first @@ -41,8 +41,8 @@ struct GroupNormFunction { "ReduceMeanD", {*x}, {*y}, {{"axes", dim}, {"keep_dims", keep_dims}}); runner.Run(stream); } - void ReduceSum(const Tensor* x, - Tensor* y, + void ReduceSum(const phi::DenseTensor* x, + phi::DenseTensor* y, const std::vector& dim, bool keep_dims = true) { // y should be init first @@ -50,48 +50,60 @@ struct GroupNormFunction { "ReduceSumD", {*x}, {*y}, {{"axes", dim}, {"keep_dims", keep_dims}}); runner.Run(stream); } - void Add(const Tensor* x, const Tensor* y, Tensor* z) { + void Add(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first const auto& runner = NpuOpRunner("AddV2", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Sub(const Tensor* x, const Tensor* y, Tensor* z) { + void Sub(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first const auto& runner = NpuOpRunner("Sub", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Mul(const Tensor* x, const Tensor* y, Tensor* z) { + void Mul(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first const auto& runner = NpuOpRunner("Mul", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Div(const Tensor* x, const Tensor* y, Tensor* z) { + void Div(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first const auto& runner = NpuOpRunner("Div", {*x, *y}, {*z}, {}); runner.Run(stream); } - void DivNoNan(const Tensor* x, const Tensor* y, Tensor* z) { + void DivNoNan(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // y should be init first const auto& runner = NpuOpRunner("DivNoNan", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Transpose(const Tensor* x, Tensor* y, const std::vector& axis) { + void Transpose(const phi::DenseTensor* x, + phi::DenseTensor* y, + const std::vector& axis) { // y should be init first const auto& runner = NpuOpRunner("TransposeD", {*x}, {*y}, {{"perm", axis}}); runner.Run(stream); } - void Sqrt(const Tensor* x, Tensor* y) { + void Sqrt(const phi::DenseTensor* x, phi::DenseTensor* y) { // y should be init first const auto& runner = NpuOpRunner("Sqrt", {*x}, {*y}, {}); runner.Run(stream); } - void Adds(const Tensor* x, float scalar, Tensor* y) { + void Adds(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) { // y should be init first const auto& runner = NpuOpRunner("Adds", {*x}, {*y}, {{"value", scalar}}); runner.Run(stream); } - Tensor ReduceMeanToNG(const Tensor* x, + Tensor ReduceMeanToNG(const phi::DenseTensor* x, const DataLayout& data_layout, const int64_t N, const int64_t C, @@ -129,13 +141,13 @@ class GroupNormNPUKernel : public framework::OpKernel { const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); const float epsilon = ctx.Attr("epsilon"); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); - auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* x = ctx.Input("X"); - auto* y = 
ctx.Output("Y"); - auto* mean = ctx.Output("Mean"); - auto* var = ctx.Output("Variance"); + auto* y = ctx.Output("Y"); + auto* mean = ctx.Output("Mean"); + auto* var = ctx.Output("Variance"); const auto groups = ctx.Attr("groups"); auto place = ctx.GetPlace(); @@ -203,18 +215,19 @@ class GroupNormGradNPUKernel : public framework::OpKernel { const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); const float epsilon = ctx.Attr("epsilon"); - auto* y = ctx.Input("Y"); - auto* var = ctx.Input("Variance"); + auto* y = ctx.Input("Y"); + auto* var = ctx.Input("Variance"); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); - auto* d_y = ctx.Input(framework::GradVarName("Y")); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* d_y = ctx.Input(framework::GradVarName("Y")); const auto G = ctx.Attr("groups"); // init output - auto* d_x = ctx.Output(framework::GradVarName("X")); - auto* d_scale = ctx.Output(framework::GradVarName("Scale")); - auto* d_bias = ctx.Output(framework::GradVarName("Bias")); + auto* d_x = ctx.Output(framework::GradVarName("X")); + auto* d_scale = + ctx.Output(framework::GradVarName("Scale")); + auto* d_bias = ctx.Output(framework::GradVarName("Bias")); GroupNormFunction F(ctx); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc index 1040f2c2ea066..fc78f514a4507 100644 --- a/paddle/fluid/operators/gru_op.cc +++ b/paddle/fluid/operators/gru_op.cc @@ -26,8 +26,6 @@ DECLARE_int32(paddle_num_threads); namespace paddle { namespace operators { -using framework::Tensor; - class GRUOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -321,10 +319,10 @@ class GRUCPUKernel : public framework::OpKernel { bool origin_mode = context.Attr("origin_mode"); auto* input = context.Input("Input"); - auto* h0 = context.Input("H0"); - auto* weight = context.Input("Weight"); + auto* h0 = context.Input("H0"); + auto* weight = context.Input("Weight"); const T* weight_data = weight->data(); - auto* bias = context.Input("Bias"); + auto* bias = context.Input("Bias"); auto* hidden = context.Output("Hidden"); hidden->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/gru_op.cu.cc b/paddle/fluid/operators/gru_op.cu.cc index f3665da181641..2d63eb4d3a698 100644 --- a/paddle/fluid/operators/gru_op.cu.cc +++ b/paddle/fluid/operators/gru_op.cu.cc @@ -26,10 +26,10 @@ class GRUKernel : public framework::OpKernel { bool is_test = context.Attr("is_test"); bool origin_mode = context.Attr("origin_mode"); auto* input = context.Input("Input"); - auto* h0 = context.Input("H0"); - auto* weight = context.Input("Weight"); + auto* h0 = context.Input("H0"); + auto* weight = context.Input("Weight"); const T* weight_data = weight->data(); - auto* bias = context.Input("Bias"); + auto* bias = context.Input("Bias"); auto* hidden = context.Output("Hidden"); hidden->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/gru_op.h b/paddle/fluid/operators/gru_op.h index 3e931e7bfa8e0..b95932b51802f 100644 --- a/paddle/fluid/operators/gru_op.h +++ b/paddle/fluid/operators/gru_op.h @@ -26,13 +26,13 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template inline void ReorderInitState(const DeviceContext& ctx, - const framework::Tensor& src, + const phi::DenseTensor& src, framework::Vector index_lod, - 
framework::Tensor* dst, + phi::DenseTensor* dst, bool indexed_src) { phi::funcs::CopyMatrixRowsFunctor row_shuffle; dst->mutable_data(src.dims(), ctx.GetPlace()); @@ -44,8 +44,8 @@ class GRUGradKernel : public framework::OpKernel { public: void BatchCompute(const framework::ExecutionContext& context) const { bool origin_mode = context.Attr("origin_mode"); - auto* h0 = context.Input("H0"); - auto* weight = context.Input("Weight"); + auto* h0 = context.Input("H0"); + auto* weight = context.Input("Weight"); const T* weight_data = weight->data(); auto* batch_gate = context.Input("BatchGate"); auto* batch_reset_hidden_prev = @@ -56,10 +56,12 @@ class GRUGradKernel : public framework::OpKernel { context.Input(framework::GradVarName("Hidden")); auto* input_grad = context.Output(framework::GradVarName("Input")); - auto* h0_grad = context.Output(framework::GradVarName("H0")); + auto* h0_grad = + context.Output(framework::GradVarName("H0")); auto* weight_grad = - context.Output(framework::GradVarName("Weight")); - auto* bias_grad = context.Output(framework::GradVarName("Bias")); + context.Output(framework::GradVarName("Weight")); + auto* bias_grad = + context.Output(framework::GradVarName("Bias")); auto gate_dims = batch_gate->dims(); auto hidden_dims = hidden->dims(); diff --git a/paddle/fluid/operators/gru_unit_op.cc b/paddle/fluid/operators/gru_unit_op.cc index 24d4771fac539..8e05454f1aefc 100644 --- a/paddle/fluid/operators/gru_unit_op.cc +++ b/paddle/fluid/operators/gru_unit_op.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class GRUUnitOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/gru_unit_op.h b/paddle/fluid/operators/gru_unit_op.h index bae428fc570d1..3ed3179a63e63 100644 --- a/paddle/fluid/operators/gru_unit_op.h +++ b/paddle/fluid/operators/gru_unit_op.h @@ -23,7 +23,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; enum GRUActivationType { identity = 0, sigmoid = 1, tanh = 2, relu = 3 }; @@ -55,15 +55,16 @@ class GRUUnitKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext& context) const override { - auto* input = context.Input("Input"); - auto* hidden_prev = context.Input("HiddenPrev"); - auto* weight = context.Input("Weight"); - auto* bias = context.Input("Bias"); - auto* gate = context.Output("Gate"); + auto* input = context.Input("Input"); + auto* hidden_prev = context.Input("HiddenPrev"); + auto* weight = context.Input("Weight"); + auto* bias = context.Input("Bias"); + auto* gate = context.Output("Gate"); gate->mutable_data(context.GetPlace()); - auto* reset_hidden_prev = context.Output("ResetHiddenPrev"); + auto* reset_hidden_prev = + context.Output("ResetHiddenPrev"); reset_hidden_prev->mutable_data(context.GetPlace()); - auto* hidden = context.Output("Hidden"); + auto* hidden = context.Output("Hidden"); hidden->mutable_data(context.GetPlace()); int batch_size = input->dims()[0]; @@ -175,18 +176,22 @@ class GRUUnitGradKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext& context) const override { - auto* input = context.Input("Input"); - auto* hidden_prev = context.Input("HiddenPrev"); - auto* weight = context.Input("Weight"); - auto* gate = context.Input("Gate"); - auto* reset_hidden_prev = context.Input("ResetHiddenPrev"); - auto* hidden_grad = context.Input(framework::GradVarName("Hidden")); - auto* input_grad = context.Output(framework::GradVarName("Input")); + auto* input = context.Input("Input"); + auto* hidden_prev = context.Input("HiddenPrev"); + auto* weight = context.Input("Weight"); + auto* gate = context.Input("Gate"); + auto* reset_hidden_prev = + context.Input("ResetHiddenPrev"); + auto* hidden_grad = + context.Input(framework::GradVarName("Hidden")); + auto* input_grad = + context.Output(framework::GradVarName("Input")); auto* hidden_prev_grad = - context.Output(framework::GradVarName("HiddenPrev")); + context.Output(framework::GradVarName("HiddenPrev")); auto* weight_grad = - context.Output(framework::GradVarName("Weight")); - auto* bias_grad = context.Output(framework::GradVarName("Bias")); + context.Output(framework::GradVarName("Weight")); + auto* bias_grad = + context.Output(framework::GradVarName("Bias")); Tensor gate_grad; Tensor reset_hidden_prev_grad; diff --git a/paddle/fluid/operators/hinge_loss_op.h b/paddle/fluid/operators/hinge_loss_op.h index 78e253ad4b0cb..8f06154c79060 100644 --- a/paddle/fluid/operators/hinge_loss_op.h +++ b/paddle/fluid/operators/hinge_loss_op.h @@ -24,9 +24,9 @@ template class HingeLossKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* pred = context.Input("Logits"); - auto* label = context.Input("Labels"); - auto* loss = context.Output("Loss"); + auto* pred = context.Input("Logits"); + auto* label = context.Input("Labels"); + auto* loss = context.Output("Loss"); auto& place = *context.template device_context().eigen_device(); @@ -42,12 +42,12 @@ template class HingeLossGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* pred = context.Input("Logits"); - auto* label = context.Input("Labels"); + auto* pred = context.Input("Logits"); + auto* label = context.Input("Labels"); auto* dloss = - 
context.Input(framework::GradVarName("Loss")); + context.Input(framework::GradVarName("Loss")); auto* dpred = - context.Output(framework::GradVarName("Logits")); + context.Output(framework::GradVarName("Logits")); auto& place = *context.template device_context().eigen_device(); diff --git a/paddle/fluid/operators/histogram_op.cc b/paddle/fluid/operators/histogram_op.cc index fc1bec5a2e52b..9d58d65c83135 100644 --- a/paddle/fluid/operators/histogram_op.cc +++ b/paddle/fluid/operators/histogram_op.cc @@ -24,7 +24,6 @@ namespace paddle { namespace operators { using framework::OpKernelType; -using framework::Tensor; class HistogramOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/huber_loss_op_npu.cc b/paddle/fluid/operators/huber_loss_op_npu.cc index 61944c2caaf3b..a7be6feb628bf 100644 --- a/paddle/fluid/operators/huber_loss_op_npu.cc +++ b/paddle/fluid/operators/huber_loss_op_npu.cc @@ -18,14 +18,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template void HuberLossSub(const platform::Place& place, const aclrtStream& stream, - const Tensor* x, - const Tensor* y, - Tensor* z) { + const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { // Calculate z = x - y z->mutable_data(x->dims(), place); const auto& runner = NpuOpRunner("Sub", {*x, *y}, {*z}, {}); @@ -35,9 +35,9 @@ void HuberLossSub(const platform::Place& place, template void HuberLossMuls(const platform::Place& place, const aclrtStream& stream, - const Tensor* x, + const phi::DenseTensor* x, float scalar, - Tensor* y) { + phi::DenseTensor* y) { // Calculate y = x + scale y->mutable_data(x->dims(), place); const auto& runner = NpuOpRunner("Muls", {*x}, {*y}, {{"value", scalar}}); @@ -47,8 +47,8 @@ void HuberLossMuls(const platform::Place& place, template void HuberLossZerosLike(const platform::Place& place, const aclrtStream& stream, - const Tensor* x, - Tensor* y) { + const phi::DenseTensor* x, + phi::DenseTensor* y) { y->mutable_data(x->dims(), place); const auto& runner = NpuOpRunner("ZerosLike", {*x}, {*y}, {}); runner.Run(stream); @@ -57,10 +57,10 @@ void HuberLossZerosLike(const platform::Place& place, template void HuberLossSmoothL1Loss(const platform::Place& place, const aclrtStream& stream, - const Tensor* x, - const Tensor* y, + const phi::DenseTensor* x, + const phi::DenseTensor* y, float delta, - Tensor* z) { + phi::DenseTensor* z) { z->mutable_data(x->dims(), place); const auto& runner = NpuOpRunner("SmoothL1Loss", {*x, *y}, {*z}, {{"sigma", delta}}); @@ -70,11 +70,11 @@ void HuberLossSmoothL1Loss(const platform::Place& place, template void HuberLossSmoothL1LossGrad(const platform::Place& place, const aclrtStream& stream, - const Tensor* pred, - const Tensor* lab, - const Tensor* dout, + const phi::DenseTensor* pred, + const phi::DenseTensor* lab, + const phi::DenseTensor* dout, float sigma, - Tensor* grad) { + phi::DenseTensor* grad) { grad->mutable_data(pred->dims(), place); const auto& runner = NpuOpRunner( "SmoothL1LossGrad", {*pred, *lab, *dout}, {*grad}, {{"sigma", sigma}}); @@ -85,10 +85,10 @@ template class HuberLossNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in0 = ctx.Input("X"); - auto* in1 = ctx.Input("Y"); - auto* residual = ctx.Output("Residual"); - auto* out = ctx.Output("Out"); + auto* in0 = ctx.Input("X"); + auto* in1 = ctx.Input("Y"); + auto* residual = 
ctx.Output("Residual"); + auto* out = ctx.Output("Out"); auto delta = ctx.Attr("delta"); auto stream = @@ -106,10 +106,10 @@ template class HuberLossGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* residual = ctx.Input("Residual"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* residual = ctx.Input("Residual"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); auto delta = ctx.Attr("delta"); auto stream = diff --git a/paddle/fluid/operators/im2sequence_op.h b/paddle/fluid/operators/im2sequence_op.h index 383a9abafeaea..b886eb602ccd2 100644 --- a/paddle/fluid/operators/im2sequence_op.h +++ b/paddle/fluid/operators/im2sequence_op.h @@ -26,7 +26,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; inline int Im2SeqOutputSize( @@ -40,7 +40,7 @@ template class Im2SequenceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* in = ctx.Input("X"); + const phi::DenseTensor* in = ctx.Input("X"); LoDTensor* out = ctx.Output("Out"); auto in_dim = in->dims(); int batch_size = in_dim[0]; @@ -51,7 +51,7 @@ class Im2SequenceKernel : public framework::OpKernel { auto strides = ctx.Attr>("strides"); auto paddings = ctx.Attr>("paddings"); if (ctx.HasInput("Y") && batch_size > 1) { - const Tensor* imgrealsize = ctx.Input("Y"); + const phi::DenseTensor* imgrealsize = ctx.Input("Y"); auto out_stride = ctx.Attr>("out_stride"); Tensor cpu_shape_tensor; paddle::framework::TensorCopySync( @@ -157,10 +157,10 @@ template class Im2SequenceGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - Tensor* d_out = - const_cast(ctx.Input(framework::GradVarName("Out"))); - auto* d_x = ctx.Output(framework::GradVarName("X")); + auto* in = ctx.Input("X"); + phi::DenseTensor* d_out = const_cast( + ctx.Input(framework::GradVarName("Out"))); + auto* d_x = ctx.Output(framework::GradVarName("X")); d_x->mutable_data(ctx.GetPlace()); auto x_v = framework::EigenVector::Flatten(*d_x); diff --git a/paddle/fluid/operators/increment_op_npu.cc b/paddle/fluid/operators/increment_op_npu.cc index 231cadd661bcd..edd89ac4f9584 100644 --- a/paddle/fluid/operators/increment_op_npu.cc +++ b/paddle/fluid/operators/increment_op_npu.cc @@ -22,8 +22,8 @@ template class IncrementalNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x_tensor = context.Input("X"); - auto* out_tensor = context.Output("Out"); + auto* x_tensor = context.Input("X"); + auto* out_tensor = context.Output("Out"); float step = context.Attr("step"); out_tensor->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/index_sample_op_npu.cc b/paddle/fluid/operators/index_sample_op_npu.cc index 9dd0a76b9805f..dbb8410d1eada 100644 --- a/paddle/fluid/operators/index_sample_op_npu.cc +++ b/paddle/fluid/operators/index_sample_op_npu.cc @@ -17,13 +17,13 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template void IndexSampleGather(const paddle::platform::NPUDeviceContext& dev_ctx, - const Tensor* index, - const Tensor* input, - Tensor* out) { + const phi::DenseTensor* index, + const phi::DenseTensor* input, + phi::DenseTensor* out) { auto index_dims = index->dims(); auto input_dims = input->dims(); auto batch_size = input_dims[0]; @@ -72,9 +72,9 @@ class IndexSampleNPUKernel : public framework::OpKernel { template void IndexSampleGradScatter(const paddle::platform::NPUDeviceContext& dev_ctx, - const Tensor* index, - const Tensor* out_grad, - Tensor* x_grad) { + const phi::DenseTensor* index, + const phi::DenseTensor* out_grad, + phi::DenseTensor* x_grad) { auto index_dims = index->dims(); auto input_dims = x_grad->dims(); auto batch_size = input_dims[0]; diff --git a/paddle/fluid/operators/index_select_op.cc b/paddle/fluid/operators/index_select_op.cc index c6bed95e83dc5..83b0eefecf77f 100644 --- a/paddle/fluid/operators/index_select_op.cc +++ b/paddle/fluid/operators/index_select_op.cc @@ -23,8 +23,6 @@ namespace paddle { namespace operators { -using framework::Tensor; - class IndexSelectOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/index_select_op.h b/paddle/fluid/operators/index_select_op.h index bf878bbbbc82c..7a6b605df3944 100644 --- a/paddle/fluid/operators/index_select_op.h +++ b/paddle/fluid/operators/index_select_op.h @@ -22,7 +22,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using DDim = framework::DDim; diff --git a/paddle/fluid/operators/index_select_op_npu.cc b/paddle/fluid/operators/index_select_op_npu.cc index 22f4d0161f028..0f18f9793d305 100644 --- a/paddle/fluid/operators/index_select_op_npu.cc +++ b/paddle/fluid/operators/index_select_op_npu.cc @@ -19,17 +19,17 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class IndexSelectNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* index = ctx.Input("Index"); + auto* x = ctx.Input("X"); + auto* index = ctx.Input("Index"); auto dim = ctx.Attr("dim"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); auto stream = @@ -50,10 +50,9 @@ template class IndexSelectGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x_grad = ctx.Output(framework::GradVarName("X")); - auto* index = ctx.Input("Index"); - auto* out_grad = - ctx.Input(framework::GradVarName("Out")); + auto* x_grad = ctx.Output(framework::GradVarName("X")); + auto* index = ctx.Input("Index"); + auto* out_grad = ctx.Input(framework::GradVarName("Out")); auto stream = ctx.template device_context() diff --git a/paddle/fluid/operators/inplace_abn_op.cc b/paddle/fluid/operators/inplace_abn_op.cc index 791fef1f7c59d..c326656b46f0a 100644 --- a/paddle/fluid/operators/inplace_abn_op.cc +++ b/paddle/fluid/operators/inplace_abn_op.cc @@ -40,26 +40,26 @@ class InplaceABNOp : public paddle::operators::BatchNormOp { if (input_data_type == framework::proto::VarType::FP64) { bn_param_type = framework::proto::VarType::FP64; } - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("Scale")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("Bias")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("Mean")->dtype()), - platform::errors::InvalidArgument( - "Mean input should be of float type")); - PADDLE_ENFORCE_EQ( - bn_param_type, - framework::TransToProtoVarType(ctx.Input("Variance")->dtype()), - platform::errors::InvalidArgument( - "Variance input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), + platform::errors::InvalidArgument( + "Scale input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), + platform::errors::InvalidArgument( + "Bias input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Mean")->dtype()), + platform::errors::InvalidArgument( + "Mean input should be of float type")); + PADDLE_ENFORCE_EQ(bn_param_type, + framework::TransToProtoVarType( + ctx.Input("Variance")->dtype()), + platform::errors::InvalidArgument( + "Variance input should be of float type")); framework::LibraryType library = framework::LibraryType::kPlain; framework::DataLayout layout = framework::DataLayout::kAnyLayout; @@ -138,13 +138,13 @@ class InplaceABNGradOp : public paddle::operators::BatchNormGradOp { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { const auto* var = ctx.InputVar(framework::GradVarName("Y")); - auto input_data_type = - framework::TransToProtoVarType(ctx.Input("Y")->dtype()); + auto input_data_type = framework::TransToProtoVarType( + ctx.Input("Y")->dtype()); if (var == nullptr) { PADDLE_THROW(platform::errors::InvalidArgument( 
"can't find gradient variable of Y")); } - const Tensor* t = nullptr; + const phi::DenseTensor* t = nullptr; if (var->IsType()) { t = &var->Get(); } else if (var->IsType()) { @@ -221,8 +221,8 @@ template class InplaceABNKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Output("Y"); + auto* x = ctx.Input("X"); + auto* y = ctx.Output("Y"); PADDLE_ENFORCE_EQ(x, y, platform::errors::InvalidArgument( @@ -231,10 +231,10 @@ class InplaceABNKernel : public framework::OpKernel { GetInplaceABNActivationType(ctx.Attr("activation")); auto& place = *ctx.template device_context().eigen_device(); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); - auto* mean = ctx.Input("Mean"); - auto* variance = ctx.Input("Variance"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* mean = ctx.Input("Mean"); + auto* variance = ctx.Input("Variance"); auto momentum = ctx.Attr("momentum"); auto epsilon = ctx.Attr("epsilon"); @@ -244,11 +244,11 @@ class InplaceABNKernel : public framework::OpKernel { auto trainable_statistics = ctx.Attr("trainable_statistics"); auto fuse_with_relu = ctx.Attr("fuse_with_relu"); - auto* mean_out = ctx.Output("MeanOut"); - auto* variance_out = ctx.Output("VarianceOut"); - auto* saved_mean = ctx.Output("SavedMean"); - auto* saved_variance = ctx.Output("SavedVariance"); - auto* reserve_space = ctx.Output("ReserveSpace"); + auto* mean_out = ctx.Output("MeanOut"); + auto* variance_out = ctx.Output("VarianceOut"); + auto* saved_mean = ctx.Output("SavedMean"); + auto* saved_variance = ctx.Output("SavedVariance"); + auto* reserve_space = ctx.Output("ReserveSpace"); auto& dev_ctx = ctx.device_context(); phi::BatchNormKernel( @@ -283,9 +283,9 @@ template class InplaceABNGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* y = ctx.Input("Y"); - auto* d_y = ctx.Input(framework::GradVarName("Y")); - auto* d_x = ctx.Output(framework::GradVarName("X")); + auto* y = ctx.Input("Y"); + auto* d_y = ctx.Input(framework::GradVarName("Y")); + auto* d_x = ctx.Output(framework::GradVarName("X")); PADDLE_ENFORCE_EQ(d_x, d_y, platform::errors::InvalidArgument( @@ -304,10 +304,10 @@ class InplaceABNGradKernel : public framework::OpKernel { // BatchNormGradKernel::Compute(ctx); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); - auto* saved_mean = ctx.Input("SavedMean"); - auto* saved_variance = ctx.Input("SavedVariance"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* saved_mean = ctx.Input("SavedMean"); + auto* saved_variance = ctx.Input("SavedVariance"); auto momentum = ctx.Attr("momentum"); auto epsilon = ctx.Attr("epsilon"); @@ -317,12 +317,14 @@ class InplaceABNGradKernel : public framework::OpKernel { auto trainable_statistics = ctx.Attr("trainable_statistics"); auto fuse_with_relu = ctx.Attr("fuse_with_relu"); - auto* scale_grad = ctx.Output(framework::GradVarName("Scale")); - auto* bias_grad = ctx.Output(framework::GradVarName("Bias")); + auto* scale_grad = + ctx.Output(framework::GradVarName("Scale")); + auto* bias_grad = + ctx.Output(framework::GradVarName("Bias")); - auto* reserve_space = ctx.Input("ReserveSpace"); - auto* mean = ctx.Input("ReserveSpace"); - auto* variance = ctx.Input("ReserveSpace"); + auto* reserve_space = ctx.Input("ReserveSpace"); + auto* mean = ctx.Input("ReserveSpace"); + auto* variance = 
ctx.Input("ReserveSpace"); paddle::optional space_opt; paddle::optional mean_opt; diff --git a/paddle/fluid/operators/inplace_abn_op.cu b/paddle/fluid/operators/inplace_abn_op.cu index 044b8118abb0e..0ee6d686a7539 100644 --- a/paddle/fluid/operators/inplace_abn_op.cu +++ b/paddle/fluid/operators/inplace_abn_op.cu @@ -27,8 +27,8 @@ template class InplaceABNKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* y = ctx.Output("Y"); - auto* x = ctx.Input("X"); + auto* y = ctx.Output("Y"); + auto* x = ctx.Input("X"); PADDLE_ENFORCE_EQ(x, y, platform::errors::InvalidArgument( @@ -37,10 +37,10 @@ class InplaceABNKernel : public framework::OpKernel { GetInplaceABNActivationType(ctx.Attr("activation")); auto& place = *ctx.template device_context().eigen_device(); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); - auto* mean = ctx.Input("Mean"); - auto* variance = ctx.Input("Variance"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* mean = ctx.Input("Mean"); + auto* variance = ctx.Input("Variance"); auto momentum = ctx.Attr("momentum"); auto epsilon = ctx.Attr("epsilon"); @@ -50,11 +50,11 @@ class InplaceABNKernel : public framework::OpKernel { auto trainable_statistics = ctx.Attr("trainable_statistics"); auto fuse_with_relu = ctx.Attr("fuse_with_relu"); - auto* mean_out = ctx.Output("MeanOut"); - auto* variance_out = ctx.Output("VarianceOut"); - auto* saved_mean = ctx.Output("SavedMean"); - auto* saved_variance = ctx.Output("SavedVariance"); - auto* reserve_space = ctx.Output("ReserveSpace"); + auto* mean_out = ctx.Output("MeanOut"); + auto* variance_out = ctx.Output("VarianceOut"); + auto* saved_mean = ctx.Output("SavedMean"); + auto* saved_variance = ctx.Output("SavedVariance"); + auto* reserve_space = ctx.Output("ReserveSpace"); if (ctx.Attr("use_sync_bn")) { auto& dev_ctx = ctx.device_context(); @@ -116,9 +116,9 @@ template class InplaceABNGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const auto* y = ctx.Input("Y"); - auto* d_y = ctx.Input(framework::GradVarName("Y")); - auto* d_x = ctx.Output(framework::GradVarName("X")); + const auto* y = ctx.Input("Y"); + auto* d_y = ctx.Input(framework::GradVarName("Y")); + auto* d_x = ctx.Output(framework::GradVarName("X")); PADDLE_ENFORCE_EQ(d_x, d_y, platform::errors::InvalidArgument( @@ -135,10 +135,10 @@ class InplaceABNGradKernel : public framework::OpKernel { InplaceABNActivation functor; functor.GradCompute(ctx, activation, place, cur_y, cur_y, cur_dy, cur_dy); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); - auto* saved_mean = ctx.Input("SavedMean"); - auto* saved_variance = ctx.Input("SavedVariance"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* saved_mean = ctx.Input("SavedMean"); + auto* saved_variance = ctx.Input("SavedVariance"); auto momentum = ctx.Attr("momentum"); auto epsilon = ctx.Attr("epsilon"); @@ -148,12 +148,14 @@ class InplaceABNGradKernel : public framework::OpKernel { auto trainable_statistics = ctx.Attr("trainable_statistics"); auto fuse_with_relu = ctx.Attr("fuse_with_relu"); - auto* scale_grad = ctx.Output(framework::GradVarName("Scale")); - auto* bias_grad = ctx.Output(framework::GradVarName("Bias")); + auto* scale_grad = + ctx.Output(framework::GradVarName("Scale")); + auto* bias_grad = + ctx.Output(framework::GradVarName("Bias")); - auto* reserve_space = 
ctx.Input("ReserveSpace"); - auto* mean = ctx.Input("ReserveSpace"); - auto* variance = ctx.Input("ReserveSpace"); + auto* reserve_space = ctx.Input("ReserveSpace"); + auto* mean = ctx.Input("ReserveSpace"); + auto* variance = ctx.Input("ReserveSpace"); if (ctx.Attr("use_sync_bn")) { auto& dev_ctx = ctx.device_context(); diff --git a/paddle/fluid/operators/inplace_abn_op.h b/paddle/fluid/operators/inplace_abn_op.h index 2b4e89f1c85fa..2a9568e845492 100644 --- a/paddle/fluid/operators/inplace_abn_op.h +++ b/paddle/fluid/operators/inplace_abn_op.h @@ -22,7 +22,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template diff --git a/paddle/fluid/operators/instance_norm_op.cc b/paddle/fluid/operators/instance_norm_op.cc index 0da87b2a7c4e3..ae4da5c51a088 100644 --- a/paddle/fluid/operators/instance_norm_op.cc +++ b/paddle/fluid/operators/instance_norm_op.cc @@ -40,18 +40,18 @@ framework::OpKernelType InstanceNormOp::GetExpectedKernelType( in_param_type = framework::proto::VarType::FP64; } if (ctx.HasInput("Scale")) { - PADDLE_ENFORCE_EQ( - in_param_type, - framework::TransToProtoVarType(ctx.Input("Scale")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); + PADDLE_ENFORCE_EQ(in_param_type, + framework::TransToProtoVarType( + ctx.Input("Scale")->dtype()), + platform::errors::InvalidArgument( + "Scale input should be of float type")); } if (ctx.HasInput("Bias")) { - PADDLE_ENFORCE_EQ( - in_param_type, - framework::TransToProtoVarType(ctx.Input("Bias")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); + PADDLE_ENFORCE_EQ(in_param_type, + framework::TransToProtoVarType( + ctx.Input("Bias")->dtype()), + platform::errors::InvalidArgument( + "Bias input should be of float type")); } return framework::OpKernelType(input_data_type, ctx.GetPlace()); diff --git a/paddle/fluid/operators/instance_norm_op.h b/paddle/fluid/operators/instance_norm_op.h index 3f99cdf10c64b..43505cac2817b 100644 --- a/paddle/fluid/operators/instance_norm_op.h +++ b/paddle/fluid/operators/instance_norm_op.h @@ -22,7 +22,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using DataLayout = framework::DataLayout; diff --git a/paddle/fluid/operators/instance_norm_op_npu.cc b/paddle/fluid/operators/instance_norm_op_npu.cc index 89c6a310d746d..0370cef2ed0cf 100644 --- a/paddle/fluid/operators/instance_norm_op_npu.cc +++ b/paddle/fluid/operators/instance_norm_op_npu.cc @@ -18,19 +18,19 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class InstanceNormNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const auto epsilon = ctx.Attr("epsilon"); - const auto* x = ctx.Input("X"); - const auto* scale = ctx.Input("Scale"); - const auto* bias = ctx.Input("Bias"); - auto* y = ctx.Output("Y"); - auto* mean = ctx.Output("SavedMean"); - auto* variance = ctx.Output("SavedVariance"); + const auto* x = ctx.Input("X"); + const auto* scale = ctx.Input("Scale"); + const auto* bias = ctx.Input("Bias"); + auto* y = ctx.Output("Y"); + auto* mean = ctx.Output("SavedMean"); + auto* variance = ctx.Output("SavedVariance"); auto& dev_ctx = ctx.template device_context(); dev_ctx.template Alloc(y); diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index 213d14ec48f66..056b81fd9a2ef 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -23,7 +23,6 @@ namespace paddle { namespace operators { -using framework::Tensor; using DataLayout = framework::DataLayout; static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { @@ -359,7 +358,7 @@ class InterpolateOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { #ifdef PADDLE_WITH_MKLDNN if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && @@ -612,7 +611,7 @@ class InterpolateOpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "SizeTensor" || var_name == "Scale") { return expected_kernel_type; diff --git a/paddle/fluid/operators/interpolate_op.cu b/paddle/fluid/operators/interpolate_op.cu index 80534d29b5ae4..1e2ba7501a5dc 100644 --- a/paddle/fluid/operators/interpolate_op.cu +++ b/paddle/fluid/operators/interpolate_op.cu @@ -19,7 +19,6 @@ namespace paddle { namespace operators { -using framework::Tensor; using DataLayout = framework::DataLayout; template @@ -913,8 +912,8 @@ __global__ void KeBicubicInterpBw(T* in, template static void Interpolate1DCUDAFwd(const framework::ExecutionContext& ctx, - const Tensor& input, - Tensor* output) { + const phi::DenseTensor& input, + phi::DenseTensor* output) { auto* input_data = input.data(); const std::string data_layout_str = ctx.Attr("data_layout"); @@ -928,14 +927,14 @@ static void Interpolate1DCUDAFwd(const framework::ExecutionContext& ctx, int out_w = ctx.Attr("out_w"); - auto list_new_shape_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_shape_tensor = ctx.MultiInput("SizeTensor"); if (list_new_shape_tensor.size() > 0) { // have size tensor auto new_size = get_new_shape(list_new_shape_tensor); out_w = new_size[0]; } else { float scale; - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale = scale_data[0]; @@ -945,7 +944,7 @@ static void Interpolate1DCUDAFwd(const framework::ExecutionContext& ctx, if (scale > 0) { out_w = static_cast(in_w * scale); } - auto out_size = ctx.Input("OutSize"); + auto out_size = 
ctx.Input("OutSize"); if (out_size != nullptr) { Tensor sizes; framework::TensorCopySync(*out_size, platform::CPUPlace(), &sizes); @@ -1005,8 +1004,8 @@ static void Interpolate1DCUDAFwd(const framework::ExecutionContext& ctx, template static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, - const Tensor& input, - Tensor* output) { + const phi::DenseTensor& input, + phi::DenseTensor* output) { auto* input_data = input.data(); const std::string data_layout_str = ctx.Attr("data_layout"); @@ -1021,7 +1020,7 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, int out_h = ctx.Attr("out_h"); int out_w = ctx.Attr("out_w"); - auto list_new_shape_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_shape_tensor = ctx.MultiInput("SizeTensor"); if (list_new_shape_tensor.size() > 0) { // have size tensor auto new_size = get_new_shape(list_new_shape_tensor); @@ -1029,7 +1028,7 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, out_w = new_size[1]; } else { float scale; - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale = scale_data[0]; @@ -1040,7 +1039,7 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, out_h = static_cast(in_h * scale); out_w = static_cast(in_w * scale); } - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { Tensor sizes; framework::TensorCopySync(*out_size, platform::CPUPlace(), &sizes); @@ -1157,8 +1156,8 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, template static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, - const Tensor& input, - Tensor* output) { + const phi::DenseTensor& input, + phi::DenseTensor* output) { auto* input_data = input.data(); const std::string data_layout_str = ctx.Attr("data_layout"); @@ -1174,7 +1173,7 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, int out_h = ctx.Attr("out_h"); int out_w = ctx.Attr("out_w"); - auto list_new_shape_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_shape_tensor = ctx.MultiInput("SizeTensor"); if (list_new_shape_tensor.size() > 0) { // have size tensor auto new_size = get_new_shape(list_new_shape_tensor); @@ -1183,7 +1182,7 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, out_w = new_size[2]; } else { float scale; - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale = scale_data[0]; @@ -1195,7 +1194,7 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, out_h = static_cast(in_h * scale); out_w = static_cast(in_w * scale); } - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { Tensor sizes; framework::TensorCopySync(*out_size, platform::CPUPlace(), &sizes); @@ -1289,9 +1288,9 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, template static void Interpolate1DCUDABwd(const framework::ExecutionContext& ctx, - Tensor* input_grad, + phi::DenseTensor* input_grad, const Tensor output_grad) { - auto* input = ctx.Input("X"); + auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; @@ -1303,7 
+1302,7 @@ static void Interpolate1DCUDABwd(const framework::ExecutionContext& ctx, int out_w = ctx.Attr("out_w"); float scale; - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale = scale_data[0]; @@ -1314,14 +1313,14 @@ static void Interpolate1DCUDABwd(const framework::ExecutionContext& ctx, out_w = static_cast(in_w * scale); } - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { Tensor sizes; framework::TensorCopySync(*out_size, platform::CPUPlace(), &sizes); auto size_data = sizes.data(); out_w = size_data[0]; } - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); if (list_new_size_tensor.size() > 0) { // have size tensor auto new_size = get_new_shape(list_new_size_tensor); @@ -1380,9 +1379,9 @@ static void Interpolate1DCUDABwd(const framework::ExecutionContext& ctx, template static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, - Tensor* input_grad, + phi::DenseTensor* input_grad, const Tensor output_grad) { - auto* input = ctx.Input("X"); + auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; @@ -1395,7 +1394,7 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, int out_h = ctx.Attr("out_h"); int out_w = ctx.Attr("out_w"); float scale; - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale = scale_data[0]; @@ -1407,7 +1406,7 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, out_w = static_cast(in_w * scale); } - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { Tensor sizes; framework::TensorCopySync(*out_size, platform::CPUPlace(), &sizes); @@ -1415,7 +1414,7 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, out_h = size_data[0]; out_w = size_data[1]; } - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); if (list_new_size_tensor.size() > 0) { // have size tensor auto new_size = get_new_shape(list_new_size_tensor); @@ -1526,9 +1525,9 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, template static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, - Tensor* input_grad, - const Tensor& output_grad) { - auto* input = ctx.Input("X"); + phi::DenseTensor* input_grad, + const phi::DenseTensor& output_grad) { + auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; @@ -1542,7 +1541,7 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, int out_h = ctx.Attr("out_h"); int out_w = ctx.Attr("out_w"); float scale; - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale = scale_data[0]; @@ -1555,7 +1554,7 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, out_w = static_cast(in_w * scale); } - auto out_size = 
ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { Tensor sizes; framework::TensorCopySync(*out_size, platform::CPUPlace(), &sizes); @@ -1564,7 +1563,7 @@ static void Interpolate3DCUDABwd(const framework::ExecutionContext& ctx, out_h = size_data[1]; out_w = size_data[2]; } - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); if (list_new_size_tensor.size() > 0) { // have size tensor auto new_size = get_new_shape(list_new_size_tensor); @@ -1651,8 +1650,8 @@ class InterpolateOpCUDAKernel : public framework::OpKernel { platform::is_gpu_place(ctx.GetPlace()), true, platform::errors::NotFound("This kernel only runs on GPU device.")); - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); auto input_dims = input->dims(); if (input_dims.size() == 3) { // 1D interpolation @@ -1673,8 +1672,10 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel { platform::is_gpu_place(ctx.GetPlace()), true, platform::errors::NotFound("This kernel only runs on GPU device.")); - auto* input_grad = ctx.Output(framework::GradVarName("X")); - auto* output_grad = ctx.Input(framework::GradVarName("Out")); + auto* input_grad = + ctx.Output(framework::GradVarName("X")); + auto* output_grad = + ctx.Input(framework::GradVarName("Out")); auto output_grad_dims = output_grad->dims(); if (output_grad_dims.size() == 3) { // 1D interpolation diff --git a/paddle/fluid/operators/interpolate_op.h b/paddle/fluid/operators/interpolate_op.h index ff474cfff9727..87825d5fa4ddd 100644 --- a/paddle/fluid/operators/interpolate_op.h +++ b/paddle/fluid/operators/interpolate_op.h @@ -26,11 +26,11 @@ template using EigenTensor = framework::EigenTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DataLayout = framework::DataLayout; inline std::vector get_new_shape( - const std::vector& list_new_shape_tensor) { + const std::vector& list_new_shape_tensor) { // get tensor from std::vector vec_new_shape; for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) { @@ -43,7 +43,7 @@ inline std::vector get_new_shape( tensor->dims())); if (platform::is_gpu_place(tensor->place()) || platform::is_mlu_place(tensor->place())) { - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); vec_new_shape.push_back(static_cast(*temp.data())); } else { @@ -55,10 +55,11 @@ inline std::vector get_new_shape( } template -inline std::vector get_new_data_from_tensor(const Tensor* new_data_tensor) { +inline std::vector get_new_data_from_tensor( + const phi::DenseTensor* new_data_tensor) { std::vector vec_new_data; auto* new_data = new_data_tensor->data(); - framework::Tensor cpu_starts_tensor; + phi::DenseTensor cpu_starts_tensor; if (platform::is_gpu_place(new_data_tensor->place()) || platform::is_mlu_place(new_data_tensor->place())) { paddle::framework::TensorCopySync( @@ -97,8 +98,8 @@ inline void ExtractNCDWH(const framework::DDim& dims, } template -static void NearestNeighborInterpolate(const Tensor& input, - Tensor* output, +static void NearestNeighborInterpolate(const phi::DenseTensor& input, + phi::DenseTensor* output, const float ratio_h, const float ratio_w, const int n, @@ -131,8 +132,8 @@ static void NearestNeighborInterpolate(const Tensor& input, } template -static void LinearInterpolation(const Tensor& input, - Tensor* output, +static void LinearInterpolation(const 
phi::DenseTensor& input, + phi::DenseTensor* output, const float ratio_w, const int in_w, const int n, @@ -195,8 +196,8 @@ static void LinearInterpolation(const Tensor& input, } template -static void LinearInterpolationGrad(const Tensor& output_grad, - Tensor* input_grad, +static void LinearInterpolationGrad(const phi::DenseTensor& output_grad, + phi::DenseTensor* input_grad, const float ratio_w, const int in_w, const int n, @@ -237,8 +238,8 @@ static void LinearInterpolationGrad(const Tensor& output_grad, } template -static void BilinearInterpolation(const Tensor& input, - Tensor* output, +static void BilinearInterpolation(const phi::DenseTensor& input, + phi::DenseTensor* output, const float ratio_h, const float ratio_w, const int in_h, @@ -337,8 +338,8 @@ static void BilinearInterpolation(const Tensor& input, } template -static void TrilinearInterpolation(const Tensor& input, - Tensor* output, +static void TrilinearInterpolation(const phi::DenseTensor& input, + phi::DenseTensor* output, const float ratio_d, const float ratio_h, const float ratio_w, @@ -522,8 +523,8 @@ static inline T cubic_interp(T x0, T x1, T x2, T x3, T t) { } template -static void BicubicInterpolation(const Tensor& input, - Tensor* output, +static void BicubicInterpolation(const phi::DenseTensor& input, + phi::DenseTensor* output, const float ratio_h, const float ratio_w, const int in_h, @@ -602,8 +603,8 @@ static void BicubicInterpolation(const Tensor& input, } template -static void NearestNeighborInterpolateGrad(const Tensor& output_grad, - Tensor* input_grad, +static void NearestNeighborInterpolateGrad(const phi::DenseTensor& output_grad, + phi::DenseTensor* input_grad, const float ratio_h, const float ratio_w, const int n, @@ -637,8 +638,8 @@ static void NearestNeighborInterpolateGrad(const Tensor& output_grad, } template -static void BilinearInterpolationGrad(const Tensor& output_grad, - Tensor* input_grad, +static void BilinearInterpolationGrad(const phi::DenseTensor& output_grad, + phi::DenseTensor* input_grad, const float ratio_h, const float ratio_w, const int in_h, @@ -696,8 +697,8 @@ static void BilinearInterpolationGrad(const Tensor& output_grad, } template -static void TrilinearInterpolationGrad(const Tensor& output_grad, - Tensor* input_grad, +static void TrilinearInterpolationGrad(const phi::DenseTensor& output_grad, + phi::DenseTensor* input_grad, const float ratio_d, const float ratio_h, const float ratio_w, @@ -793,8 +794,8 @@ static void TrilinearInterpolationGrad(const Tensor& output_grad, } template -static void BicubicInterpolationGrad(const Tensor& output_grad, - Tensor* input_grad, +static void BicubicInterpolationGrad(const phi::DenseTensor& output_grad, + phi::DenseTensor* input_grad, const float ratio_h, const float ratio_w, const int in_h, @@ -854,8 +855,8 @@ static void BicubicInterpolationGrad(const Tensor& output_grad, template static void Interpolate1DCPUFwd(const framework::ExecutionContext& ctx, - const Tensor& input, - Tensor* output) { + const phi::DenseTensor& input, + phi::DenseTensor* output) { const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; @@ -866,14 +867,14 @@ static void Interpolate1DCPUFwd(const framework::ExecutionContext& ctx, int align_mode = ctx.Attr("align_mode"); int out_w = ctx.Attr("out_w"); - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); if (list_new_size_tensor.size() > 0) { 
// have size tensor auto new_size = get_new_shape(list_new_size_tensor); out_w = new_size[0]; } else { float scale; - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale = scale_data[0]; @@ -883,7 +884,7 @@ static void Interpolate1DCPUFwd(const framework::ExecutionContext& ctx, if (scale > 0) { out_w = static_cast(in_w * scale); } - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { auto out_size_data = get_new_data_from_tensor(out_size); out_w = out_size_data[0]; @@ -928,8 +929,8 @@ static void Interpolate1DCPUFwd(const framework::ExecutionContext& ctx, template static void Interpolate2DCPUFwd(const framework::ExecutionContext& ctx, - const Tensor& input, - Tensor* output) { + const phi::DenseTensor& input, + phi::DenseTensor* output) { const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; @@ -942,7 +943,7 @@ static void Interpolate2DCPUFwd(const framework::ExecutionContext& ctx, int out_h = ctx.Attr("out_h"); int out_w = ctx.Attr("out_w"); - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); if (list_new_size_tensor.size() > 0) { // have size tensor auto new_size = get_new_shape(list_new_size_tensor); @@ -950,7 +951,7 @@ static void Interpolate2DCPUFwd(const framework::ExecutionContext& ctx, out_w = new_size[1]; } else { float scale; - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale = scale_data[0]; @@ -961,7 +962,7 @@ static void Interpolate2DCPUFwd(const framework::ExecutionContext& ctx, out_h = static_cast(in_h * scale); out_w = static_cast(in_w * scale); } - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { auto out_size_data = get_new_data_from_tensor(out_size); out_h = out_size_data[0]; @@ -1045,8 +1046,8 @@ static void Interpolate2DCPUFwd(const framework::ExecutionContext& ctx, template static void Interpolate3DCPUFwd(const framework::ExecutionContext& ctx, - const Tensor& input, - Tensor* output) { + const phi::DenseTensor& input, + phi::DenseTensor* output) { const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; @@ -1060,7 +1061,7 @@ static void Interpolate3DCPUFwd(const framework::ExecutionContext& ctx, int out_h = ctx.Attr("out_h"); int out_w = ctx.Attr("out_w"); - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); if (list_new_size_tensor.size() > 0) { // have size tensor auto new_size = get_new_shape(list_new_size_tensor); @@ -1069,7 +1070,7 @@ static void Interpolate3DCPUFwd(const framework::ExecutionContext& ctx, out_w = new_size[2]; } else { float scale; - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale = scale_data[0]; @@ -1081,7 +1082,7 @@ static void Interpolate3DCPUFwd(const framework::ExecutionContext& ctx, out_h = static_cast(in_h * scale); out_w = static_cast(in_w * scale); } - auto out_size = ctx.Input("OutSize"); + auto 
out_size = ctx.Input("OutSize"); if (out_size != nullptr) { auto out_size_data = get_new_data_from_tensor(out_size); out_d = out_size_data[0]; @@ -1157,9 +1158,9 @@ static void Interpolate3DCPUFwd(const framework::ExecutionContext& ctx, template static void Interpolate1DCPUBwd(const framework::ExecutionContext& ctx, - Tensor* input_grad, - const Tensor& output_grad) { - auto* input = ctx.Input("X"); + phi::DenseTensor* input_grad, + const phi::DenseTensor& output_grad) { + auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; @@ -1171,7 +1172,7 @@ static void Interpolate1DCPUBwd(const framework::ExecutionContext& ctx, int out_w = ctx.Attr("out_w"); float scale; - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale = scale_data[0]; @@ -1181,12 +1182,12 @@ static void Interpolate1DCPUBwd(const framework::ExecutionContext& ctx, if (scale > 0) { out_w = static_cast(in_w * scale); } - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { auto out_size_data = get_new_data_from_tensor(out_size); out_w = out_size_data[0]; } - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); if (list_new_size_tensor.size() > 0) { // have size tensor auto new_size = get_new_shape(list_new_size_tensor); @@ -1231,9 +1232,9 @@ static void Interpolate1DCPUBwd(const framework::ExecutionContext& ctx, template static void Interpolate2DCPUBwd(const framework::ExecutionContext& ctx, - Tensor* input_grad, - const Tensor& output_grad) { - auto* input = ctx.Input("X"); + phi::DenseTensor* input_grad, + const phi::DenseTensor& output_grad) { + auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = framework::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; @@ -1246,7 +1247,7 @@ static void Interpolate2DCPUBwd(const framework::ExecutionContext& ctx, int out_h = ctx.Attr("out_h"); int out_w = ctx.Attr("out_w"); float scale; - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale = scale_data[0]; @@ -1257,13 +1258,13 @@ static void Interpolate2DCPUBwd(const framework::ExecutionContext& ctx, out_h = static_cast(in_h * scale); out_w = static_cast(in_w * scale); } - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { auto out_size_data = get_new_data_from_tensor(out_size); out_h = out_size_data[0]; out_w = out_size_data[1]; } - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); if (list_new_size_tensor.size() > 0) { // have size tensor auto new_size = get_new_shape(list_new_size_tensor); @@ -1342,9 +1343,9 @@ static void Interpolate2DCPUBwd(const framework::ExecutionContext& ctx, template static void Interpolate3DCPUBwd(const framework::ExecutionContext& ctx, - Tensor* input_grad, + phi::DenseTensor* input_grad, const Tensor output_grad) { - auto* input = ctx.Input("X"); + auto* input = ctx.Input("X"); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = 
framework::StringToDataLayout(data_layout_str); int n, c, in_d, in_h, in_w; @@ -1358,7 +1359,7 @@ static void Interpolate3DCPUBwd(const framework::ExecutionContext& ctx, int out_h = ctx.Attr("out_h"); int out_w = ctx.Attr("out_w"); float scale; - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale = scale_data[0]; @@ -1370,14 +1371,14 @@ static void Interpolate3DCPUBwd(const framework::ExecutionContext& ctx, out_h = static_cast(in_h * scale); out_w = static_cast(in_w * scale); } - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { auto out_size_data = get_new_data_from_tensor(out_size); out_d = out_size_data[0]; out_h = out_size_data[1]; out_w = out_size_data[2]; } - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); if (list_new_size_tensor.size() > 0) { // have size tensor auto new_size = get_new_shape(list_new_size_tensor); @@ -1442,8 +1443,8 @@ template class InterpolateKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); auto input_dims = input->dims(); if (input_dims.size() == 3) { // 1D interpolation @@ -1460,8 +1461,10 @@ template class InterpolateGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input_grad = ctx.Output(framework::GradVarName("X")); - auto* output_grad = ctx.Input(framework::GradVarName("Out")); + auto* input_grad = + ctx.Output(framework::GradVarName("X")); + auto* output_grad = + ctx.Input(framework::GradVarName("Out")); auto output_grad_dims = output_grad->dims(); if (output_grad_dims.size() == 3) { // 1D interpolation grad diff --git a/paddle/fluid/operators/interpolate_op_npu.cc b/paddle/fluid/operators/interpolate_op_npu.cc index 3548506eea4c1..1e99738a6b620 100644 --- a/paddle/fluid/operators/interpolate_op_npu.cc +++ b/paddle/fluid/operators/interpolate_op_npu.cc @@ -20,7 +20,7 @@ limitations under the License. 
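The CPU and CUDA entry kernels above share one dispatch shape: fetch `X`/`Out` as `phi::DenseTensor` and branch on the input rank to pick the 1-D, 2-D, or 3-D path. The sketch below shows that structure only; the stub stands in for the real interpolation routines and is not part of the patch.

```cpp
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/dense_tensor.h"

namespace paddle {
namespace operators {

template <typename T>
static void InterpolateNDStub(const framework::ExecutionContext& ctx,
                              const phi::DenseTensor& input,
                              phi::DenseTensor* output,
                              int spatial_rank) {
  // Stand-in for the 1-D / 2-D / 3-D interpolation paths shown above.
  output->Resize(input.dims());
  output->mutable_data<T>(ctx.GetPlace());
  (void)spatial_rank;
}

template <typename T>
class RankDispatchKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const auto* input = ctx.Input<phi::DenseTensor>("X");
    auto* output = ctx.Output<phi::DenseTensor>("Out");
    const int rank = input->dims().size();
    if (rank == 3) {         // N, C, W   -> 1-D interpolation
      InterpolateNDStub<T>(ctx, *input, output, 1);
    } else if (rank == 4) {  // N, C, H, W -> 2-D interpolation
      InterpolateNDStub<T>(ctx, *input, output, 2);
    } else {                 // N, C, D, H, W -> 3-D interpolation
      InterpolateNDStub<T>(ctx, *input, output, 3);
    }
  }
};

}  // namespace operators
}  // namespace paddle
```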
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DataLayout = framework::DataLayout; inline static void CheckArgument(const framework::ExecutionContext& ctx) { @@ -68,7 +68,7 @@ static void CalcOutSize(const framework::ExecutionContext& ctx, *out_w = ctx.Attr("out_w"); auto dev_ctx = platform::DeviceContextPool::Instance().Get(ctx.GetPlace()); - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); if (list_new_size_tensor.size() > 0) { std::vector new_size_h(1); @@ -79,7 +79,7 @@ static void CalcOutSize(const framework::ExecutionContext& ctx, *out_w = new_size_w[0]; } else { float scale; - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { std::vector scale_data; framework::TensorToVector(*scale_tensor, *dev_ctx, &scale_data); @@ -93,7 +93,7 @@ static void CalcOutSize(const framework::ExecutionContext& ctx, *out_w = static_cast(in_w * scale); } - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { std::vector out_size_data; framework::TensorToVector(*out_size, *dev_ctx, &out_size_data); @@ -124,7 +124,7 @@ class InterpolateNPUKernel : public framework::OpKernel { // when 'align_corners' is 'true' or data type is 'double' CheckArgument(ctx); - auto* input = ctx.Input("X"); + auto* input = ctx.Input("X"); framework::DDim input_dims = input->dims(); const std::string data_layout_str = @@ -141,7 +141,7 @@ class InterpolateNPUKernel : public framework::OpKernel { input_x.ShareDataWith(*input); input_x.set_layout(data_layout); - auto* output = ctx.Output("Out"); + auto* output = ctx.Output("Out"); framework::DDim output_dims; if (data_layout == DataLayout::kNCHW) { output_dims = {n, c, out_h, out_w}; @@ -175,7 +175,7 @@ class InterpolateGradNPUKernel : public framework::OpKernel { // when 'align_corners' is 'true' or data type is 'double' CheckArgument(ctx); - auto* input = ctx.Input("X"); + auto* input = ctx.Input("X"); framework::DDim input_dims = input->dims(); const std::string data_layout_str = @@ -188,12 +188,14 @@ class InterpolateGradNPUKernel : public framework::OpKernel { CalcOutSize(ctx, h, w, &out_h, &out_w); // the 'output_grad' tensor may has no set (or wrong set) of the layout - auto* output_grad = ctx.Input(framework::GradVarName("Out")); + auto* output_grad = + ctx.Input(framework::GradVarName("Out")); Tensor output_grad_tmp(output_grad->type()); output_grad_tmp.ShareDataWith(*output_grad); output_grad_tmp.set_layout(data_layout); - auto* input_grad = ctx.Output(framework::GradVarName("X")); + auto* input_grad = + ctx.Output(framework::GradVarName("X")); input_grad->set_layout(data_layout); framework::DDim input_grad_dims; if (data_layout == DataLayout::kNCHW) { diff --git a/paddle/fluid/operators/interpolate_v2_op.cc b/paddle/fluid/operators/interpolate_v2_op.cc index 1bb68699a8553..e7a362f543b76 100644 --- a/paddle/fluid/operators/interpolate_v2_op.cc +++ b/paddle/fluid/operators/interpolate_v2_op.cc @@ -25,7 +25,6 @@ namespace paddle { namespace operators { -using framework::Tensor; using DataLayout = framework::DataLayout; static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { @@ -463,7 +462,7 @@ class InterpolateV2Op : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const 
framework::OpKernelType& expected_kernel_type) const override { #ifdef PADDLE_WITH_MKLDNN if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && @@ -713,7 +712,7 @@ class InterpolateV2OpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "SizeTensor" || var_name == "Scale") { return expected_kernel_type; diff --git a/paddle/fluid/operators/interpolate_v2_op_mlu.cc b/paddle/fluid/operators/interpolate_v2_op_mlu.cc index 9e39d97f710c5..1383be6f93fb9 100644 --- a/paddle/fluid/operators/interpolate_v2_op_mlu.cc +++ b/paddle/fluid/operators/interpolate_v2_op_mlu.cc @@ -20,11 +20,10 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; using DataLayout = framework::DataLayout; inline std::vector get_new_shape_mlu( - const std::vector& list_new_shape_tensor) { + const std::vector& list_new_shape_tensor) { // get tensor from std::vector vec_new_shape; for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) { @@ -33,7 +32,7 @@ inline std::vector get_new_shape_mlu( tensor->dims(), phi::make_ddim({1}), platform::errors::InvalidArgument("shape of dim tensor should be [1]")); - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); vec_new_shape.push_back(static_cast(*temp.data())); } @@ -46,8 +45,8 @@ class InterpolateV2MLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); auto input_dims = input->dims(); PADDLE_ENFORCE_GE( @@ -79,7 +78,7 @@ class InterpolateV2MLUKernel : public framework::OpKernel { float scale_h = -1; float scale_w = -1; - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); if (list_new_size_tensor.size() > 0) { // have size tensor auto new_size = get_new_shape_mlu(list_new_size_tensor); @@ -94,7 +93,7 @@ class InterpolateV2MLUKernel : public framework::OpKernel { out_w = new_size[2]; } } else { - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); auto scale = ctx.Attr>("scale"); if (scale_tensor != nullptr) { std::vector scale_data; @@ -146,7 +145,7 @@ class InterpolateV2MLUKernel : public framework::OpKernel { if (scale_d > 0.) 
{ out_d = static_cast(in_d * scale_d); } - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { std::vector out_size_data; out_size_data = GetDataFromTensor(out_size); @@ -359,8 +358,10 @@ class InterpolateV2GradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* input_grad = ctx.Output(framework::GradVarName("X")); - auto* output_grad = ctx.Input(framework::GradVarName("Out")); + auto* input_grad = + ctx.Output(framework::GradVarName("X")); + auto* output_grad = + ctx.Input(framework::GradVarName("Out")); auto output_grad_dims = output_grad->dims(); @@ -369,7 +370,7 @@ class InterpolateV2GradMLUKernel : public framework::OpKernel { platform::errors::External( "XPU Interpolategrad kernel only support 2d")); - auto* input = ctx.Input("X"); + auto* input = ctx.Input("X"); auto input_dims = input->dims(); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = @@ -388,14 +389,14 @@ class InterpolateV2GradMLUKernel : public framework::OpKernel { float scale_h = -1; float scale_w = -1; - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); if (list_new_size_tensor.size() > 0) { // have size tensor auto new_size = get_new_shape_mlu(list_new_size_tensor); out_h = new_size[0]; out_w = new_size[1]; } else { - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); auto scale = ctx.Attr>("scale"); if (scale_tensor != nullptr) { std::vector scale_data; @@ -428,7 +429,7 @@ class InterpolateV2GradMLUKernel : public framework::OpKernel { out_h = static_cast(in_h * scale_h); out_w = static_cast(in_w * scale_w); } - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); if (out_size != nullptr) { std::vector out_size_data; out_size_data = GetDataFromTensor(out_size); diff --git a/paddle/fluid/operators/interpolate_v2_op_npu.cc b/paddle/fluid/operators/interpolate_v2_op_npu.cc index ea11d3c87a812..cb84e694dade4 100644 --- a/paddle/fluid/operators/interpolate_v2_op_npu.cc +++ b/paddle/fluid/operators/interpolate_v2_op_npu.cc @@ -19,7 +19,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DataLayout = framework::DataLayout; using DDim = framework::DDim; using fp16 = paddle::platform::float16; @@ -38,54 +38,60 @@ struct InterpolateFunction { FillNpuTensorWithConstant(&t0, static_cast(0)); FillNpuTensorWithConstant(&t1, static_cast(1)); } - void Arange(int n, Tensor* x) { + void Arange(int n, phi::DenseTensor* x) { FillNpuTensorWithConstant(&tn, static_cast(n)); const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {*x}, {}); runner.Run(stream); } - void ReduceSum(const Tensor* x, - Tensor* y, + void ReduceSum(const phi::DenseTensor* x, + phi::DenseTensor* y, const std::vector& dim, bool keep_dims = true) { const auto& runner = NpuOpRunner( "ReduceSumD", {*x}, {*y}, {{"axes", dim}, {"keep_dims", keep_dims}}); runner.Run(stream); } - void Add(const Tensor* x, const Tensor* y, Tensor* z) { + void Add(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { const auto& runner = NpuOpRunner("AddV2", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Adds(const Tensor* x, float scalar, Tensor* y) { + void Adds(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) { const auto& runner = NpuOpRunner("Adds", {*x}, {*y}, {{"value", scalar}}); runner.Run(stream); } - void Mul(const Tensor* x, const Tensor* y, Tensor* z) { + void Mul(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { const auto& runner = NpuOpRunner("Mul", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Sub(const Tensor* x, const Tensor* y, Tensor* z) { + void Sub(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { const auto& runner = NpuOpRunner("Sub", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Cast(const Tensor* x, Tensor* y) { + void Cast(const phi::DenseTensor* x, phi::DenseTensor* y) { auto dst_dtype = ConvertToNpuDtype(framework::TransToProtoVarType(y->dtype())); const auto& runner = NpuOpRunner( "Cast", {*x}, {*y}, {{"dst_type", static_cast(dst_dtype)}}); runner.Run(stream); } - void Gather(const Tensor* x, - const Tensor* indices, + void Gather(const phi::DenseTensor* x, + const phi::DenseTensor* indices, const int axis, - Tensor* y) { + phi::DenseTensor* y) { const auto& runner = NpuOpRunner("GatherV2D", {*x, *indices}, {*y}, {{"axis", axis}}); runner.Run(stream); } - void GatherGrad(const Tensor* gy, - const Tensor* indices, + void GatherGrad(const phi::DenseTensor* gy, + const phi::DenseTensor* indices, const int axis, - Tensor* gx) { + phi::DenseTensor* gx) { // 1 gy swapaxis: axis & 0 int len = (gy->dims()).size(); std::vector axis_swap(len); @@ -115,32 +121,38 @@ struct InterpolateFunction { // 3 gx swapaxis: axis, 0 Transpose(&gx_t, gx, axis_swap); } - void Scatter(const Tensor* x, - const Tensor* index, - const Tensor* updates, - Tensor* y) { + void Scatter(const phi::DenseTensor* x, + const phi::DenseTensor* index, + const phi::DenseTensor* updates, + phi::DenseTensor* y) { const auto& runner = NpuOpRunner("TensorScatterAdd", {*x, *index, *updates}, {*y}, {}); runner.Run(stream); } - void Transpose(const Tensor* x, Tensor* y, const std::vector& axis) { + void Transpose(const phi::DenseTensor* x, + phi::DenseTensor* y, + const std::vector& axis) { const auto& runner = NpuOpRunner("TransposeD", {*x}, {*y}, {{"perm", axis}}); runner.Run(stream); } - void Muls(const Tensor* x, float scalar, Tensor* y) { + void Muls(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) { const auto& 
runner = NpuOpRunner("Muls", {*x}, {*y}, {{"value", scalar}}); runner.Run(stream); } - void Maximum(const Tensor* x, const Tensor* y, Tensor* z) { + void Maximum(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { const auto& runner = NpuOpRunner("Maximum", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Minimum(const Tensor* x, const Tensor* y, Tensor* z) { + void Minimum(const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {}); runner.Run(stream); } - void Floor(const Tensor* x, Tensor* y) { + void Floor(const phi::DenseTensor* x, phi::DenseTensor* y) { const auto& runner = NpuOpRunner("Floor", {*x}, {*y}, {}); runner.Run(stream); } @@ -155,7 +167,7 @@ struct InterpolateFunction { }; template <> -void InterpolateFunction::Arange(int n, Tensor* x) { +void InterpolateFunction::Arange(int n, phi::DenseTensor* x) { Tensor x_fp32(experimental::DataType::FLOAT32); x_fp32.mutable_data(x->dims(), place); FillNpuTensorWithConstant(&tn, static_cast(n)); @@ -216,14 +228,14 @@ void BilinearParamTensorCompute(const framework::ExecutionContext& ctx, bool align_cond, float ratio_h, float ratio_w, - Tensor* h0, - Tensor* h1, - Tensor* w0, - Tensor* w1, - Tensor* coef_h0, - Tensor* coef_h1, - Tensor* coef_w0, - Tensor* coef_w1) { + phi::DenseTensor* h0, + phi::DenseTensor* h1, + phi::DenseTensor* w0, + phi::DenseTensor* w1, + phi::DenseTensor* coef_h0, + phi::DenseTensor* coef_h1, + phi::DenseTensor* coef_w0, + phi::DenseTensor* coef_w1) { InterpolateFunction F(ctx); auto place = ctx.GetPlace(); Tensor _h0, _w0; @@ -291,8 +303,8 @@ void BilinearParamTensorCompute(const framework::ExecutionContext& ctx, template void BilinearFwdNpu(const framework::ExecutionContext& ctx, - const Tensor* input, - Tensor* output, + const phi::DenseTensor* input, + phi::DenseTensor* output, const float scale_h, const float scale_w, const bool align_corners, @@ -382,8 +394,8 @@ void BilinearFwdNpu(const framework::ExecutionContext& ctx, template void BilinearBwdNpu(const framework::ExecutionContext& ctx, - const Tensor* gout, - Tensor* gin, + const phi::DenseTensor* gout, + phi::DenseTensor* gin, const float scale_h, const float scale_w, const bool align_corners, @@ -477,8 +489,8 @@ template class InterpolateV2NPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); auto input_dims = input->dims(); PADDLE_ENFORCE_EQ(input_dims.size(), @@ -509,8 +521,7 @@ class InterpolateV2NPUKernel : public framework::OpKernel { float scale_w = -1; // Priority: SizeTensor > OutSize > Scale > scale > out_h & out_w - auto list_new_shape_tensor = - ctx.MultiInput("SizeTensor"); + auto list_new_shape_tensor = ctx.MultiInput("SizeTensor"); if (list_new_shape_tensor.size() > 0) { std::vector output_h(1); std::vector output_w(1); @@ -521,12 +532,12 @@ class InterpolateV2NPUKernel : public framework::OpKernel { out_h = output_h[0]; out_w = output_w[0]; } else if (ctx.HasInput("OutSize")) { - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); auto out_size_data = phi::funcs::get_new_data_from_tensor(out_size); out_h = out_size_data[0]; out_w = out_size_data[1]; } else { - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); auto scale = ctx.Attr>("scale"); if (scale_tensor != nullptr) 
{ auto scale_data = @@ -634,9 +645,11 @@ template class InterpolateV2NPUGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output_grad = ctx.Input(framework::GradVarName("Out")); - auto* input_grad = ctx.Output(framework::GradVarName("X")); + auto* input = ctx.Input("X"); + auto* output_grad = + ctx.Input(framework::GradVarName("Out")); + auto* input_grad = + ctx.Output(framework::GradVarName("X")); const std::string data_layout_str = ctx.Attr("data_layout"); const DataLayout data_layout = @@ -661,7 +674,7 @@ class InterpolateV2NPUGradKernel : public framework::OpKernel { float scale_w = -1; // Priority: SizeTensor > OutSize > Scale > scale > out_h & out_w - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); if (list_new_size_tensor.size() > 0) { std::vector output_h(1); std::vector output_w(1); @@ -672,12 +685,12 @@ class InterpolateV2NPUGradKernel : public framework::OpKernel { out_h = output_h[0]; out_w = output_w[0]; } else if (ctx.HasInput("OutSize")) { - auto out_size = ctx.Input("OutSize"); + auto out_size = ctx.Input("OutSize"); auto out_size_data = phi::funcs::get_new_data_from_tensor(out_size); out_h = out_size_data[0]; out_w = out_size_data[1]; } else { - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); auto scale = ctx.Attr>("scale"); if (scale_tensor != nullptr) { auto scale_data = diff --git a/paddle/fluid/operators/ipu/ipu_runtime_op.cc b/paddle/fluid/operators/ipu/ipu_runtime_op.cc index 802cc13ae4e07..e243c8f7d9e36 100644 --- a/paddle/fluid/operators/ipu/ipu_runtime_op.cc +++ b/paddle/fluid/operators/ipu/ipu_runtime_op.cc @@ -35,8 +35,8 @@ class IpuRuntimeOp : public framework::OperatorBase { auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place); framework::RuntimeContext runtime_ctx(inputs_, outputs_, scope); framework::ExecutionContext ctx(*this, scope, *dev_ctx, runtime_ctx); - auto inputs = ctx.MultiInput("FeedList"); - auto outputs = ctx.MultiOutput("FetchList"); + auto inputs = ctx.MultiInput("FeedList"); + auto outputs = ctx.MultiOutput("FetchList"); auto output_names = ctx.OutputNames("FetchList"); VLOG(4) << "IpuRuntime Kernel, begin to run graph"; ipu_backend->Run(inputs, outputs, ctx); diff --git a/paddle/fluid/operators/isfinite_op.h b/paddle/fluid/operators/isfinite_op.h index 427d35699867b..88d60bc1c15ad 100644 --- a/paddle/fluid/operators/isfinite_op.h +++ b/paddle/fluid/operators/isfinite_op.h @@ -32,14 +32,14 @@ class DenseTensor; namespace paddle { namespace framework { // store the result bool in gpu tensor, async operation. Faster than above ones. 
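The isfinite helpers declared just below come in two flavours once they operate on `phi::DenseTensor`: bool-returning overloads that copy the result back to the host, and tensor-output overloads that leave a one-element flag on the same device as the input. A small usage sketch, where the wrapper function names are illustrative and not part of the header:

```cpp
#include "paddle/fluid/operators/isfinite_op.h"
#include "paddle/phi/core/dense_tensor.h"

// Host-side check: each helper copies its single bool result back to the CPU.
bool HasBadValues(const phi::DenseTensor& grad) {
  return paddle::framework::TensorContainsNAN(grad) ||
         paddle::framework::TensorContainsInf(grad);
}

// Device-side check: the result stays in a one-element tensor next to `grad`
// (the helper resizes `flag` to {1} itself), so the stream can stay async.
void IsFiniteAsync(const phi::DenseTensor& grad, phi::DenseTensor* flag) {
  paddle::framework::TensorIsfinite(grad, flag);
}
```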
-void TensorContainsNAN(const framework::Tensor& tensor, framework::Tensor* out); -void TensorContainsInf(const framework::Tensor& tensor, framework::Tensor* out); -void TensorIsfinite(const framework::Tensor& tensor, framework::Tensor* out); +void TensorContainsNAN(const phi::DenseTensor& tensor, phi::DenseTensor* out); +void TensorContainsInf(const phi::DenseTensor& tensor, phi::DenseTensor* out); +void TensorIsfinite(const phi::DenseTensor& tensor, phi::DenseTensor* out); // copy the result bool to cpu -bool TensorContainsNAN(const framework::Tensor& tensor); -bool TensorContainsInf(const framework::Tensor& tensor); -bool TensorIsfinite(const framework::Tensor& tensor); +bool TensorContainsNAN(const phi::DenseTensor& tensor); +bool TensorContainsInf(const phi::DenseTensor& tensor); +bool TensorIsfinite(const phi::DenseTensor& tensor); #define FiniteVisitor(type, reduce_type, device) \ struct type##Visitor##device { \ @@ -50,7 +50,7 @@ bool TensorIsfinite(const framework::Tensor& tensor); auto place = in_.place(); \ auto* ctx = static_cast( \ platform::DeviceContextPool::Instance().Get(place)); \ - Tensor tmp; \ + phi::DenseTensor tmp; \ tmp.Resize(in_.dims()); \ out_->Resize({1}); \ std::vector dims(tmp.dims().size()); \ @@ -73,8 +73,8 @@ FiniteVisitor(Isfinite, All, GPU); #endif // store the result bool in gpu tensor, async operation. Faster than above ones. -inline void TensorContainsNAN(const framework::Tensor& tensor, - framework::Tensor* out) { +inline void TensorContainsNAN(const phi::DenseTensor& tensor, + phi::DenseTensor* out) { auto place = tensor.place(); if (platform::is_cpu_place(tensor.place())) { VisitDataTypeNormal(TransToProtoVarType(tensor.dtype()), @@ -90,8 +90,8 @@ inline void TensorContainsNAN(const framework::Tensor& tensor, #endif PADDLE_THROW(platform::errors::Unimplemented("Not supported on %s.", place)); } -inline void TensorContainsInf(const framework::Tensor& tensor, - framework::Tensor* out) { +inline void TensorContainsInf(const phi::DenseTensor& tensor, + phi::DenseTensor* out) { auto place = tensor.place(); if (platform::is_cpu_place(tensor.place())) { VisitDataTypeNormal(TransToProtoVarType(tensor.dtype()), @@ -107,8 +107,8 @@ inline void TensorContainsInf(const framework::Tensor& tensor, #endif PADDLE_THROW(platform::errors::Unimplemented("Not supported on %s.", place)); } -inline void TensorIsfinite(const framework::Tensor& tensor, - framework::Tensor* out) { +inline void TensorIsfinite(const phi::DenseTensor& tensor, + phi::DenseTensor* out) { auto place = tensor.place(); if (platform::is_cpu_place(tensor.place())) { VisitDataTypeNormal(TransToProtoVarType(tensor.dtype()), @@ -126,37 +126,37 @@ inline void TensorIsfinite(const framework::Tensor& tensor, } // copy the result bool to cpu -inline bool TensorContainsNAN(const framework::Tensor& tensor) { - Tensor out; +inline bool TensorContainsNAN(const phi::DenseTensor& tensor) { + phi::DenseTensor out; TensorContainsNAN(tensor, &out); return GetValue(&out); } -inline bool TensorContainsInf(const framework::Tensor& tensor) { - Tensor out; +inline bool TensorContainsInf(const phi::DenseTensor& tensor) { + phi::DenseTensor out; TensorContainsInf(tensor, &out); return GetValue(&out); } -inline bool TensorIsfinite(const framework::Tensor& tensor) { - Tensor out; +inline bool TensorIsfinite(const phi::DenseTensor& tensor) { + phi::DenseTensor out; TensorIsfinite(tensor, &out); return GetValue(&out); } } // namespace framework namespace operators { struct InfinityFunctor { - void operator()(const 
framework::Tensor& tensor, framework::Tensor* out) { + void operator()(const phi::DenseTensor& tensor, phi::DenseTensor* out) { framework::TensorContainsInf(tensor, out); } }; struct NANFunctor { - void operator()(const framework::Tensor& tensor, framework::Tensor* out) { + void operator()(const phi::DenseTensor& tensor, phi::DenseTensor* out) { framework::TensorContainsNAN(tensor, out); } }; struct IsfiniteFunctor { - void operator()(const framework::Tensor& tensor, framework::Tensor* out) { + void operator()(const phi::DenseTensor& tensor, phi::DenseTensor* out) { framework::TensorIsfinite(tensor, out); } }; @@ -166,22 +166,22 @@ class OverflowKernel : public framework::OpKernel { public: virtual void Compute(const framework::ExecutionContext& ctx) const { auto* x = ctx.InputVar("X"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); Functor functor; if (x->IsType()) { - auto* in = ctx.Input("X"); + auto* in = ctx.Input("X"); functor(*in, out); } else if (x->IsType()) { auto& in = ctx.Input("X")->value(); functor(in, out); } else { - PADDLE_ENFORCE_EQ( - true, - false, - platform::errors::InvalidArgument( - "The input type mismatch, the type of Input(X) must be Tensor or " - "SelectedRows, please check your input.")); + PADDLE_ENFORCE_EQ(true, + false, + platform::errors::InvalidArgument( + "The input type mismatch, the type of Input(X) " + "must be phi::DenseTensor or " + "SelectedRows, please check your input.")); } } }; diff --git a/paddle/fluid/operators/jit/benchmark.cc b/paddle/fluid/operators/jit/benchmark.cc index 50fd6056d84b0..8070527a56a8c 100644 --- a/paddle/fluid/operators/jit/benchmark.cc +++ b/paddle/fluid/operators/jit/benchmark.cc @@ -135,7 +135,7 @@ void BenchAllImpls(const typename KernelTuple::attr_type& attr, Args... args) { LOG(INFO) << loginfos.str(); } -using Tensor = paddle::framework::Tensor; +using Tensor = phi::DenseTensor; template void BenchKernelXYZN() { using T = typename KernelTuple::data_type; diff --git a/paddle/fluid/operators/kldiv_loss_op.cc b/paddle/fluid/operators/kldiv_loss_op.cc index decee5567b486..9a06fd369f882 100644 --- a/paddle/fluid/operators/kldiv_loss_op.cc +++ b/paddle/fluid/operators/kldiv_loss_op.cc @@ -19,8 +19,6 @@ namespace paddle { namespace operators { -using framework::Tensor; - class KLDivLossOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/kldiv_loss_op_npu.cc b/paddle/fluid/operators/kldiv_loss_op_npu.cc index 38ad17249bec5..f21e939a7b118 100644 --- a/paddle/fluid/operators/kldiv_loss_op_npu.cc +++ b/paddle/fluid/operators/kldiv_loss_op_npu.cc @@ -20,15 +20,15 @@ limitations under the Licnse. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class KLDivLossNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* target = ctx.Input("Target"); - auto* loss = ctx.Output("Loss"); + auto* input = ctx.Input("X"); + auto* target = ctx.Input("Target"); + auto* loss = ctx.Output("Loss"); auto reduction = ctx.Attr("reduction"); loss->mutable_data(ctx.GetPlace()); @@ -103,9 +103,11 @@ template class KLDivLossGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* target = ctx.Input("Target"); - auto* loss_grad = ctx.Input(framework::GradVarName("Loss")); - auto* input_grad = ctx.Output(framework::GradVarName("X")); + auto* target = ctx.Input("Target"); + auto* loss_grad = + ctx.Input(framework::GradVarName("Loss")); + auto* input_grad = + ctx.Output(framework::GradVarName("X")); auto reduction = ctx.Attr("reduction"); input_grad->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/kron_op.cc b/paddle/fluid/operators/kron_op.cc index 250a5ae0061ed..707d9a47006f2 100644 --- a/paddle/fluid/operators/kron_op.cc +++ b/paddle/fluid/operators/kron_op.cc @@ -38,7 +38,7 @@ class KronOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input @@ -120,7 +120,7 @@ class KronGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input diff --git a/paddle/fluid/operators/l1_norm_op.cc b/paddle/fluid/operators/l1_norm_op.cc index 093a33d89b03f..112a84b00e329 100644 --- a/paddle/fluid/operators/l1_norm_op.cc +++ b/paddle/fluid/operators/l1_norm_op.cc @@ -19,8 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::Tensor; - class L1NormOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/l1_norm_op.h b/paddle/fluid/operators/l1_norm_op.h index 5629ea60dbc40..36465c14bf00a 100644 --- a/paddle/fluid/operators/l1_norm_op.h +++ b/paddle/fluid/operators/l1_norm_op.h @@ -25,8 +25,8 @@ template class L1NormKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const framework::Tensor *X = context.Input("X"); - framework::Tensor *Out = context.Output("Out"); + const phi::DenseTensor *X = context.Input("X"); + phi::DenseTensor *Out = context.Output("Out"); Out->mutable_data(context.GetPlace()); auto x = framework::EigenVector::Flatten(*X); @@ -43,16 +43,16 @@ template class L1NormGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const framework::Tensor *x = context.Input("X"); - const framework::Tensor *d_out = - context.Input(framework::GradVarName("Out")); + const phi::DenseTensor *x = context.Input("X"); + const phi::DenseTensor *d_out = + context.Input(framework::GradVarName("Out")); PADDLE_ENFORCE_EQ( d_out->numel(), 1, platform::errors::InvalidArgument( "Input(GRAD@Out) of L1NormGradOP should be a scalar.")); - framework::Tensor *dx = - context.Output(framework::GradVarName("X")); + phi::DenseTensor *dx = + context.Output(framework::GradVarName("X")); dx->mutable_data(context.GetPlace()); auto x_eigen = framework::EigenVector::Flatten(*x); diff --git a/paddle/fluid/operators/label_smooth_op_mlu.cc b/paddle/fluid/operators/label_smooth_op_mlu.cc index 8a91dc1f4c75a..d667db483a9b6 100644 --- a/paddle/fluid/operators/label_smooth_op_mlu.cc +++ b/paddle/fluid/operators/label_smooth_op_mlu.cc @@ -18,7 +18,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template @@ -26,7 +26,7 @@ class LabelSmoothMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* in_t = ctx.Input("X"); - auto* dist_t = ctx.Input("PriorDist"); + auto* dist_t = ctx.Input("PriorDist"); auto* out_t = ctx.Output("Out"); auto epsilon = ctx.Attr("epsilon"); auto epsilon_gt = 1.0f - epsilon; @@ -55,7 +55,7 @@ class LabelSmoothMLUKernel : public framework::OpKernel { epsilon); } else { auto& dev_ctx = ctx.template device_context(); - framework::Tensor dist_tensor = + phi::DenseTensor dist_tensor = ctx.AllocateTmpTensor({1, label_dim}, dev_ctx); MLUCnnlTensorDesc dist_desc(dist_tensor); auto value = static_cast(1.0f / label_dim); diff --git a/paddle/fluid/operators/label_smooth_op_npu.cc b/paddle/fluid/operators/label_smooth_op_npu.cc index 7289770fc60ed..d899dbf99c525 100644 --- a/paddle/fluid/operators/label_smooth_op_npu.cc +++ b/paddle/fluid/operators/label_smooth_op_npu.cc @@ -18,15 +18,15 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template void LabelSmoothMuls(const platform::Place& place, const aclrtStream& stream, - const Tensor* in, + const phi::DenseTensor* in, float val, - Tensor* out) { + phi::DenseTensor* out) { out->mutable_data(in->dims(), place); const auto& runner = NpuOpRunner("Muls", {*in}, {*out}, {{"value", val}}); runner.Run(stream); @@ -35,9 +35,9 @@ 
void LabelSmoothMuls(const platform::Place& place, template void LabelSmoothAdds(const platform::Place& place, const aclrtStream& stream, - const Tensor* in, + const phi::DenseTensor* in, float val, - Tensor* out) { + phi::DenseTensor* out) { out->mutable_data(in->dims(), place); const auto& runner = NpuOpRunner("Adds", {*in}, {*out}, {{"value", val}}); runner.Run(stream); @@ -46,9 +46,9 @@ void LabelSmoothAdds(const platform::Place& place, template void LabelSmoothAddBroadCast(const platform::Place& place, const aclrtStream& stream, - const Tensor* in1, - const Tensor* in2, - Tensor* out) { + const phi::DenseTensor* in1, + const phi::DenseTensor* in2, + phi::DenseTensor* out) { out->mutable_data(place); const auto& runner = NpuOpRunner("AddV2", {*in1, *in2}, {*out}, {}); runner.Run(stream); @@ -60,7 +60,7 @@ class LabelSmoothNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* out_t = ctx.Output("Out"); auto* in_t = ctx.Input("X"); - auto* dist_t = ctx.Input("PriorDist"); + auto* dist_t = ctx.Input("PriorDist"); auto epsilon = ctx.Attr("epsilon"); auto label_dim = in_t->dims()[in_t->dims().size() - 1]; @@ -90,8 +90,8 @@ template class LabelSmoothGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* d_out_t = ctx.Input(framework::GradVarName("Out")); - auto* d_in_t = ctx.Output(framework::GradVarName("X")); + auto* d_out_t = ctx.Input(framework::GradVarName("Out")); + auto* d_in_t = ctx.Output(framework::GradVarName("X")); auto epsilon = ctx.Attr("epsilon"); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/layer_norm_kernel.cu.h b/paddle/fluid/operators/layer_norm_kernel.cu.h index 899eae3efb45b..d9fa06b7e52f5 100644 --- a/paddle/fluid/operators/layer_norm_kernel.cu.h +++ b/paddle/fluid/operators/layer_norm_kernel.cu.h @@ -33,7 +33,7 @@ namespace cub = hipcub; namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template using CudnnDataType = platform::CudnnDataType; template @@ -937,12 +937,12 @@ void ln_bwd_fast_kernel_driver(const phi::GPUContext &dev_ctx, const int gridx = 2 * dev_ctx.GetSMCount(); // get temp space for dscale and dbias. - framework::Tensor dscale_temp; + phi::DenseTensor dscale_temp; dscale_temp.Resize({gridx, cols}); dscale_temp.mutable_data(dev_ctx.GetPlace()); U *dscale_temp_ptr = dscale_temp.data(); - framework::Tensor dbias_temp; + phi::DenseTensor dbias_temp; dbias_temp.Resize({gridx, cols}); dbias_temp.mutable_data(dev_ctx.GetPlace()); U *dbias_temp_ptr = dbias_temp.data(); diff --git a/paddle/fluid/operators/layer_norm_op.cc b/paddle/fluid/operators/layer_norm_op.cc index 13cd443473040..096125ff2f9d3 100644 --- a/paddle/fluid/operators/layer_norm_op.cc +++ b/paddle/fluid/operators/layer_norm_op.cc @@ -24,7 +24,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using DataLayout = framework::DataLayout; @@ -114,7 +114,8 @@ class LayerNormOp : public framework::OperatorWithKernel { #ifdef PADDLE_WITH_MKLDNN int begin_norm_axis = ctx.Attr("begin_norm_axis"); if (this->CanMKLDNNBeUsed(ctx, input_data_type) && - begin_norm_axis == ctx.Input("X")->dims().size() - 1) { + begin_norm_axis == + ctx.Input("X")->dims().size() - 1) { return framework::OpKernelType(input_data_type, ctx.GetPlace(), framework::DataLayout::kMKLDNN, diff --git a/paddle/fluid/operators/layer_norm_op_mlu.cc b/paddle/fluid/operators/layer_norm_op_mlu.cc index 5819cc3b40801..7058f9f094923 100644 --- a/paddle/fluid/operators/layer_norm_op_mlu.cc +++ b/paddle/fluid/operators/layer_norm_op_mlu.cc @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DDim = framework::DDim; template @@ -28,12 +28,12 @@ class LayerNormMLUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); const auto epsilon = ctx.Attr("epsilon"); - const auto* x = ctx.Input("X"); - const auto* scale = ctx.Input("Scale"); - const auto* bias = ctx.Input("Bias"); - auto* y = ctx.Output("Y"); - auto* mean = ctx.Output("Mean"); - auto* variance = ctx.Output("Variance"); + const auto* x = ctx.Input("X"); + const auto* scale = ctx.Input("Scale"); + const auto* bias = ctx.Input("Bias"); + auto* y = ctx.Output("Y"); + auto* mean = ctx.Output("Mean"); + auto* variance = ctx.Output("Variance"); auto place = ctx.GetPlace(); @@ -151,14 +151,15 @@ class LayerNormGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); - const auto* x = ctx.Input("X"); - const auto* mean = ctx.Input("Mean"); - const auto* variance = ctx.Input("Variance"); - const auto* scale = ctx.Input("Scale"); - const auto* dy = ctx.Input(framework::GradVarName("Y")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dscale = ctx.Output(framework::GradVarName("Scale")); - auto* dbias = ctx.Output(framework::GradVarName("Bias")); + const auto* x = ctx.Input("X"); + const auto* mean = ctx.Input("Mean"); + const auto* variance = ctx.Input("Variance"); + const auto* scale = ctx.Input("Scale"); + const auto* dy = ctx.Input(framework::GradVarName("Y")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dscale = + ctx.Output(framework::GradVarName("Scale")); + auto* dbias = ctx.Output(framework::GradVarName("Bias")); auto place = ctx.GetPlace(); dx->mutable_data(place); diff --git a/paddle/fluid/operators/layer_norm_op_npu.cc b/paddle/fluid/operators/layer_norm_op_npu.cc index 5aed9c76a86ff..146d441fed318 100644 --- a/paddle/fluid/operators/layer_norm_op_npu.cc +++ b/paddle/fluid/operators/layer_norm_op_npu.cc @@ -18,7 +18,7 @@ limitations under the License. 
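// A sketch (assumed example, not from the patch) of the gradient-kernel side
// of the same rename, as used by the MLU layer_norm grad kernel above and the
// NPU kldiv grad earlier: framework::GradVarName() still builds the "@GRAD"
// variable names, only the accessor template argument changes. The
// MyOpGradKernel class and its Out@GRAD / X@GRAD slots are hypothetical.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/dense_tensor.h"

template <typename T>
class MyOpGradKernel : public paddle::framework::OpKernel<T> {
 public:
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    namespace fw = paddle::framework;
    const phi::DenseTensor* dout =
        ctx.Input<phi::DenseTensor>(fw::GradVarName("Out"));
    phi::DenseTensor* dx = ctx.Output<phi::DenseTensor>(fw::GradVarName("X"));
    dx->Resize(dout->dims());
    T* dx_data = dx->mutable_data<T>(ctx.GetPlace());
    const T* dout_data = dout->data<T>();
    for (int64_t i = 0; i < dout->numel(); ++i) {
      dx_data[i] = dout_data[i];  // pass-through grad, stand-in for real math
    }
  }
};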
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DDim = framework::DDim; using DataLayout = framework::DataLayout; @@ -53,12 +53,12 @@ class LayerNormNPUKernel : public framework::OpKernel { using U = LayerNormParamType; const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); const auto epsilon = ctx.Attr("epsilon"); - const auto* x = ctx.Input("X"); - const auto* scale = ctx.Input("Scale"); - const auto* bias = ctx.Input("Bias"); - auto* y = ctx.Output("Y"); - auto* mean = ctx.Output("Mean"); - auto* variance = ctx.Output("Variance"); + const auto* x = ctx.Input("X"); + const auto* scale = ctx.Input("Scale"); + const auto* bias = ctx.Input("Bias"); + auto* y = ctx.Output("Y"); + auto* mean = ctx.Output("Mean"); + auto* variance = ctx.Output("Variance"); const auto& x_dims = x->dims(); std::vector axes; auto matrix_dim = phi::flatten_to_2d(x_dims, begin_norm_axis); @@ -86,7 +86,7 @@ class LayerNormNPUKernel : public framework::OpKernel { runner.Run(stream); scale = &default_scale; } else { - const_cast(scale)->Resize(phi::make_ddim(axes)); + const_cast(scale)->Resize(phi::make_ddim(axes)); } Tensor default_bias(x->type()); @@ -100,7 +100,7 @@ class LayerNormNPUKernel : public framework::OpKernel { runner.Run(stream); bias = &default_bias; } else { - const_cast(bias)->Resize(phi::make_ddim(axes)); + const_cast(bias)->Resize(phi::make_ddim(axes)); } // cast scale from LayerNormParamType to T if needed @@ -146,7 +146,7 @@ class LayerNormNPUKernel : public framework::OpKernel { y->mutable_data(ctx.GetPlace()); // mean should be of U type - Tensor* tmp_mean = mean; + phi::DenseTensor* tmp_mean = mean; Tensor cast_mean(x->type()); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::FP16 && @@ -163,7 +163,7 @@ class LayerNormNPUKernel : public framework::OpKernel { } // same for variance - Tensor* tmp_variance = variance; + phi::DenseTensor* tmp_variance = variance; Tensor cast_variance(x->type()); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::FP16 && @@ -219,8 +219,8 @@ class LayerNormNPUKernel : public framework::OpKernel { // revert shape of scale and bias // TODO(zhiqiu): better implementation, use tmp tensor to avoid write input // tensor. 
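// The layer_norm NPU kernels in this hunk reshape their const scale/bias
// inputs in place (const_cast + Resize) and put the original shape back
// afterwards, which is what the TODO above is complaining about. A small
// sketch of that pattern as a helper; wrapping it in a callable and the
// helper name are editorial assumptions, the Resize calls mirror the hunk.
#include <functional>
#include <vector>

#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"

inline void RunWithTemporaryShape(const phi::DenseTensor* t,
                                  const std::vector<int64_t>& tmp_shape,
                                  const std::function<void()>& body) {
  const phi::DDim saved_dims = t->dims();
  // NB: mutates a const input, exactly like the kernel above does.
  const_cast<phi::DenseTensor*>(t)->Resize(phi::make_ddim(tmp_shape));
  body();  // run the NPU op while the tensor carries the broadcast shape
  const_cast<phi::DenseTensor*>(t)->Resize(saved_dims);
}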
- const_cast(scale)->Resize(phi::make_ddim({right})); - const_cast(bias)->Resize(phi::make_ddim({right})); + const_cast(scale)->Resize(phi::make_ddim({right})); + const_cast(bias)->Resize(phi::make_ddim({right})); } }; @@ -230,15 +230,16 @@ class LayerNormGradNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { using U = LayerNormParamType; const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); - const auto* x = ctx.Input("X"); + const auto* x = ctx.Input("X"); const auto& x_dims = x->dims(); - const auto* mean = ctx.Input("Mean"); - const auto* variance = ctx.Input("Variance"); - const auto* scale = ctx.Input("Scale"); - const auto* dy = ctx.Input(framework::GradVarName("Y")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dscale = ctx.Output(framework::GradVarName("Scale")); - auto* dbias = ctx.Output(framework::GradVarName("Bias")); + const auto* mean = ctx.Input("Mean"); + const auto* variance = ctx.Input("Variance"); + const auto* scale = ctx.Input("Scale"); + const auto* dy = ctx.Input(framework::GradVarName("Y")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dscale = + ctx.Output(framework::GradVarName("Scale")); + auto* dbias = ctx.Output(framework::GradVarName("Bias")); auto matrix_dim = phi::flatten_to_2d(x_dims, begin_norm_axis); int right = static_cast(matrix_dim[1]); @@ -268,8 +269,9 @@ class LayerNormGradNPUKernel : public framework::OpKernel { } auto mean_dims = mean->dims(); - const_cast(mean)->Resize(phi::make_ddim({new_shape})); - const_cast(variance)->Resize(phi::make_ddim({new_shape})); + const_cast(mean)->Resize(phi::make_ddim({new_shape})); + const_cast(variance)->Resize( + phi::make_ddim({new_shape})); Tensor default_scale(x->type()); if (!scale) { @@ -282,7 +284,7 @@ class LayerNormGradNPUKernel : public framework::OpKernel { runner.Run(stream); scale = &default_scale; } else { - const_cast(scale)->Resize(phi::make_ddim(axes)); + const_cast(scale)->Resize(phi::make_ddim(axes)); } // cast scale from LayerNormParamType to T if needed @@ -358,7 +360,7 @@ class LayerNormGradNPUKernel : public framework::OpKernel { dbias->Resize(phi::make_ddim(axes)); // dscale should be of U type - Tensor* tmp_dscale = dscale; + phi::DenseTensor* tmp_dscale = dscale; Tensor cast_dscale(x->type()); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::FP16 && @@ -375,7 +377,7 @@ class LayerNormGradNPUKernel : public framework::OpKernel { } // same for dbias - Tensor* tmp_dbias = dbias; + phi::DenseTensor* tmp_dbias = dbias; Tensor cast_dbias(x->type()); if (framework::TransToProtoVarType(x->dtype()) == framework::proto::VarType::FP16 && @@ -427,9 +429,9 @@ class LayerNormGradNPUKernel : public framework::OpKernel { runner_cast_dbias.Run(stream); } - const_cast(mean)->Resize(mean_dims); - const_cast(variance)->Resize(mean_dims); - const_cast(scale)->Resize(phi::make_ddim({right})); + const_cast(mean)->Resize(mean_dims); + const_cast(variance)->Resize(mean_dims); + const_cast(scale)->Resize(phi::make_ddim({right})); dscale->Resize(phi::make_ddim({right})); dbias->Resize(phi::make_ddim({right})); } diff --git a/paddle/fluid/operators/layout_utils.h b/paddle/fluid/operators/layout_utils.h index c15786fc83410..d475eab967d78 100644 --- a/paddle/fluid/operators/layout_utils.h +++ b/paddle/fluid/operators/layout_utils.h @@ -26,12 +26,12 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template inline void 
ResizeToChannelFirst(const framework::ExecutionContext& context, - const Tensor* input, - Tensor* transformed_input) { + const phi::DenseTensor* input, + phi::DenseTensor* transformed_input) { int dim = input->dims().size() - 2; if (dim == 3) { // input @@ -68,8 +68,8 @@ inline void ResizeToChannelFirst(const framework::ExecutionContext& context, template inline void ResizeToChannelLast(const framework::ExecutionContext& context, - const Tensor* input, - Tensor* transformed_input) { + const phi::DenseTensor* input, + phi::DenseTensor* transformed_input) { int dim = input->dims().size() - 2; if (dim == 3) { // input @@ -106,8 +106,8 @@ inline void ResizeToChannelLast(const framework::ExecutionContext& context, template inline void TransToChannelFirst(const framework::ExecutionContext& context, - const Tensor* input, - Tensor* transformed_input) { + const phi::DenseTensor* input, + phi::DenseTensor* transformed_input) { VLOG(5) << "Why am I called?"; int dim = input->dims().size() - 2; if (dim == 3) { @@ -131,8 +131,8 @@ inline void TransToChannelFirst(const framework::ExecutionContext& context, template inline void TransToChannelLast(const framework::ExecutionContext& context, - const Tensor* input, - Tensor* transformed_input) { + const phi::DenseTensor* input, + phi::DenseTensor* transformed_input) { int dim = input->dims().size() - 2; if (dim == 3) { auto& dev_ctx = context.template device_context(); diff --git a/paddle/fluid/operators/limit_by_capacity_op.cu b/paddle/fluid/operators/limit_by_capacity_op.cu index d14cc0762617e..079d6cd0bfcc6 100644 --- a/paddle/fluid/operators/limit_by_capacity_op.cu +++ b/paddle/fluid/operators/limit_by_capacity_op.cu @@ -29,7 +29,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template __global__ void limit_by_capacity_impl( @@ -54,10 +54,10 @@ template class LimitByCapacityOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto expert_count = context.Input("expert_count"); - auto capacity = context.Input("capacity"); + auto expert_count = context.Input("expert_count"); + auto capacity = context.Input("capacity"); auto n_worker = context.Attr("n_worker"); - auto out = context.Output("Out"); + auto out = context.Output("Out"); auto n_expert = expert_count->numel() / n_worker; const auto place = context.GetPlace(); @@ -68,7 +68,7 @@ class LimitByCapacityOpCUDAKernel : public framework::OpKernel { auto out_data = out->mutable_data(place); const T* ec_data = expert_count->data(); - framework::Tensor capacity_copy; + phi::DenseTensor capacity_copy; framework::TensorCopy(*capacity, place, dev_ctx, &capacity_copy); T* cap_data = capacity_copy.mutable_data(place); diff --git a/paddle/fluid/operators/linear_chain_crf_op.h b/paddle/fluid/operators/linear_chain_crf_op.h index de6daf33f8426..bd48acc3796b1 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.h +++ b/paddle/fluid/operators/linear_chain_crf_op.h @@ -48,20 +48,22 @@ struct ScalarMul { using framework::LoD; using framework::LoDTensor; -using framework::Tensor; template class LinearChainCRFOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* emission_weights = ctx.Input("Emission"); - const Tensor* transition_weights = - ctx.Input("Transition"); - - Tensor* emission_exps = ctx.Output("EmissionExps"); - Tensor* transition_exps = 
ctx.Output("TransitionExps"); - Tensor* alpha = ctx.Output("Alpha"); - Tensor* ll = ctx.Output("LogLikelihood"); + const phi::DenseTensor* emission_weights = + ctx.Input("Emission"); + const phi::DenseTensor* transition_weights = + ctx.Input("Transition"); + + phi::DenseTensor* emission_exps = + ctx.Output("EmissionExps"); + phi::DenseTensor* transition_exps = + ctx.Output("TransitionExps"); + phi::DenseTensor* alpha = ctx.Output("Alpha"); + phi::DenseTensor* ll = ctx.Output("LogLikelihood"); // Because the computation codes only runs on CPU, here the memory for all // the outputs is FIXED to be allocated on the CPU memory. @@ -70,18 +72,19 @@ class LinearChainCRFOpKernel : public framework::OpKernel { transition_exps->mutable_data(platform::CPUPlace()); auto emission_dims = emission_weights->dims(); - const Tensor* label = ctx.Input("Label"); - Tensor emission_weights_tmp = *emission_weights; - Tensor label_tmp = *label; - Tensor emission_exps_tmp = *emission_exps; - Tensor alpha_tmp = *alpha; + const phi::DenseTensor* label = ctx.Input("Label"); + phi::DenseTensor emission_weights_tmp = *emission_weights; + phi::DenseTensor label_tmp = *label; + phi::DenseTensor emission_exps_tmp = *emission_exps; + phi::DenseTensor alpha_tmp = *alpha; int64_t seq_num = 0; int64_t batch_size; int64_t tag_num; const int64_t* length_data = nullptr; framework::LoD in_lod; if (ctx.HasInput("Length")) { - const Tensor* label_length = ctx.Input("Length"); + const phi::DenseTensor* label_length = + ctx.Input("Length"); length_data = label_length->data(); seq_num = label_length->numel(); PADDLE_ENFORCE_EQ( @@ -125,7 +128,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel { ll->Resize({seq_num, 1}); ll->mutable_data(platform::CPUPlace()); // Now, all the inputs and outputs should be on the CPU memory. 
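// The linear_chain_crf kernels around this hunk allocate CPU scratch buffers
// as phi::DenseTensor and then carve per-sequence views out of them with
// Slice(). A condensed sketch of those two moves; the float dtype and the
// [batch, 1] shape are illustrative only.
#include "paddle/fluid/platform/place.h"
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"

inline phi::DenseTensor MakeRowMaxBuffer(int64_t batch_size) {
  phi::DenseTensor row_max;
  row_max.Resize(phi::make_ddim({batch_size, 1}));
  row_max.mutable_data<float>(paddle::platform::CPUPlace());
  return row_max;
}

// Slice() returns a view over rows [begin, end); it shares the allocation,
// so writing through the view updates the original buffer.
inline phi::DenseTensor RowsOf(const phi::DenseTensor& buf,
                               int64_t begin,
                               int64_t end) {
  return buf.Slice(begin, end);
}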
- Tensor emission_row_max; + phi::DenseTensor emission_row_max; emission_row_max.mutable_data( phi::make_ddim({static_cast(batch_size), 1}), platform::CPUPlace()); @@ -158,11 +161,15 @@ class LinearChainCRFOpKernel : public framework::OpKernel { log_likelihood[i] = 0.; continue; } - const Tensor one_seq = emission_weights_tmp.Slice(start_pos, end_pos); - Tensor one_seq_row_max = emission_row_max.Slice(start_pos, end_pos); - Tensor one_seq_exps = emission_exps_tmp.Slice(start_pos, end_pos); - const Tensor one_seq_label = label_tmp.Slice(start_pos, end_pos); - Tensor one_seq_alpha = alpha_tmp.Slice(start_pos, end_pos); + const phi::DenseTensor one_seq = + emission_weights_tmp.Slice(start_pos, end_pos); + phi::DenseTensor one_seq_row_max = + emission_row_max.Slice(start_pos, end_pos); + phi::DenseTensor one_seq_exps = + emission_exps_tmp.Slice(start_pos, end_pos); + const phi::DenseTensor one_seq_label = + label_tmp.Slice(start_pos, end_pos); + phi::DenseTensor one_seq_alpha = alpha_tmp.Slice(start_pos, end_pos); log_likelihood[i] = ForwardOneSequence(one_seq, one_seq_row_max, one_seq_exps, @@ -174,13 +181,13 @@ class LinearChainCRFOpKernel : public framework::OpKernel { }; private: - T ForwardOneSequence(const Tensor& emission, - const Tensor& emission_row_max, - const Tensor& emission_exps, - const Tensor& trans_weights, - const Tensor& trans_weight_exps, - const Tensor& label, - Tensor* alpha) const { + T ForwardOneSequence(const phi::DenseTensor& emission, + const phi::DenseTensor& emission_row_max, + const phi::DenseTensor& emission_exps, + const phi::DenseTensor& trans_weights, + const phi::DenseTensor& trans_weight_exps, + const phi::DenseTensor& label, + phi::DenseTensor* alpha) const { const T* x = emission.data(); const T* x_row_max = emission_row_max.data(); const T* x_exps = emission_exps.data(); @@ -243,27 +250,31 @@ template class LinearChainCRFGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* label = ctx.Input("Label"); - const Tensor* emission_exps = ctx.Input("EmissionExps"); - const Tensor* transition_exps = ctx.Input("TransitionExps"); - const Tensor* alpha = ctx.Input("Alpha"); + const phi::DenseTensor* label = ctx.Input("Label"); + const phi::DenseTensor* emission_exps = + ctx.Input("EmissionExps"); + const phi::DenseTensor* transition_exps = + ctx.Input("TransitionExps"); + const phi::DenseTensor* alpha = ctx.Input("Alpha"); const T* ll_grad = - ctx.Input(framework::GradVarName("LogLikelihood"))->data(); - Tensor* emission_grad = - ctx.Output(framework::GradVarName("Emission")); + ctx.Input(framework::GradVarName("LogLikelihood")) + ->data(); + phi::DenseTensor* emission_grad = + ctx.Output(framework::GradVarName("Emission")); auto* emission_grad_data = emission_grad->mutable_data(platform::CPUPlace()); memset(emission_grad_data, 0, emission_grad->numel() * sizeof(T)); - Tensor alpha_tmp = *alpha; - Tensor label_tmp = *label; - Tensor emission_exps_tmp = *emission_exps; - Tensor emission_grad_tmp = *emission_grad; + phi::DenseTensor alpha_tmp = *alpha; + phi::DenseTensor label_tmp = *label; + phi::DenseTensor emission_exps_tmp = *emission_exps; + phi::DenseTensor emission_grad_tmp = *emission_grad; // getting seq_num using padding or not int64_t seq_num = 0; framework::LoD in_lod; const int64_t* length_data = nullptr; if (ctx.HasInput("Length")) { - const Tensor* label_length = ctx.Input("Length"); + const phi::DenseTensor* label_length = + ctx.Input("Length"); length_data = 
label_length->data(); seq_num = label_length->numel(); auto emission_dims = emission_grad->dims(); @@ -283,8 +294,8 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { seq_num = static_cast(in_lod[0].size() - 1); } - Tensor* transition_grad = - ctx.Output(framework::GradVarName("Transition")); + phi::DenseTensor* transition_grad = + ctx.Output(framework::GradVarName("Transition")); // TODO(caoying) Fix this constraint. When the Input(Emission) is from the // data reader operator, it can have no gradients. @@ -298,7 +309,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { // backwark vectors. For a backward vector i (the i-th row of beta), it // captures the unnormalized probabilities of partial sequences starting // at position i. - Tensor beta; + phi::DenseTensor beta; beta.mutable_data(emission_dims, platform::CPUPlace()); if (ctx.HasInput("Length")) { beta.Resize({emission_dims[0] * emission_dims[1], emission_dims[2]}); @@ -318,12 +329,14 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { if (end_pos == start_pos) { continue; } - const Tensor one_seq_emission_exps = + const phi::DenseTensor one_seq_emission_exps = emission_exps_tmp.Slice(start_pos, end_pos); - const Tensor one_seq_label = label_tmp.Slice(start_pos, end_pos); - const Tensor one_seq_alpha = alpha_tmp.Slice(start_pos, end_pos); - Tensor one_seq_beta = beta.Slice(start_pos, end_pos); - Tensor one_seq_emission_grad = + const phi::DenseTensor one_seq_label = + label_tmp.Slice(start_pos, end_pos); + const phi::DenseTensor one_seq_alpha = + alpha_tmp.Slice(start_pos, end_pos); + phi::DenseTensor one_seq_beta = beta.Slice(start_pos, end_pos); + phi::DenseTensor one_seq_emission_grad = emission_grad_tmp.Slice(start_pos, end_pos); BackwardOneSequence(ctx.template device_context(), ll_grad[i], @@ -340,13 +353,13 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { private: void BackwardOneSequence(const phi::CPUContext& ctx, const T ll_grad, - const Tensor& emission_exps, - const Tensor& transition_exps, - const Tensor& alpha, - const Tensor& label, - Tensor* beta, - Tensor* transition_grad, - Tensor* emission_grad) const { + const phi::DenseTensor& emission_exps, + const phi::DenseTensor& transition_exps, + const phi::DenseTensor& alpha, + const phi::DenseTensor& label, + phi::DenseTensor* beta, + phi::DenseTensor* transition_grad, + phi::DenseTensor* emission_grad) const { const T* w_exps = transition_exps.data(); const T* x_exps = emission_exps.data(); const int64_t* label_value = label.data(); @@ -406,7 +419,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel { // TODO(caoying): Fix this to avoid using this local variable if we can // profile the training process. 
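// kron (earlier) and linspace / lod_reset (just below) all override
// GetKernelTypeForVar, and the only change the patch makes there is the type
// of the inspected tensor parameter. A sketch of the override with the new
// signature, for a hypothetical op that keeps the expected dtype and place:
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/dense_tensor.h"

class MyOp : public paddle::framework::OperatorWithKernel {
 public:
  using paddle::framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  paddle::framework::OpKernelType GetKernelTypeForVar(
      const std::string& var_name,
      const phi::DenseTensor& tensor,
      const paddle::framework::OpKernelType& expected_kernel_type)
      const override {
    // Keep dtype/place from the expected kernel type; take the layout from
    // the actual tensor, so no transform is forced for this variable.
    return paddle::framework::OpKernelType(expected_kernel_type.data_type_,
                                           expected_kernel_type.place_,
                                           tensor.layout());
  }
};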
- Tensor tmp; + phi::DenseTensor tmp; tmp.mutable_data(beta->dims(), platform::CPUPlace()); auto tmp_mat = framework::EigenMatrix::From(tmp); auto prob = beta_mat * x_exps_mat; diff --git a/paddle/fluid/operators/linspace_op.cc b/paddle/fluid/operators/linspace_op.cc index 6766c9559be16..d9dcfbed5967f 100644 --- a/paddle/fluid/operators/linspace_op.cc +++ b/paddle/fluid/operators/linspace_op.cc @@ -37,7 +37,7 @@ class LinspaceOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (platform::is_xpu_place(tensor.place())) { return framework::OpKernelType( diff --git a/paddle/fluid/operators/lod_reset_op.cc b/paddle/fluid/operators/lod_reset_op.cc index b0a6b073b4a02..1e03bb806f192 100644 --- a/paddle/fluid/operators/lod_reset_op.cc +++ b/paddle/fluid/operators/lod_reset_op.cc @@ -71,7 +71,7 @@ class LoDResetOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { return framework::OpKernelType(expected_kernel_type.data_type_, expected_kernel_type.place_, diff --git a/paddle/fluid/operators/lod_reset_op.h b/paddle/fluid/operators/lod_reset_op.h index 1d687232974ef..5049653b1fb50 100644 --- a/paddle/fluid/operators/lod_reset_op.h +++ b/paddle/fluid/operators/lod_reset_op.h @@ -53,7 +53,7 @@ class LoDResetKernel : public framework::OpKernel { return; // early return, since lod already set } else { auto* lod = lod_t->data(); - framework::Tensor lod_cpu; + phi::DenseTensor lod_cpu; if (platform::is_gpu_place(lod_t->place())) { framework::TensorCopySync(*lod_t, platform::CPUPlace(), &lod_cpu); lod = lod_cpu.data(); @@ -115,8 +115,8 @@ template class LoDResetGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { - auto* d_out = ctx.Input(framework::GradVarName("Out")); - auto* d_x = ctx.Output(framework::GradVarName("X")); + auto* d_out = ctx.Input(framework::GradVarName("Out")); + auto* d_x = ctx.Output(framework::GradVarName("X")); framework::TensorCopy(*d_out, d_out->place(), d_x); } diff --git a/paddle/fluid/operators/lod_tensor_to_array_op.cc b/paddle/fluid/operators/lod_tensor_to_array_op.cc index ab4d95c592fc1..ee7be39580b49 100644 --- a/paddle/fluid/operators/lod_tensor_to_array_op.cc +++ b/paddle/fluid/operators/lod_tensor_to_array_op.cc @@ -46,14 +46,14 @@ struct LoDTensorToArrayFunctorImpl { struct LoDTensorToArrayFunctor : public std::unary_function { - std::vector ref_inputs_; - mutable std::vector outputs_; - const framework::Tensor &input_; + std::vector ref_inputs_; + mutable std::vector outputs_; + const phi::DenseTensor &input_; - explicit LoDTensorToArrayFunctor(const framework::Tensor &input) + explicit LoDTensorToArrayFunctor(const phi::DenseTensor &input) : input_(input) {} - void AddOutput(framework::Tensor *t) { + void AddOutput(phi::DenseTensor *t) { outputs_.emplace_back(t); ref_inputs_.emplace_back(t); } @@ -152,7 +152,7 @@ class LoDTensorToArrayOp : public framework::OperatorBase { } } - std::map outputs; + std::map outputs; for (size_t i = 0; i < max_seq_len; ++i) { auto &ranges = copy_ranges[i]; diff --git a/paddle/fluid/operators/log_loss_op_npu.cc b/paddle/fluid/operators/log_loss_op_npu.cc index 
465992588cfd3..47c6bef196be1 100644 --- a/paddle/fluid/operators/log_loss_op_npu.cc +++ b/paddle/fluid/operators/log_loss_op_npu.cc @@ -20,14 +20,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template void LogLossAdds(const platform::Place& place, const aclrtStream& stream, - const Tensor* x, + const phi::DenseTensor* x, float scale, - Tensor* y) { + phi::DenseTensor* y) { // Calculate y = x + scale y->mutable_data(x->dims(), place); const auto& runner = NpuOpRunner("Adds", {*x}, {*y}, {{"value", scale}}); @@ -37,9 +37,9 @@ void LogLossAdds(const platform::Place& place, template void LogLossMuls(const platform::Place& place, const aclrtStream& stream, - const Tensor* x, + const phi::DenseTensor* x, float scale, - Tensor* y) { + phi::DenseTensor* y) { // Calculate y = x + scale y->mutable_data(x->dims(), place); const auto& runner = NpuOpRunner("Muls", {*x}, {*y}, {{"value", scale}}); @@ -49,9 +49,9 @@ void LogLossMuls(const platform::Place& place, template void LogLossBCE(const platform::Place& place, const aclrtStream& stream, - const Tensor* x, - const Tensor* y, - Tensor* z) { + const phi::DenseTensor* x, + const phi::DenseTensor* y, + phi::DenseTensor* z) { z->mutable_data(x->dims(), place); const auto& runner = NpuOpRunner("BinaryCrossEntropy", @@ -64,10 +64,10 @@ void LogLossBCE(const platform::Place& place, template void LogLossBCEGrad(const platform::Place& place, const aclrtStream& stream, - const Tensor* x, - const Tensor* y, - const Tensor* dout, - Tensor* dx) { + const phi::DenseTensor* x, + const phi::DenseTensor* y, + const phi::DenseTensor* dout, + phi::DenseTensor* dx) { dx->mutable_data(x->dims(), place); const auto& runner = NpuOpRunner("BinaryCrossEntropyGrad", @@ -81,9 +81,9 @@ template class LogLossNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* y = ctx.Output("Loss"); - auto* pred = ctx.Input("Predicted"); - auto* label = ctx.Input("Labels"); + auto* y = ctx.Output("Loss"); + auto* pred = ctx.Input("Predicted"); + auto* label = ctx.Input("Labels"); auto epsilon = static_cast(ctx.Attr("epsilon")); auto place = ctx.GetPlace(); @@ -104,10 +104,11 @@ template class LogLossGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* pred = ctx.Input("Predicted"); - auto* label = ctx.Input("Labels"); - auto* dloss = ctx.Input(framework::GradVarName("Loss")); - auto* dpred = ctx.Output(framework::GradVarName("Predicted")); + auto* pred = ctx.Input("Predicted"); + auto* label = ctx.Input("Labels"); + auto* dloss = ctx.Input(framework::GradVarName("Loss")); + auto* dpred = + ctx.Output(framework::GradVarName("Predicted")); auto epsilon = static_cast(ctx.Attr("epsilon")); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/log_loss_op_xpu.cc b/paddle/fluid/operators/log_loss_op_xpu.cc index 62095dc3524fe..59e0c15678247 100644 --- a/paddle/fluid/operators/log_loss_op_xpu.cc +++ b/paddle/fluid/operators/log_loss_op_xpu.cc @@ -17,15 +17,15 @@ limitations under the License. 
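// The log_loss NPU helpers above all follow one shape: a free function that
// takes const phi::DenseTensor* inputs, allocates the output on the target
// place, and dispatches a single NPU operator through NpuOpRunner. A sketch
// of that shape for an "Adds"-style scalar op; the include path and the
// helper name are assumptions, the runner usage mirrors the hunks.
#include "paddle/fluid/platform/device/npu/npu_op_runner.h"
#include "paddle/phi/core/dense_tensor.h"

template <typename T>
void ScalarAdd(const paddle::platform::Place& place,
               const aclrtStream& stream,
               const phi::DenseTensor* x,
               float value,
               phi::DenseTensor* y) {
  // y = x + value, element-wise
  y->mutable_data<T>(x->dims(), place);
  const auto& runner =
      paddle::operators::NpuOpRunner("Adds", {*x}, {*y}, {{"value", value}});
  runner.Run(stream);
}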
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class LogLossXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* predict = ctx.Input("Predicted"); - auto* labels = ctx.Input("Labels"); - auto* loss = ctx.Output("Loss"); + auto* predict = ctx.Input("Predicted"); + auto* labels = ctx.Input("Labels"); + auto* loss = ctx.Output("Loss"); auto epsilon = static_cast(ctx.Attr("epsilon")); loss->mutable_data(ctx.GetPlace()); int n = predict->numel(); @@ -43,10 +43,11 @@ template class LogLossGradXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* predict = ctx.Input("Predicted"); - auto* labels = ctx.Input("Labels"); - auto* dloss = ctx.Input(framework::GradVarName("Loss")); - auto* dpred = ctx.Output(framework::GradVarName("Predicted")); + auto* predict = ctx.Input("Predicted"); + auto* labels = ctx.Input("Labels"); + auto* dloss = ctx.Input(framework::GradVarName("Loss")); + auto* dpred = + ctx.Output(framework::GradVarName("Predicted")); if (!dpred) { return; } diff --git a/paddle/fluid/operators/log_softmax_op_npu.cc b/paddle/fluid/operators/log_softmax_op_npu.cc index acdc4db14bed5..b86786b8a3170 100644 --- a/paddle/fluid/operators/log_softmax_op_npu.cc +++ b/paddle/fluid/operators/log_softmax_op_npu.cc @@ -25,8 +25,8 @@ template class LogSoftmaxNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Out = ctx.Output("Out"); + auto* X = ctx.Input("X"); + auto* Out = ctx.Output("Out"); const int rank = X->dims().size(); const int axis = phi::funcs::CanonicalAxis(ctx.Attr("axis"), rank); Out->mutable_data(ctx.GetPlace()); @@ -44,9 +44,9 @@ template class LogSoftmaxGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* Out = ctx.Input("Out"); - auto* dOut = ctx.Input(framework::GradVarName("Out")); - auto* dX = ctx.Output(framework::GradVarName("X")); + auto* Out = ctx.Input("Out"); + auto* dOut = ctx.Input(framework::GradVarName("Out")); + auto* dX = ctx.Output(framework::GradVarName("X")); const int rank = dOut->dims().size(); const int axis = phi::funcs::CanonicalAxis(ctx.Attr("axis"), rank); diff --git a/paddle/fluid/operators/lookup_table_dequant_op.h b/paddle/fluid/operators/lookup_table_dequant_op.h index 8a2dceacb2877..e002a031a795a 100644 --- a/paddle/fluid/operators/lookup_table_dequant_op.h +++ b/paddle/fluid/operators/lookup_table_dequant_op.h @@ -27,7 +27,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using SelectedRows = phi::SelectedRows; using DDim = framework::DDim; diff --git a/paddle/fluid/operators/lookup_table_op.h b/paddle/fluid/operators/lookup_table_op.h index 31a3e40f12e82..c8964647ce372 100644 --- a/paddle/fluid/operators/lookup_table_op.h +++ b/paddle/fluid/operators/lookup_table_op.h @@ -26,7 +26,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using SelectedRows = phi::SelectedRows; using DDim = framework::DDim; diff --git a/paddle/fluid/operators/lookup_table_v2_op.cu b/paddle/fluid/operators/lookup_table_v2_op.cu index 7b4ed84fc209b..41be6b34e6e5b 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.cu +++ b/paddle/fluid/operators/lookup_table_v2_op.cu @@ -78,13 +78,13 @@ __global__ void LookupTableV2Grad(T *table, template struct LookupTableV2CUDAFunctor { LookupTableV2CUDAFunctor(const framework::ExecutionContext &context, - const framework::Tensor *ids_t) + const phi::DenseTensor *ids_t) : context_(context), ids_t_(ids_t) {} template void apply() { - auto *table_t = context_.Input("W"); - auto *output_t = context_.Output("Out"); + auto *table_t = context_.Input("W"); + auto *output_t = context_.Output("Out"); int64_t padding_idx = context_.Attr("padding_idx"); size_t N = table_t->dims()[0]; @@ -111,14 +111,14 @@ struct LookupTableV2CUDAFunctor { private: const framework::ExecutionContext &context_; - const framework::Tensor *ids_t_; + const phi::DenseTensor *ids_t_; }; template class LookupTableV2CUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const auto *ids_t = context.Input("Ids"); + const auto *ids_t = context.Input("Ids"); LookupTableV2CUDAFunctor functor(context, ids_t); framework::VisitIntDataType(framework::TransToProtoVarType(ids_t->dtype()), functor); @@ -137,7 +137,7 @@ __global__ void InputTypeConvert(const InT *in_ids, template struct LookupTableV2GradCUDAFunctor { LookupTableV2GradCUDAFunctor(const framework::ExecutionContext &context, - const framework::Tensor *ids_t) + const phi::DenseTensor *ids_t) : context_(context), ids_t_(ids_t) {} template @@ -148,9 +148,9 @@ struct LookupTableV2GradCUDAFunctor { // Since paddings are not trainable and fixed in forward, the gradient of // paddings makes no sense and we don't deal with it in backward. 
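// lookup_table_v2 above wraps its kernel body in a small functor so the same
// code can be instantiated for every integer id type and dispatched via
// framework::VisitIntDataType(TransToProtoVarType(ids->dtype()), functor),
// as the hunks show. A condensed sketch of that structure; the functor name
// and the elided gather loop are illustrative.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/dense_tensor.h"

template <typename T>
struct EmbeddingLookupFunctor {
  EmbeddingLookupFunctor(const paddle::framework::ExecutionContext& ctx,
                         const phi::DenseTensor* ids)
      : ctx_(ctx), ids_(ids) {}

  // Instantiated once per integer index type (int32_t / int64_t).
  template <typename IdT>
  void apply() {
    const phi::DenseTensor* table = ctx_.Input<phi::DenseTensor>("W");
    phi::DenseTensor* out = ctx_.Output<phi::DenseTensor>("Out");
    const IdT* id_data = ids_->data<IdT>();
    out->mutable_data<T>(ctx_.GetPlace());
    // ... gather rows of `table` indexed by id_data into `out` ...
    (void)table;
    (void)id_data;
  }

 private:
  const paddle::framework::ExecutionContext& ctx_;
  const phi::DenseTensor* ids_;
};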
if (is_sparse) { - auto *table = context_.Input("W"); + auto *table = context_.Input("W"); auto *d_output = - context_.Input(framework::GradVarName("Out")); + context_.Input(framework::GradVarName("Out")); auto *d_table = context_.Output(framework::GradVarName("W")); @@ -206,9 +206,9 @@ struct LookupTableV2GradCUDAFunctor { } else { auto d_output_t = - context_.Input(framework::GradVarName("Out")); + context_.Input(framework::GradVarName("Out")); auto d_table_t = - context_.Output(framework::GradVarName("W")); + context_.Output(framework::GradVarName("W")); int N = d_table_t->dims()[0]; int D = d_table_t->dims()[1]; @@ -236,14 +236,14 @@ struct LookupTableV2GradCUDAFunctor { private: const framework::ExecutionContext &context_; - const framework::Tensor *ids_t_; + const phi::DenseTensor *ids_t_; }; template class LookupTableV2GradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const auto *ids_t = context.Input("Ids"); + const auto *ids_t = context.Input("Ids"); LookupTableV2GradCUDAFunctor functor(context, ids_t); framework::VisitIntDataType(framework::TransToProtoVarType(ids_t->dtype()), functor); diff --git a/paddle/fluid/operators/lookup_table_v2_op.h b/paddle/fluid/operators/lookup_table_v2_op.h index 1e12b00ebb944..49ef1c282f016 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.h +++ b/paddle/fluid/operators/lookup_table_v2_op.h @@ -27,7 +27,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using SelectedRows = phi::SelectedRows; using DDim = framework::DDim; @@ -151,7 +151,7 @@ template class LookupTableV2Kernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const auto *ids = context.Input("Ids"); + const auto *ids = context.Input("Ids"); LookupTableV2CPUFunctor functor(context, ids); framework::VisitIntDataType(framework::TransToProtoVarType(ids->dtype()), functor); @@ -272,7 +272,7 @@ template class LookupTableV2GradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - const auto *ids = context.Input("Ids"); + const auto *ids = context.Input("Ids"); LookupTableV2GradCPUFunctor functor(context, ids); framework::VisitIntDataType(framework::TransToProtoVarType(ids->dtype()), functor); diff --git a/paddle/fluid/operators/lookup_table_v2_op_mlu.cc b/paddle/fluid/operators/lookup_table_v2_op_mlu.cc index 2cda715f14efa..282b8581ca482 100644 --- a/paddle/fluid/operators/lookup_table_v2_op_mlu.cc +++ b/paddle/fluid/operators/lookup_table_v2_op_mlu.cc @@ -17,7 +17,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class LookupTableV2MLUKernel : public framework::OpKernel { diff --git a/paddle/fluid/operators/lookup_table_v2_op_npu.cc b/paddle/fluid/operators/lookup_table_v2_op_npu.cc index 842bbd2c672ee..41d53a3e531ca 100644 --- a/paddle/fluid/operators/lookup_table_v2_op_npu.cc +++ b/paddle/fluid/operators/lookup_table_v2_op_npu.cc @@ -22,7 +22,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; constexpr int64_t kNoPadding = -1; template diff --git a/paddle/fluid/operators/lrn_op.cc b/paddle/fluid/operators/lrn_op.cc index ca2fba56697fc..b2ef8f0370e37 100644 --- a/paddle/fluid/operators/lrn_op.cc +++ b/paddle/fluid/operators/lrn_op.cc @@ -27,15 +27,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; using DataLayout = framework::DataLayout; template struct LRNFunctor { void operator()(const framework::ExecutionContext& ctx, - const framework::Tensor& input, - framework::Tensor* out, - framework::Tensor* mid, + const phi::DenseTensor& input, + phi::DenseTensor* out, + phi::DenseTensor* mid, int N, int C, int H, @@ -49,7 +48,7 @@ struct LRNFunctor { auto blas = phi::funcs::GetBlas(ctx); phi::funcs::Transpose transpose; auto& dev_ctx = ctx.template device_context(); - Tensor in_transpose, mid_transpose, out_transpose; + phi::DenseTensor in_transpose, mid_transpose, out_transpose; // if channel_last, transpose to channel_first if (data_layout == DataLayout::kNHWC) { auto in_dims = input.dims(); @@ -72,7 +71,7 @@ struct LRNFunctor { T* odata = out_transpose.data(); T* mdata = mid_transpose.data(); - Tensor squared; + phi::DenseTensor squared; T* sdata = squared.mutable_data({1, C + n - 1, H, W}, place); std::memset(sdata, 0, sizeof(T) * squared.numel()); for (int i = 0; i < mid->numel(); ++i) { @@ -122,11 +121,11 @@ template struct LRNFunctor; template struct LRNGradFunctor { void operator()(const framework::ExecutionContext& ctx, - const framework::Tensor& x, - const framework::Tensor& out, - const framework::Tensor& mid, - framework::Tensor* x_g, - const framework::Tensor& out_g, + const phi::DenseTensor& x, + const phi::DenseTensor& out, + const phi::DenseTensor& mid, + phi::DenseTensor* x_g, + const phi::DenseTensor& out_g, int N, int C, int H, @@ -241,7 +240,7 @@ class LRNOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { #ifdef PADDLE_WITH_MKLDNN if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && @@ -375,7 +374,7 @@ class LRNOpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { #ifdef PADDLE_WITH_MKLDNN if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && diff --git a/paddle/fluid/operators/lrn_op.cu b/paddle/fluid/operators/lrn_op.cu index 8c95cf1d0c9da..4bd0074328189 100644 --- a/paddle/fluid/operators/lrn_op.cu +++ b/paddle/fluid/operators/lrn_op.cu @@ -110,9 +110,9 @@ void CrossMapNormal(const framework::ExecutionContext& ctx, template struct LRNFunctor { void operator()(const framework::ExecutionContext& ctx, - const framework::Tensor& input, - framework::Tensor* out, - framework::Tensor* mid, + const phi::DenseTensor& input, + phi::DenseTensor* out, + phi::DenseTensor* mid, int N, int C, int H, @@ -238,11 +238,11 @@ void CrossMapNormalGrad(const framework::ExecutionContext& ctx, template struct LRNGradFunctor { void operator()(const framework::ExecutionContext& ctx, - const framework::Tensor& x, - const framework::Tensor& out, - const framework::Tensor& mid, - 
framework::Tensor* x_g, - const framework::Tensor& out_g, + const phi::DenseTensor& x, + const phi::DenseTensor& out, + const phi::DenseTensor& mid, + phi::DenseTensor* x_g, + const phi::DenseTensor& out_g, int N, int C, int H, diff --git a/paddle/fluid/operators/lrn_op.h b/paddle/fluid/operators/lrn_op.h index 890542f0ed1b2..1b8d2c04f69a3 100644 --- a/paddle/fluid/operators/lrn_op.h +++ b/paddle/fluid/operators/lrn_op.h @@ -29,9 +29,9 @@ using DataLayout = framework::DataLayout; template struct LRNFunctor { void operator()(const framework::ExecutionContext& ctx, - const framework::Tensor& input, - framework::Tensor* out, - framework::Tensor* mid, + const phi::DenseTensor& input, + phi::DenseTensor* out, + phi::DenseTensor* mid, int N, int C, int H, @@ -46,14 +46,14 @@ struct LRNFunctor { template class LRNKernel : public framework::OpKernel { public: - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; // f(x) = x * ( k + alpha * SUM((x)^2) )^(-beta) // x represents inputs // f(x) represents outputs void Compute(const framework::ExecutionContext& ctx) const override { // input - const Tensor& x = *ctx.Input("X"); + const phi::DenseTensor& x = *ctx.Input("X"); auto x_dims = x.dims(); const std::string data_layout_str = ctx.Attr("data_format"); @@ -65,11 +65,11 @@ class LRNKernel : public framework::OpKernel { int H = (data_layout != DataLayout::kNHWC ? x_dims[2] : x_dims[1]); int W = (data_layout != DataLayout::kNHWC ? x_dims[3] : x_dims[2]); - Tensor* out = ctx.Output("Out"); + phi::DenseTensor* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); // MidOut save the intermediate result for backward - Tensor* mid = ctx.Output("MidOut"); + phi::DenseTensor* mid = ctx.Output("MidOut"); mid->mutable_data(ctx.GetPlace()); int n = ctx.Attr("n"); @@ -104,11 +104,11 @@ class LRNKernel : public framework::OpKernel { template struct LRNGradFunctor { void operator()(const framework::ExecutionContext& ctx, - const framework::Tensor& x, - const framework::Tensor& out, - const framework::Tensor& mid, - framework::Tensor* x_g, - const framework::Tensor& out_g, + const phi::DenseTensor& x, + const phi::DenseTensor& out, + const phi::DenseTensor& mid, + phi::DenseTensor* x_g, + const phi::DenseTensor& out_g, int N, int C, int H, @@ -141,17 +141,18 @@ struct LRNGradFunctor { template class LRNGradKernel : public framework::OpKernel { public: - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor& x = *ctx.Input("X"); - const Tensor& out = *ctx.Input("Out"); - const Tensor& out_g = *ctx.Input(framework::GradVarName("Out")); - const Tensor& mid = *ctx.Input("MidOut"); + const phi::DenseTensor& x = *ctx.Input("X"); + const phi::DenseTensor& out = *ctx.Input("Out"); + const phi::DenseTensor& out_g = + *ctx.Input(framework::GradVarName("Out")); + const phi::DenseTensor& mid = *ctx.Input("MidOut"); const std::string data_layout_str = ctx.Attr("data_format"); const framework::DataLayout data_layout = framework::StringToDataLayout(data_layout_str); - auto x_g = ctx.Output(framework::GradVarName("X")); + auto x_g = ctx.Output(framework::GradVarName("X")); x_g->mutable_data(ctx.GetPlace()); auto x_dims = x.dims(); diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h index 01e381dc7a3cd..b864919259f59 100644 --- a/paddle/fluid/operators/lstm_op.h +++ b/paddle/fluid/operators/lstm_op.h @@ -25,13 +25,13 @@ namespace paddle { namespace operators { using LoDTensor = 
framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template inline void ReorderInitState(const DeviceContext& ctx, - const framework::Tensor& src, + const phi::DenseTensor& src, framework::Vector index_lod, - framework::Tensor* dst, + phi::DenseTensor* dst, bool indexed_src) { phi::funcs::CopyMatrixRowsFunctor row_shuffle; dst->mutable_data(src.dims(), ctx.GetPlace()); @@ -45,11 +45,11 @@ class LSTMKernel : public framework::OpKernel { bool is_test = ctx.Attr("is_test"); auto* input = ctx.Input("Input"); - auto* weight = ctx.Input("Weight"); - auto* bias = ctx.Input("Bias"); + auto* weight = ctx.Input("Weight"); + auto* bias = ctx.Input("Bias"); - auto* hidden_t0 = ctx.Input("H0"); - auto* cell_t0 = ctx.Input("C0"); + auto* hidden_t0 = ctx.Input("H0"); + auto* cell_t0 = ctx.Input("C0"); LoDTensor* batch_gate = nullptr; LoDTensor batch_gate_temp; @@ -205,8 +205,8 @@ class LSTMGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* input = ctx.Input("Input"); - auto* weight = ctx.Input("Weight"); - auto* bias = ctx.Input("Bias"); + auto* weight = ctx.Input("Weight"); + auto* bias = ctx.Input("Bias"); auto* hidden_out = ctx.Input("Hidden"); auto* cell_out = ctx.Input("Cell"); @@ -217,14 +217,15 @@ class LSTMGradKernel : public framework::OpKernel { auto* hidden_g = ctx.Input(framework::GradVarName("Hidden")); auto* in_g = ctx.Output(framework::GradVarName("Input")); - auto* weight_g = ctx.Output(framework::GradVarName("Weight")); - auto* bias_g = ctx.Output(framework::GradVarName("Bias")); + auto* weight_g = + ctx.Output(framework::GradVarName("Weight")); + auto* bias_g = ctx.Output(framework::GradVarName("Bias")); - auto* h0 = ctx.Input("H0"); - auto* c0 = ctx.Input("C0"); + auto* h0 = ctx.Input("H0"); + auto* c0 = ctx.Input("C0"); - auto* h0_g = ctx.Output(framework::GradVarName("H0")); - auto* c0_g = ctx.Output(framework::GradVarName("C0")); + auto* h0_g = ctx.Output(framework::GradVarName("H0")); + auto* c0_g = ctx.Output(framework::GradVarName("C0")); auto& device_ctx = ctx.template device_context(); phi::funcs::SetConstant zero; diff --git a/paddle/fluid/operators/lstm_unit_op.cu b/paddle/fluid/operators/lstm_unit_op.cu index 29486400a6f2d..ffc6e42587f1c 100644 --- a/paddle/fluid/operators/lstm_unit_op.cu +++ b/paddle/fluid/operators/lstm_unit_op.cu @@ -107,10 +107,10 @@ class LstmUnitOpCUDAKernel : public framework::OpKernel { true, paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace.")); - auto* x_tensor = ctx.Input("X"); - auto* c_prev_tensor = ctx.Input("C_prev"); - auto* c_tensor = ctx.Output("C"); - auto* h_tensor = ctx.Output("H"); + auto* x_tensor = ctx.Input("X"); + auto* c_prev_tensor = ctx.Input("C_prev"); + auto* c_tensor = ctx.Output("C"); + auto* h_tensor = ctx.Output("H"); auto forget_bias = static_cast(ctx.Attr("forget_bias")); @@ -140,17 +140,20 @@ class LstmUnitGradOpCUDAKernel : public framework::OpKernel { true, paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace.")); - auto x_tensor = ctx.Input("X"); - auto c_prev_tensor = ctx.Input("C_prev"); - auto c_tensor = ctx.Input("C"); - auto h_tensor = ctx.Input("H"); + auto x_tensor = ctx.Input("X"); + auto c_prev_tensor = ctx.Input("C_prev"); + auto c_tensor = ctx.Input("C"); + auto h_tensor = ctx.Input("H"); - auto hdiff_tensor = ctx.Input(framework::GradVarName("H")); - auto cdiff_tensor = ctx.Input(framework::GradVarName("C")); + auto hdiff_tensor = + 
ctx.Input(framework::GradVarName("H")); + auto cdiff_tensor = + ctx.Input(framework::GradVarName("C")); - auto xdiff_tensor = ctx.Output(framework::GradVarName("X")); + auto xdiff_tensor = + ctx.Output(framework::GradVarName("X")); auto c_prev_diff_tensor = - ctx.Output(framework::GradVarName("C_prev")); + ctx.Output(framework::GradVarName("C_prev")); auto* X = x_tensor->data(); auto* C_prev = c_prev_tensor->data(); diff --git a/paddle/fluid/operators/lstm_unit_op.h b/paddle/fluid/operators/lstm_unit_op.h index a135ee1369de8..abb2eb1620dbe 100644 --- a/paddle/fluid/operators/lstm_unit_op.h +++ b/paddle/fluid/operators/lstm_unit_op.h @@ -23,8 +23,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - template inline T sigmoid(T x) { return 1. / (1. + exp(-x)); @@ -44,10 +42,10 @@ class LstmUnitKernel : public framework::OpKernel { true, paddle::platform::errors::PreconditionNotMet("It must use CPUPlace.")); - auto* x_tensor = ctx.Input("X"); - auto* c_prev_tensor = ctx.Input("C_prev"); - auto* c_tensor = ctx.Output("C"); - auto* h_tensor = ctx.Output("H"); + auto* x_tensor = ctx.Input("X"); + auto* c_prev_tensor = ctx.Input("C_prev"); + auto* c_tensor = ctx.Output("C"); + auto* h_tensor = ctx.Output("H"); auto forget_bias = static_cast(ctx.Attr("forget_bias")); @@ -89,16 +87,19 @@ class LstmUnitGradKernel : public framework::OpKernel { true, paddle::platform::errors::PreconditionNotMet("It must use CPUPlace.")); - auto x_tensor = ctx.Input("X"); - auto c_prev_tensor = ctx.Input("C_prev"); - auto c_tensor = ctx.Input("C"); + auto x_tensor = ctx.Input("X"); + auto c_prev_tensor = ctx.Input("C_prev"); + auto c_tensor = ctx.Input("C"); - auto hdiff_tensor = ctx.Input(framework::GradVarName("H")); - auto cdiff_tensor = ctx.Input(framework::GradVarName("C")); + auto hdiff_tensor = + ctx.Input(framework::GradVarName("H")); + auto cdiff_tensor = + ctx.Input(framework::GradVarName("C")); - auto xdiff_tensor = ctx.Output(framework::GradVarName("X")); + auto xdiff_tensor = + ctx.Output(framework::GradVarName("X")); auto c_prev_diff_tensor = - ctx.Output(framework::GradVarName("C_prev")); + ctx.Output(framework::GradVarName("C_prev")); auto* X = x_tensor->data(); auto* C_prev = c_prev_tensor->data(); diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h index a00d0f7f36545..298f54944bbe6 100644 --- a/paddle/fluid/operators/lstmp_op.h +++ b/paddle/fluid/operators/lstmp_op.h @@ -30,7 +30,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using platform::Transform; template inline void ReorderInitState(const DeviceContext& ctx, - const framework::Tensor& src, + const phi::DenseTensor& src, framework::Vector index, - framework::Tensor* dst, + phi::DenseTensor* dst, bool indexed_src) { phi::funcs::CopyMatrixRowsFunctor row_shuffle; dst->mutable_data(src.dims(), ctx.GetPlace()); @@ -108,12 +108,12 @@ class LSTMPKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* input = ctx.Input("Input"); - auto* weight = ctx.Input("Weight"); - auto* proj_weight = ctx.Input("ProjWeight"); - auto* bias = ctx.Input("Bias"); + auto* weight = ctx.Input("Weight"); + auto* proj_weight = ctx.Input("ProjWeight"); + auto* bias = ctx.Input("Bias"); - auto* hidden_t0 = ctx.Input("H0"); - auto* cell_t0 = ctx.Input("C0"); + auto* hidden_t0 = ctx.Input("H0"); + auto* cell_t0 = ctx.Input("C0"); 
auto proj_clip = static_cast(ctx.Attr("proj_clip")); auto cell_clip = static_cast(ctx.Attr("cell_clip")); @@ -306,9 +306,9 @@ class LSTMPGradKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext& ctx) const override { - auto* weight = ctx.Input("Weight"); - auto* proj_weight = ctx.Input("ProjWeight"); - auto* bias = ctx.Input("Bias"); + auto* weight = ctx.Input("Weight"); + auto* proj_weight = ctx.Input("ProjWeight"); + auto* bias = ctx.Input("Bias"); auto* proj_out = ctx.Input("Projection"); auto* cell_out = ctx.Input("Cell"); @@ -324,16 +324,17 @@ class LSTMPGradKernel : public framework::OpKernel { ctx.Input(framework::GradVarName("Projection")); auto* in_g = ctx.Output(framework::GradVarName("Input")); - auto* weight_g = ctx.Output(framework::GradVarName("Weight")); + auto* weight_g = + ctx.Output(framework::GradVarName("Weight")); auto* proj_weight_g = - ctx.Output(framework::GradVarName("ProjWeight")); - auto* bias_g = ctx.Output(framework::GradVarName("Bias")); + ctx.Output(framework::GradVarName("ProjWeight")); + auto* bias_g = ctx.Output(framework::GradVarName("Bias")); - auto* h0 = ctx.Input("H0"); - auto* c0 = ctx.Input("C0"); + auto* h0 = ctx.Input("H0"); + auto* c0 = ctx.Input("C0"); - auto* h0_g = ctx.Output(framework::GradVarName("H0")); - auto* c0_g = ctx.Output(framework::GradVarName("C0")); + auto* h0_g = ctx.Output(framework::GradVarName("H0")); + auto* c0_g = ctx.Output(framework::GradVarName("C0")); auto& device_ctx = ctx.template device_context(); phi::funcs::SetConstant zero; diff --git a/paddle/fluid/operators/margin_rank_loss_op.h b/paddle/fluid/operators/margin_rank_loss_op.h index c1bf44510766b..4968f093f5629 100644 --- a/paddle/fluid/operators/margin_rank_loss_op.h +++ b/paddle/fluid/operators/margin_rank_loss_op.h @@ -38,12 +38,12 @@ template class MarginRankLossKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { - auto* out_t = ctx.Output("Out"); - auto* act_t = ctx.Output("Activated"); + auto* out_t = ctx.Output("Out"); + auto* act_t = ctx.Output("Activated"); - auto* label_t = ctx.Input("Label"); - auto* x1_t = ctx.Input("X1"); - auto* x2_t = ctx.Input("X2"); + auto* label_t = ctx.Input("Label"); + auto* x1_t = ctx.Input("X1"); + auto* x2_t = ctx.Input("X2"); out_t->mutable_data(ctx.GetPlace()); act_t->mutable_data(ctx.GetPlace()); @@ -71,9 +71,9 @@ class MarginRankLossGradKernel : public framework::OpKernel { auto* d_x2_t = ctx.Output(framework::GradVarName("X2")); - auto* act_t = ctx.Input("Activated"); - auto* d_out_t = ctx.Input(framework::GradVarName("Out")); - auto* label_t = ctx.Input("Label"); + auto* act_t = ctx.Input("Activated"); + auto* d_out_t = ctx.Input(framework::GradVarName("Out")); + auto* label_t = ctx.Input("Label"); auto d_out = framework::EigenVector::Flatten(*d_out_t); auto act = framework::EigenVector::Flatten(*act_t); diff --git a/paddle/fluid/operators/marker_op.cu b/paddle/fluid/operators/marker_op.cu index 3b52788514b91..7c6fe79ab7ff1 100644 --- a/paddle/fluid/operators/marker_op.cu +++ b/paddle/fluid/operators/marker_op.cu @@ -40,8 +40,8 @@ class MarkerOpCUDAKernel : public framework::OpKernel { VLOG(3) << "marker role: " << marker_role << " marker position: " << marker_pos; - framework::Tensor A; - framework::Tensor B; + phi::DenseTensor A; + phi::DenseTensor B; auto* in_temp = A.mutable_data({32, 1}, ctx.GetPlace()); auto* out_temp = B.mutable_data({32, 1}, ctx.GetPlace()); platform::RecordEvent record_event( diff --git 
a/paddle/fluid/operators/masked_select_op_mlu.cc b/paddle/fluid/operators/masked_select_op_mlu.cc index 279096b762ca8..50c9973721836 100644 --- a/paddle/fluid/operators/masked_select_op_mlu.cc +++ b/paddle/fluid/operators/masked_select_op_mlu.cc @@ -22,9 +22,9 @@ template class MaskedSelectedMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto input = ctx.Input("X"); - auto mask = ctx.Input("Mask"); - auto out = ctx.Output("Y"); + auto input = ctx.Input("X"); + auto mask = ctx.Input("Mask"); + auto out = ctx.Output("Y"); auto input_dim = input->dims(); auto mask_dim = mask->dims(); @@ -66,9 +66,9 @@ template class MaskedSelectedGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto mask = ctx.Input("Mask"); - auto y_grad = ctx.Input(framework::GradVarName("Y")); - auto x_grad = ctx.Output(framework::GradVarName("X")); + auto mask = ctx.Input("Mask"); + auto y_grad = ctx.Input(framework::GradVarName("Y")); + auto x_grad = ctx.Output(framework::GradVarName("X")); auto& dev_ctx = ctx.template device_context(); diff --git a/paddle/fluid/operators/masked_select_op_npu.cc b/paddle/fluid/operators/masked_select_op_npu.cc index 653da86d81c23..df8a32273297b 100644 --- a/paddle/fluid/operators/masked_select_op_npu.cc +++ b/paddle/fluid/operators/masked_select_op_npu.cc @@ -22,9 +22,9 @@ template class MaskedSelectedNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto input = ctx.Input("X"); - auto mask = ctx.Input("Mask"); - auto out = ctx.Output("Y"); + auto input = ctx.Input("X"); + auto mask = ctx.Input("Mask"); + auto out = ctx.Output("Y"); auto input_dim = input->dims(); auto mask_dim = mask->dims(); @@ -111,9 +111,9 @@ template class MaskedSelectedGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto mask = ctx.Input("Mask"); - auto y_grad = ctx.Input(framework::GradVarName("Y")); - auto x_grad = ctx.Output(framework::GradVarName("X")); + auto mask = ctx.Input("Mask"); + auto y_grad = ctx.Input(framework::GradVarName("Y")); + auto x_grad = ctx.Output(framework::GradVarName("X")); x_grad->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/match_matrix_tensor_op.cc b/paddle/fluid/operators/match_matrix_tensor_op.cc index 820e754049a23..12538b72128fe 100644 --- a/paddle/fluid/operators/match_matrix_tensor_op.cc +++ b/paddle/fluid/operators/match_matrix_tensor_op.cc @@ -24,7 +24,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using LoD = framework::LoD; @@ -244,7 +244,7 @@ class CPUMatchMatrixTensorOPKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* x = ctx.Input("X"); auto* y = ctx.Input("Y"); - auto* w = ctx.Input("W"); + auto* w = ctx.Input("W"); auto* out = ctx.Output("Out"); auto* tmp = ctx.Output("Tmp"); @@ -324,7 +324,7 @@ class CPUMatchMatrixTensorOPGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* x = ctx.Input("X"); auto* y = ctx.Input("Y"); - auto* w = ctx.Input("W"); + auto* w = ctx.Input("W"); auto* tmp = ctx.Input("Tmp"); int dim_t = ctx.Attr("dim_t"); @@ -391,7 +391,7 @@ class CPUMatchMatrixTensorOPGradKernel : public framework::OpKernel { auto blas = phi::funcs::GetBlas(ctx); auto* t_data = w->data(); - auto* d_w = ctx.Output(framework::GradVarName("W")); + auto* d_w = ctx.Output(framework::GradVarName("W")); auto* t_diff = d_w->mutable_data(ctx.GetPlace()); memset(t_diff, 0.0, w->dims()[0] * w->dims()[1] * w->dims()[2] * sizeof(T)); // bottom_diff diff --git a/paddle/fluid/operators/match_matrix_tensor_op.h b/paddle/fluid/operators/match_matrix_tensor_op.h index b067d1c028bd3..72e99222ddffb 100644 --- a/paddle/fluid/operators/match_matrix_tensor_op.h +++ b/paddle/fluid/operators/match_matrix_tensor_op.h @@ -18,7 +18,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class MatchMatrixTensorOP : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/math/beam_search.cc b/paddle/fluid/operators/math/beam_search.cc index 2b607ade728c4..fcb92a7ac7f38 100644 --- a/paddle/fluid/operators/math/beam_search.cc +++ b/paddle/fluid/operators/math/beam_search.cc @@ -32,7 +32,7 @@ class BeamSearchFunctor { const framework::LoDTensor *scores, framework::LoDTensor *selected_ids, framework::LoDTensor *selected_scores, - framework::Tensor *parent_idx, + phi::DenseTensor *parent_idx, size_t level, size_t beam_size, int end_id, diff --git a/paddle/fluid/operators/math/beam_search.cu b/paddle/fluid/operators/math/beam_search.cu index 80af6f673c40f..02be32bf146e0 100644 --- a/paddle/fluid/operators/math/beam_search.cu +++ b/paddle/fluid/operators/math/beam_search.cu @@ -412,7 +412,7 @@ class BeamSearchFunctor { const framework::LoDTensor* scores, framework::LoDTensor* selected_ids, framework::LoDTensor* selected_scores, - framework::Tensor* parent_idx, + phi::DenseTensor* parent_idx, size_t level, size_t beam_size, int end_id, diff --git a/paddle/fluid/operators/math/beam_search.h b/paddle/fluid/operators/math/beam_search.h index d444b0abb4798..c6c05434c0b4a 100644 --- a/paddle/fluid/operators/math/beam_search.h +++ b/paddle/fluid/operators/math/beam_search.h @@ -112,7 +112,7 @@ class BeamSearchFunctor { const framework::LoDTensor* scores, framework::LoDTensor* selected_ids, framework::LoDTensor* selected_scores, - framework::Tensor* parent_idx, + phi::DenseTensor* parent_idx, size_t level, size_t beam_size, int end_id, diff --git a/paddle/fluid/operators/math/beam_search_npu.cc b/paddle/fluid/operators/math/beam_search_npu.cc index b49d4e848b067..1daf97194ed52 100644 --- a/paddle/fluid/operators/math/beam_search_npu.cc +++ 
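The match_matrix_tensor and beam_search hunks above show the second pattern in this patch: files with a file-local alias only retarget it to phi::DenseTensor, while fully qualified framework::Tensor uses are rewritten at each call site. A sketch of how both forms read after the change; the kernel body is illustrative, not taken from any file here:

#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

using Tensor = phi::DenseTensor;  // was: using Tensor = framework::Tensor;

// Hypothetical kernel, for illustration only.
template <typename DeviceContext, typename T>
class AliasedKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");            // alias users compile unchanged
    auto* w = ctx.Input<phi::DenseTensor>("W");  // explicit uses are rewritten
    auto* out = ctx.Output<phi::DenseTensor>("Out");
    out->mutable_data<T>(ctx.GetPlace());
    (void)x;
    (void)w;
  }
};

}  // namespace operators
}  // namespace paddle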
b/paddle/fluid/operators/math/beam_search_npu.cc @@ -41,7 +41,7 @@ class BeamSearchFunctor { const framework::LoDTensor* scores, framework::LoDTensor* selected_ids, framework::LoDTensor* selected_scores, - framework::Tensor* parent_idx, + phi::DenseTensor* parent_idx, size_t level, size_t beam_size, int end_id, @@ -185,8 +185,8 @@ class BeamSearchFunctor { "FillD", {true_tmp_tensor}, {second_pos_true_tensors}, fill_attr2); runner_fill_true_tensors.Run(stream); - std::vector concat_inputs = {first_pos_false_tensors, - second_pos_true_tensors}; + std::vector concat_inputs = {first_pos_false_tensors, + second_pos_true_tensors}; std::vector concat_names = {"x0", "x1"}; NpuOpRunner runner_concat_false_true{"ConcatD", {concat_inputs}, @@ -403,8 +403,8 @@ class BeamSearchFunctor { sorted_score_indices.Resize( phi::make_ddim({num_seqs, static_cast(beam_size), 1})); - std::vector concat_inputs2 = {batch_ids, - sorted_score_indices}; + std::vector concat_inputs2 = {batch_ids, + sorted_score_indices}; std::vector concat_names = {"x0", "x1"}; NpuOpRunner runner_concat_score_indices{"ConcatD", {concat_inputs2}, @@ -429,8 +429,8 @@ class BeamSearchFunctor { phi::make_ddim({num_seqs, static_cast(beam_size), 2})); gather_nd_id_indices.mutable_data(place); - std::vector concat_inputs3 = {batch_ids, - cast_sort_tmp_indices}; + std::vector concat_inputs3 = {batch_ids, + cast_sort_tmp_indices}; NpuOpRunner runner_concat_id_indices{"ConcatD", {concat_inputs3}, {gather_nd_id_indices}, diff --git a/paddle/fluid/operators/math/beam_search_xpu.cc b/paddle/fluid/operators/math/beam_search_xpu.cc index 9904c142e5a08..ad8edd5d23f81 100644 --- a/paddle/fluid/operators/math/beam_search_xpu.cc +++ b/paddle/fluid/operators/math/beam_search_xpu.cc @@ -62,7 +62,7 @@ class BeamSearchFunctor { const framework::LoDTensor *scores, framework::LoDTensor *selected_ids, framework::LoDTensor *selected_scores, - framework::Tensor *parent_idx, + phi::DenseTensor *parent_idx, size_t level, size_t beam_size, int end_id, diff --git a/paddle/fluid/operators/math/concat_and_split.cc b/paddle/fluid/operators/math/concat_and_split.cc index 603584629cc92..a74b345ec835f 100644 --- a/paddle/fluid/operators/math/concat_and_split.cc +++ b/paddle/fluid/operators/math/concat_and_split.cc @@ -41,9 +41,9 @@ template class ConcatFunctor { public: void operator()(const phi::CPUContext& context, - const std::vector& input, + const std::vector& input, int axis, - framework::Tensor* output) { + phi::DenseTensor* output) { phi::funcs::ConcatFunctor functor; functor(context, input, axis, output); } @@ -57,10 +57,10 @@ template class SplitFunctor { public: void operator()(const phi::CPUContext& context, - const framework::Tensor& input, - const std::vector& ref_inputs, + const phi::DenseTensor& input, + const std::vector& ref_inputs, const int axis, - std::vector* outputs) { + std::vector* outputs) { phi::funcs::SplitFunctor functor; functor(context, input, ref_inputs, axis, outputs); } @@ -75,9 +75,9 @@ template class ConcatFunctor { public: void operator()(const platform::XPUDeviceContext& context, - const std::vector& input, + const std::vector& input, int axis, - framework::Tensor* output) { + phi::DenseTensor* output) { int dev_id = context.GetPlace().GetDeviceId(); platform::XPUDeviceGuard guard(dev_id); @@ -115,10 +115,10 @@ template class SplitFunctor { public: void operator()(const platform::XPUDeviceContext& context, - const framework::Tensor& input, - const std::vector& ref_inputs, + const phi::DenseTensor& input, + const std::vector& ref_inputs, 
const int axis, - std::vector* outputs) { + std::vector* outputs) { int dev_id = context.GetPlace().GetDeviceId(); platform::XPUDeviceGuard guard(dev_id); @@ -168,9 +168,9 @@ template class ConcatFunctor { public: void operator()(const platform::NPUDeviceContext& context, - const std::vector& input, + const std::vector& input, int axis, - framework::Tensor* output) { + phi::DenseTensor* output) { int dev_id = context.GetPlace().GetDeviceId(); platform::NPUDeviceGuard guard(dev_id); @@ -192,10 +192,10 @@ template class SplitFunctor { public: void operator()(const platform::NPUDeviceContext& context, - const framework::Tensor& input, - const std::vector& ref_inputs, + const phi::DenseTensor& input, + const std::vector& ref_inputs, const int axis, - std::vector* outputs) { + std::vector* outputs) { if (input.numel() == 0) { return; } @@ -246,9 +246,9 @@ template class ConcatFunctor { public: void operator()(const platform::MLUDeviceContext& context, - const std::vector& input, + const std::vector& input, int axis, - framework::Tensor* output) { + phi::DenseTensor* output) { int dev_id = context.GetPlace().GetDeviceId(); platform::MLUDeviceGuard guard(dev_id); @@ -287,10 +287,10 @@ template class SplitFunctor { public: void operator()(const platform::MLUDeviceContext& context, - const framework::Tensor& input, - const std::vector& ref_inputs, + const phi::DenseTensor& input, + const std::vector& ref_inputs, const int axis, - std::vector* outputs) { + std::vector* outputs) { if (input.numel() == 0) { return; } diff --git a/paddle/fluid/operators/math/concat_and_split.cu b/paddle/fluid/operators/math/concat_and_split.cu index 11508fd2d1eae..69b183aa9a01c 100644 --- a/paddle/fluid/operators/math/concat_and_split.cu +++ b/paddle/fluid/operators/math/concat_and_split.cu @@ -26,9 +26,9 @@ template class ConcatFunctor { public: void operator()(const phi::GPUContext& context, - const std::vector& input, + const std::vector& input, int axis, - framework::Tensor* output) { + phi::DenseTensor* output) { phi::funcs::ConcatFunctor functor; functor(context, input, axis, output); } @@ -42,10 +42,10 @@ template class SplitFunctor { public: void operator()(const phi::GPUContext& context, - const framework::Tensor& input, - const std::vector& ref_inputs, + const phi::DenseTensor& input, + const std::vector& ref_inputs, int axis, - std::vector* outputs) { + std::vector* outputs) { phi::funcs::SplitFunctor functor; functor(context, input, ref_inputs, axis, outputs); } diff --git a/paddle/fluid/operators/math/concat_and_split.h b/paddle/fluid/operators/math/concat_and_split.h index 66727b8cdbd94..83513caa14573 100644 --- a/paddle/fluid/operators/math/concat_and_split.h +++ b/paddle/fluid/operators/math/concat_and_split.h @@ -38,9 +38,9 @@ template class ConcatFunctor { public: void operator()(const DeviceContext& context, - const std::vector& input, + const std::vector& input, int axis, - framework::Tensor* output); + phi::DenseTensor* output); }; /* @@ -59,10 +59,10 @@ template class SplitFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& input, - const std::vector& ref_inputs, + const phi::DenseTensor& input, + const std::vector& ref_inputs, int axis, - std::vector* outputs); + std::vector* outputs); }; } // namespace math diff --git a/paddle/fluid/operators/math/concat_test.cc b/paddle/fluid/operators/math/concat_test.cc index ccbe1c2aeed00..b350167cfb46b 100644 --- a/paddle/fluid/operators/math/concat_test.cc +++ b/paddle/fluid/operators/math/concat_test.cc @@ -29,13 
+29,13 @@ limitations under the License. */ */ template void ConcatCase1(DeviceContext* context) { - paddle::framework::Tensor input_a_cpu; - paddle::framework::Tensor input_b_cpu; - paddle::framework::Tensor out_cpu; + phi::DenseTensor input_a_cpu; + phi::DenseTensor input_b_cpu; + phi::DenseTensor out_cpu; - paddle::framework::Tensor input_a; - paddle::framework::Tensor input_b; - paddle::framework::Tensor out; + phi::DenseTensor input_a; + phi::DenseTensor input_b; + phi::DenseTensor out; auto dim_a = phi::make_ddim({2, 3, 4}); auto dim_b = phi::make_ddim({3, 3, 4}); @@ -73,7 +73,7 @@ void ConcatCase1(DeviceContext* context) { paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b); } - std::vector input; + std::vector input; input.push_back(input_a); input.push_back(input_b); @@ -134,13 +134,13 @@ void ConcatCase1(DeviceContext* context) { */ template void ConcatCase2(DeviceContext* context) { - paddle::framework::Tensor input_a_cpu; - paddle::framework::Tensor input_b_cpu; - paddle::framework::Tensor out_cpu; + phi::DenseTensor input_a_cpu; + phi::DenseTensor input_b_cpu; + phi::DenseTensor out_cpu; - paddle::framework::Tensor input_a; - paddle::framework::Tensor input_b; - paddle::framework::Tensor out; + phi::DenseTensor input_a; + phi::DenseTensor input_b; + phi::DenseTensor out; auto dim_a = phi::make_ddim({2, 3, 4}); auto dim_b = phi::make_ddim({2, 4, 4}); @@ -178,7 +178,7 @@ void ConcatCase2(DeviceContext* context) { paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b); } - std::vector input; + std::vector input; input.push_back(input_a); input.push_back(input_b); @@ -243,13 +243,13 @@ void ConcatCase2(DeviceContext* context) { */ template void ConcatCase3(DeviceContext* context) { - paddle::framework::Tensor input_a_cpu; - paddle::framework::Tensor input_b_cpu; - paddle::framework::Tensor out_cpu; + phi::DenseTensor input_a_cpu; + phi::DenseTensor input_b_cpu; + phi::DenseTensor out_cpu; - paddle::framework::Tensor input_a; - paddle::framework::Tensor input_b; - paddle::framework::Tensor out; + phi::DenseTensor input_a; + phi::DenseTensor input_b; + phi::DenseTensor out; auto dim_a = phi::make_ddim({2, 3, 4}); auto dim_b = phi::make_ddim({2, 3, 5}); @@ -287,7 +287,7 @@ void ConcatCase3(DeviceContext* context) { paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b); } - std::vector input; + std::vector input; input.push_back(input_a); input.push_back(input_b); @@ -354,13 +354,13 @@ void ConcatCase3(DeviceContext* context) { */ template void ConcatCase4(DeviceContext* context) { - paddle::framework::Tensor input_a_cpu; - paddle::framework::Tensor input_b_cpu; - paddle::framework::Tensor out_cpu; + phi::DenseTensor input_a_cpu; + phi::DenseTensor input_b_cpu; + phi::DenseTensor out_cpu; - paddle::framework::Tensor input_a; - paddle::framework::Tensor input_b; - paddle::framework::Tensor out; + phi::DenseTensor input_a; + phi::DenseTensor input_b; + phi::DenseTensor out; auto dim_a = phi::make_ddim({2, 3, 4}); auto dim_b = phi::make_ddim({2, 3, 4}); @@ -398,7 +398,7 @@ void ConcatCase4(DeviceContext* context) { paddle::framework::TensorCopySync(input_b_cpu, Place(), &input_b); } - std::vector input; + std::vector input; input.push_back(input_a); input.push_back(input_b); diff --git a/paddle/fluid/operators/math/context_project.h b/paddle/fluid/operators/math/context_project.h index 026b6a9d8fef5..7811bc2854ffd 100644 --- a/paddle/fluid/operators/math/context_project.h +++ b/paddle/fluid/operators/math/context_project.h @@ -26,7 +26,7 @@ namespace 
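The ConcatFunctor/SplitFunctor wrappers in operators/math keep their names and template parameters but now traffic entirely in phi::DenseTensor, with the CPU and GPU specializations forwarding to phi::funcs. A usage sketch in the spirit of the ConcatCase tests above, with made-up shapes:

#include "paddle/fluid/operators/math/concat_and_split.h"

// Sketch only; assumes `context` is an initialized CPU context, as in the
// ConcatCase* tests above. Fill `a` and `b` before concatenating for real.
void ConcatSketch(phi::CPUContext* context) {
  paddle::platform::CPUPlace place;

  phi::DenseTensor a, b, out;
  a.mutable_data<float>(phi::make_ddim({2, 3, 4}), place);
  b.mutable_data<float>(phi::make_ddim({3, 3, 4}), place);
  out.mutable_data<float>(phi::make_ddim({5, 3, 4}), place);

  std::vector<phi::DenseTensor> inputs;
  inputs.push_back(a);
  inputs.push_back(b);

  paddle::operators::math::ConcatFunctor<phi::CPUContext, float> concat;
  concat(*context, inputs, /*axis=*/0, &out);  // forwards to phi::funcs::ConcatFunctor
}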
operators { namespace math { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; /* @@ -90,14 +90,14 @@ class ContextProjectFunctor { public: void operator()(const DeviceContext& context, const LoDTensor& in, - const Tensor* padding_data, + const phi::DenseTensor* padding_data, bool padding_trainable, const int context_start, const int context_length, const int context_stride, const int up_pad, const int down_pad, - Tensor* col) { + phi::DenseTensor* col) { auto lod_level_0 = in.lod()[0]; math::Im2ColFunctor im2col_ocf; @@ -226,8 +226,8 @@ class ContextProjectGradFunctor { const int down_pad, bool pad_grad, bool input_grad, - Tensor* padding_data, - Tensor* col) { + phi::DenseTensor* padding_data, + phi::DenseTensor* col) { auto lod_level_0 = in.lod()[0]; math::Col2ImFunctor col2im_ocf; diff --git a/paddle/fluid/operators/math/cross_entropy.cc b/paddle/fluid/operators/math/cross_entropy.cc index 17ff6aff6f93d..f87f5a107e696 100644 --- a/paddle/fluid/operators/math/cross_entropy.cc +++ b/paddle/fluid/operators/math/cross_entropy.cc @@ -21,7 +21,7 @@ namespace paddle { namespace operators { namespace math { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template @@ -29,9 +29,9 @@ using EigenMatrix = framework::EigenMatrix; template struct HardLabelCrossEntropyCPUFunctorImpl { - HardLabelCrossEntropyCPUFunctorImpl(framework::Tensor* out, - const framework::Tensor* prob, - const framework::Tensor* labels, + HardLabelCrossEntropyCPUFunctorImpl(phi::DenseTensor* out, + const phi::DenseTensor* prob, + const phi::DenseTensor* labels, const int ignore_index, const int axis_dim) : out_(out), @@ -85,9 +85,9 @@ struct HardLabelCrossEntropyCPUFunctorImpl { } private: - framework::Tensor* out_; - const framework::Tensor* prob_; - const framework::Tensor* labels_; + phi::DenseTensor* out_; + const phi::DenseTensor* prob_; + const phi::DenseTensor* labels_; const int ignore_index_; const int axis_dim_; }; @@ -95,9 +95,9 @@ struct HardLabelCrossEntropyCPUFunctorImpl { template void CrossEntropyFunctor::operator()( const DeviceContext& ctx, - framework::Tensor* out, - const framework::Tensor* prob, - const framework::Tensor* labels, + phi::DenseTensor* out, + const phi::DenseTensor* prob, + const phi::DenseTensor* labels, const bool softLabel, const int ignore_index, const int axis_dim) { diff --git a/paddle/fluid/operators/math/cross_entropy.cu b/paddle/fluid/operators/math/cross_entropy.cu index c366dd6fcef34..0e5b95542455e 100644 --- a/paddle/fluid/operators/math/cross_entropy.cu +++ b/paddle/fluid/operators/math/cross_entropy.cu @@ -111,9 +111,9 @@ struct HardLabelCrossEntropyCUDAFunctorImpl { template void CrossEntropyFunctor::operator()( const DeviceContext& ctx, - framework::Tensor* out, - const framework::Tensor* prob, - const framework::Tensor* labels, + phi::DenseTensor* out, + const phi::DenseTensor* prob, + const phi::DenseTensor* labels, const bool softLabel, const int ignore_index, const int axis_dim) { diff --git a/paddle/fluid/operators/math/cross_entropy.h b/paddle/fluid/operators/math/cross_entropy.h index 0de10789ba02e..fba4c2ebc61c2 100644 --- a/paddle/fluid/operators/math/cross_entropy.h +++ b/paddle/fluid/operators/math/cross_entropy.h @@ -61,9 +61,9 @@ template class CrossEntropyFunctor { public: void operator()(const DeviceContext& context, - framework::Tensor* out, - const framework::Tensor* prob, - const framework::Tensor* labels, + phi::DenseTensor* out, + const phi::DenseTensor* prob, + const 
phi::DenseTensor* labels, const bool softLabel, const int ignore_index, const int axis_dim); diff --git a/paddle/fluid/operators/math/im2col.cc b/paddle/fluid/operators/math/im2col.cc index 9192badedcfff..39b0312e67766 100644 --- a/paddle/fluid/operators/math/im2col.cc +++ b/paddle/fluid/operators/math/im2col.cc @@ -35,11 +35,11 @@ class Im2ColFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& im, + const phi::DenseTensor& im, const std::vector& dilation, const std::vector& stride, const std::vector& padding, - framework::Tensor* col, + phi::DenseTensor* col, const DataLayout data_layout) { PADDLE_ENFORCE_EQ(im.dims().size(), 3, @@ -82,11 +82,11 @@ class Col2ImFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& col, + const phi::DenseTensor& col, const std::vector& dilation, const std::vector& stride, const std::vector& padding, - framework::Tensor* im, + phi::DenseTensor* im, const DataLayout data_layout) { PADDLE_ENFORCE_EQ(im->dims().size(), 3, @@ -184,11 +184,11 @@ class Im2ColFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& im, + const phi::DenseTensor& im, const std::vector& dilation, const std::vector& stride, const std::vector& padding, - framework::Tensor* col, + phi::DenseTensor* col, const DataLayout data_layout) { PADDLE_ENFORCE_EQ(im.dims().size(), 3, @@ -259,11 +259,11 @@ class Col2ImFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& col, + const phi::DenseTensor& col, const std::vector& dilation, const std::vector& stride, const std::vector& padding, - framework::Tensor* im, + phi::DenseTensor* im, const DataLayout data_layout) { PADDLE_ENFORCE_EQ(im->dims().size(), 3, diff --git a/paddle/fluid/operators/math/im2col.cu b/paddle/fluid/operators/math/im2col.cu index 5812b5d9b26b1..843e50c50a697 100644 --- a/paddle/fluid/operators/math/im2col.cu +++ b/paddle/fluid/operators/math/im2col.cu @@ -91,11 +91,11 @@ class Im2ColFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& im, + const phi::DenseTensor& im, const std::vector& dilation, const std::vector& stride, const std::vector& padding, - framework::Tensor* col, + phi::DenseTensor* col, const DataLayout data_layout) { PADDLE_ENFORCE_EQ(im.dims().size(), 3, @@ -228,11 +228,11 @@ class Col2ImFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& col, + const phi::DenseTensor& col, const std::vector& dilation, const std::vector& stride, const std::vector& padding, - framework::Tensor* im, + phi::DenseTensor* im, const DataLayout data_layout) { PADDLE_ENFORCE_EQ(im->dims().size(), 3, @@ -372,11 +372,11 @@ class Im2ColFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& im, + const phi::DenseTensor& im, const std::vector& dilation, const std::vector& stride, const std::vector& padding, - framework::Tensor* col, + phi::DenseTensor* col, const DataLayout data_layout) { PADDLE_ENFORCE_EQ(im.dims().size(), 3, @@ -485,11 +485,11 @@ class Col2ImFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& col, + const phi::DenseTensor& col, const std::vector& dilation, const std::vector& stride, const std::vector& padding, - framework::Tensor* im, + phi::DenseTensor* im, const DataLayout data_layout) { PADDLE_ENFORCE_EQ(im->dims().size(), 3, diff --git a/paddle/fluid/operators/math/im2col.h b/paddle/fluid/operators/math/im2col.h index 
5e02f166d65be..3cc87ca5d23da 100644 --- a/paddle/fluid/operators/math/im2col.h +++ b/paddle/fluid/operators/math/im2col.h @@ -87,11 +87,11 @@ template class Im2ColFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& im, + const phi::DenseTensor& im, const std::vector& dilation, const std::vector& stride, const std::vector& padding, - framework::Tensor* col, + phi::DenseTensor* col, const DataLayout data_layout = DataLayout::kNCHW); }; @@ -99,11 +99,11 @@ template class Col2ImFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& col, + const phi::DenseTensor& col, const std::vector& dilation, const std::vector& stride, const std::vector& padding, - framework::Tensor* im, + phi::DenseTensor* im, const DataLayout data_layout = DataLayout::kNCHW); }; diff --git a/paddle/fluid/operators/math/im2col_cfo_cpu.h b/paddle/fluid/operators/math/im2col_cfo_cpu.h index ab560d6d7005b..bef9e0a8449f6 100644 --- a/paddle/fluid/operators/math/im2col_cfo_cpu.h +++ b/paddle/fluid/operators/math/im2col_cfo_cpu.h @@ -27,11 +27,11 @@ namespace math { * Support dilation, stride and padding. */ template -inline void im2col_common(const framework::Tensor& im, +inline void im2col_common(const phi::DenseTensor& im, const std::vector& dilation, const std::vector& stride, const std::vector& padding, - framework::Tensor* col, + phi::DenseTensor* col, const DataLayout data_layout = DataLayout::kNCHW) { int im_channels = (data_layout != DataLayout::kNHWC ? im.dims()[0] : im.dims()[2]); @@ -77,8 +77,8 @@ inline void im2col_common(const framework::Tensor& im, */ template inline void im2col_sh1sw1dh1dw1ph0pw0( - const framework::Tensor& im, - framework::Tensor* col, + const phi::DenseTensor& im, + phi::DenseTensor* col, const DataLayout data_layout = DataLayout::kNCHW) { int im_channels = (data_layout != DataLayout::kNHWC ? im.dims()[0] : im.dims()[2]); @@ -129,8 +129,8 @@ inline void im2col_sh1sw1dh1dw1ph0pw0( * and filter_width == 1 have a special implementation */ template -inline void im2col_sh1sw1dh1dw1ph1pw1(const framework::Tensor& im, - framework::Tensor* col, +inline void im2col_sh1sw1dh1dw1ph1pw1(const phi::DenseTensor& im, + phi::DenseTensor* col, const DataLayout data_layout) { int im_channels = (data_layout != DataLayout::kNHWC ? im.dims()[0] : im.dims()[2]); diff --git a/paddle/fluid/operators/math/im2col_test.cc b/paddle/fluid/operators/math/im2col_test.cc index 09ec777ebb633..70ac7a225d6a3 100644 --- a/paddle/fluid/operators/math/im2col_test.cc +++ b/paddle/fluid/operators/math/im2col_test.cc @@ -22,11 +22,11 @@ limitations under the License. 
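The im2col/col2im functors keep their ColFormat/device/type template parameters; only the image and column arguments become phi::DenseTensor. An illustrative CFO call in the spirit of the test below; the 1x3x3 image, 2x2 filter geometry and the four-entry padding vector are assumptions, not taken from the patch:

#include "paddle/fluid/operators/math/im2col.h"

// Sketch only. Padding is written as {up, left, down, right}, all zero here.
void Im2ColSketch() {
  paddle::platform::CPUPlace place;
  phi::CPUContext context(place);

  phi::DenseTensor im;   // input image,    [channels, height, width]
  phi::DenseTensor col;  // output patches, [C, fh, fw, out_h, out_w]
  im.mutable_data<float>(phi::make_ddim({1, 3, 3}), place);
  col.mutable_data<float>(phi::make_ddim({1, 2, 2, 2, 2}), place);

  std::vector<int> dilation({1, 1});
  std::vector<int> stride({1, 1});
  std::vector<int> padding({0, 0, 0, 0});

  paddle::operators::math::Im2ColFunctor<
      paddle::operators::math::ColFormat::kCFO, phi::CPUContext, float>
      im2col;
  im2col(context, im, dilation, stride, padding, &col);
}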
*/ template void testIm2col() { - paddle::framework::Tensor input_tmp; - paddle::framework::Tensor input; - paddle::framework::Tensor output_cfo; - paddle::framework::Tensor output_ocf; - paddle::framework::Tensor output_tmp; + phi::DenseTensor input_tmp; + phi::DenseTensor input; + phi::DenseTensor output_cfo; + phi::DenseTensor output_ocf; + phi::DenseTensor output_tmp; /** * input = [0, 1, 2, @@ -180,11 +180,11 @@ void testIm2col() { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) template <> void testIm2col() { - paddle::framework::Tensor input_tmp; - paddle::framework::Tensor input; - paddle::framework::Tensor output_cfo; - paddle::framework::Tensor output_ocf; - paddle::framework::Tensor output_tmp; + phi::DenseTensor input_tmp; + phi::DenseTensor input; + phi::DenseTensor output_cfo; + phi::DenseTensor output_ocf; + phi::DenseTensor output_tmp; /** * input = [0, 1, 2, @@ -349,9 +349,9 @@ TEST(math, im2col) { #define PREPARE_IM2COL_CPU \ paddle::platform::CPUPlace place; \ phi::CPUContext context(place); \ - paddle::framework::Tensor input; \ - paddle::framework::Tensor out; \ - paddle::framework::Tensor ref; \ + phi::DenseTensor input; \ + phi::DenseTensor out; \ + phi::DenseTensor ref; \ std::vector padding({ph, pw}); \ std::vector stride({1, 1}); \ std::vector dilation({1, 1}); \ diff --git a/paddle/fluid/operators/math/matrix_bit_code.cc b/paddle/fluid/operators/math/matrix_bit_code.cc index 0648f2497d9d7..aa2779c350ab6 100644 --- a/paddle/fluid/operators/math/matrix_bit_code.cc +++ b/paddle/fluid/operators/math/matrix_bit_code.cc @@ -20,10 +20,10 @@ namespace math { template struct MatrixBitCodeFunctorAdd { - const framework::Tensor &vec_; - framework::Tensor *tmat_; + const phi::DenseTensor &vec_; + phi::DenseTensor *tmat_; - MatrixBitCodeFunctorAdd(const framework::Tensor &vec, framework::Tensor *tmat) + MatrixBitCodeFunctorAdd(const phi::DenseTensor &vec, phi::DenseTensor *tmat) : vec_(vec), tmat_(tmat) {} template @@ -44,18 +44,18 @@ struct MatrixBitCodeFunctorAdd { }; template -void MatrixBitCodeFunctor::Add(const framework::Tensor &vec, - framework::Tensor *tmat) { +void MatrixBitCodeFunctor::Add(const phi::DenseTensor &vec, + phi::DenseTensor *tmat) { MatrixBitCodeFunctorAdd func(vec, tmat); paddle::visit(func, code_table_); } template struct MatrixBitCodeFunctorAddGrad { - const framework::Tensor &tmat_; - framework::Tensor *vec_; - MatrixBitCodeFunctorAddGrad(const framework::Tensor &tmat, - framework::Tensor *vec) + const phi::DenseTensor &tmat_; + phi::DenseTensor *vec_; + MatrixBitCodeFunctorAddGrad(const phi::DenseTensor &tmat, + phi::DenseTensor *vec) : tmat_(tmat), vec_(vec) {} template @@ -76,20 +76,20 @@ struct MatrixBitCodeFunctorAddGrad { }; template -void MatrixBitCodeFunctor::AddGrad(const framework::Tensor &tmat, - framework::Tensor *vec) { +void MatrixBitCodeFunctor::AddGrad(const phi::DenseTensor &tmat, + phi::DenseTensor *vec) { MatrixBitCodeFunctorAddGrad func(tmat, vec); paddle::visit(func, code_table_); } template struct MatrixBitCodeFunctorSum { - const framework::Tensor &tmat_; - framework::Tensor *sum_; + const phi::DenseTensor &tmat_; + phi::DenseTensor *sum_; T scale_sum_; - MatrixBitCodeFunctorSum(const framework::Tensor &tmat, - framework::Tensor *sum, + MatrixBitCodeFunctorSum(const phi::DenseTensor &tmat, + phi::DenseTensor *sum, T scale_sum) : tmat_(tmat), sum_(sum), scale_sum_(scale_sum) {} @@ -117,8 +117,8 @@ struct MatrixBitCodeFunctorSum { }; template -void MatrixBitCodeFunctor::Sum(const framework::Tensor &tmat, - 
framework::Tensor *sum, +void MatrixBitCodeFunctor::Sum(const phi::DenseTensor &tmat, + phi::DenseTensor *sum, T scale_sum) { MatrixBitCodeFunctorSum func(tmat, sum, scale_sum); paddle::visit(func, code_table_); @@ -126,13 +126,13 @@ void MatrixBitCodeFunctor::Sum(const framework::Tensor &tmat, template struct MatrixBitCodeFunctorMul { - framework::Tensor *tmat_; - const framework::Tensor &weight_; - const framework::Tensor &input_; + phi::DenseTensor *tmat_; + const phi::DenseTensor &weight_; + const phi::DenseTensor &input_; - MatrixBitCodeFunctorMul(framework::Tensor *tmat, - const framework::Tensor &weight, - const framework::Tensor &input) + MatrixBitCodeFunctorMul(phi::DenseTensor *tmat, + const phi::DenseTensor &weight, + const phi::DenseTensor &input) : tmat_(tmat), weight_(weight), input_(input) {} template @@ -160,9 +160,9 @@ struct MatrixBitCodeFunctorMul { }; template -void MatrixBitCodeFunctor::Mul(framework::Tensor *tmat, - const framework::Tensor &weight, - const framework::Tensor &input) { +void MatrixBitCodeFunctor::Mul(phi::DenseTensor *tmat, + const phi::DenseTensor &weight, + const phi::DenseTensor &input) { MatrixBitCodeFunctorMul func(tmat, weight, input); paddle::visit(func, code_table_); } @@ -175,12 +175,12 @@ class ReservedVector : public std::vector { template struct MatrixBitCodeFunctorMulGradWeight { - const framework::Tensor &tmat_; - framework::Tensor *weight_; - const framework::Tensor &input_; - MatrixBitCodeFunctorMulGradWeight(const framework::Tensor &tmat, - framework::Tensor *weight, - const framework::Tensor &input) + const phi::DenseTensor &tmat_; + phi::DenseTensor *weight_; + const phi::DenseTensor &input_; + MatrixBitCodeFunctorMulGradWeight(const phi::DenseTensor &tmat, + phi::DenseTensor *weight, + const phi::DenseTensor &input) : tmat_(tmat), weight_(weight), input_(input) {} template void operator()(const CodeTable &code_table) { @@ -216,22 +216,22 @@ struct MatrixBitCodeFunctorMulGradWeight { }; template -void MatrixBitCodeFunctor::MulGradWeight(const framework::Tensor &tmat, - framework::Tensor *weight, - const framework::Tensor &input) { +void MatrixBitCodeFunctor::MulGradWeight(const phi::DenseTensor &tmat, + phi::DenseTensor *weight, + const phi::DenseTensor &input) { MatrixBitCodeFunctorMulGradWeight func(tmat, weight, input); paddle::visit(func, code_table_); } template struct MatrixBitCodeFunctorMulGradWeightSR { - const framework::Tensor &tmat_; + const phi::DenseTensor &tmat_; phi::SelectedRows *weight_; - const framework::Tensor &input_; + const phi::DenseTensor &input_; - MatrixBitCodeFunctorMulGradWeightSR(const framework::Tensor &tmat, + MatrixBitCodeFunctorMulGradWeightSR(const phi::DenseTensor &tmat, phi::SelectedRows *weight, - const framework::Tensor &input) + const phi::DenseTensor &input) : tmat_(tmat), weight_(weight), input_(input) {} template @@ -271,22 +271,22 @@ struct MatrixBitCodeFunctorMulGradWeightSR { }; template -void MatrixBitCodeFunctor::MulGradWeight(const framework::Tensor &tmat, +void MatrixBitCodeFunctor::MulGradWeight(const phi::DenseTensor &tmat, phi::SelectedRows *weight, - const framework::Tensor &input) { + const phi::DenseTensor &input) { MatrixBitCodeFunctorMulGradWeightSR func(tmat, weight, input); paddle::visit(func, code_table_); } template struct MatrixBitCodeFunctorMulGradError { - const framework::Tensor &tmat_; - const framework::Tensor &weight_; - framework::Tensor *input_; + const phi::DenseTensor &tmat_; + const phi::DenseTensor &weight_; + phi::DenseTensor *input_; - 
MatrixBitCodeFunctorMulGradError(const framework::Tensor &tmat, - const framework::Tensor &weight, - framework::Tensor *input) + MatrixBitCodeFunctorMulGradError(const phi::DenseTensor &tmat, + const phi::DenseTensor &weight, + phi::DenseTensor *input) : tmat_(tmat), weight_(weight), input_(input) {} template void operator()(const CodeTable &code_table) { @@ -315,18 +315,18 @@ struct MatrixBitCodeFunctorMulGradError { }; template -void MatrixBitCodeFunctor::MulGradError(const framework::Tensor &tmat, - const framework::Tensor &weight, - framework::Tensor *input) { +void MatrixBitCodeFunctor::MulGradError(const phi::DenseTensor &tmat, + const phi::DenseTensor &weight, + phi::DenseTensor *input) { MatrixBitCodeFunctorMulGradError func(tmat, weight, input); paddle::visit(func, code_table_); } template struct MatrixBitCodeFunctorSub { - framework::Tensor *tmat_; + phi::DenseTensor *tmat_; - explicit MatrixBitCodeFunctorSub(framework::Tensor *tmat) : tmat_(tmat) {} + explicit MatrixBitCodeFunctorSub(phi::DenseTensor *tmat) : tmat_(tmat) {} template void operator()(const CodeTable &code_table) { @@ -346,7 +346,7 @@ struct MatrixBitCodeFunctorSub { }; template -void MatrixBitCodeFunctor::Sub(framework::Tensor *tmat) { +void MatrixBitCodeFunctor::Sub(phi::DenseTensor *tmat) { MatrixBitCodeFunctorSub func(tmat); paddle::visit(func, code_table_); } diff --git a/paddle/fluid/operators/math/matrix_bit_code.h b/paddle/fluid/operators/math/matrix_bit_code.h index 7c9d94aa8713b..eb232940b8552 100644 --- a/paddle/fluid/operators/math/matrix_bit_code.h +++ b/paddle/fluid/operators/math/matrix_bit_code.h @@ -128,8 +128,8 @@ class SimpleCode { template class CustomCode { public: - CustomCode(const framework::Tensor& path_table, - const framework::Tensor& path_code, + CustomCode(const phi::DenseTensor& path_table, + const phi::DenseTensor& path_code, const int64_t* ids, int index) { seq_len_ = path_table.dims()[1]; @@ -188,8 +188,8 @@ class SimpleCodeTable { template class CustomCodeTable { public: - CustomCodeTable(const framework::Tensor& path_table, - const framework::Tensor& path_code, + CustomCodeTable(const phi::DenseTensor& path_table, + const phi::DenseTensor& path_code, const int64_t* ids) : ptable_(path_table), pcode_(path_code), ids_(ids) {} @@ -203,8 +203,8 @@ class CustomCodeTable { } private: - const framework::Tensor& ptable_; - const framework::Tensor& pcode_; + const phi::DenseTensor& ptable_; + const phi::DenseTensor& pcode_; const int64_t* ids_; }; @@ -218,8 +218,8 @@ class MatrixBitCodeFunctor { ids_(ids), code_table_(SimpleCodeTable(num_classes, ids)) {} - MatrixBitCodeFunctor(const framework::Tensor& path_table, - const framework::Tensor& path_code, + MatrixBitCodeFunctor(const phi::DenseTensor& path_table, + const phi::DenseTensor& path_code, const int64_t* ids) : num_classes_(static_cast(path_table.dims()[1])), ids_(ids), @@ -227,47 +227,47 @@ class MatrixBitCodeFunctor { /* For j < code_length tmat(i, j) += vec(0, index(i, j)) */ - void Add(const framework::Tensor& vec, framework::Tensor* tmat); + void Add(const phi::DenseTensor& vec, phi::DenseTensor* tmat); /* For j < code_length vec(0, index(i, j)) += tmat(i, j) */ - void AddGrad(const framework::Tensor& tmat, framework::Tensor* vec); + void AddGrad(const phi::DenseTensor& tmat, phi::DenseTensor* vec); /* For j < code_length sum(i, 0) = \sum_j bit(i, j) * tmat(i, j) */ - void Sum(const framework::Tensor& tmat, framework::Tensor* sum, T scale_sum); + void Sum(const phi::DenseTensor& tmat, phi::DenseTensor* sum, T scale_sum); /* For 
j < code_length tmat(i, j) -= bit(i, j) */ - void Sub(framework::Tensor* tmat); + void Sub(phi::DenseTensor* tmat); /* For j < code_length input.row(i) += tmat(i, j) * weight.row(index(i, j)) */ - void Mul(framework::Tensor* tmat, - const framework::Tensor& weight, - const framework::Tensor& input); + void Mul(phi::DenseTensor* tmat, + const phi::DenseTensor& weight, + const phi::DenseTensor& input); /* For index(i, j) >= 0: weight.row(index(i, j)) += tmat(i, j) * input.row(i) */ - void MulGradWeight(const framework::Tensor& tmat, - framework::Tensor* weight, - const framework::Tensor& input); + void MulGradWeight(const phi::DenseTensor& tmat, + phi::DenseTensor* weight, + const phi::DenseTensor& input); /* For SelectedRows Weight, For index(i, j) >= 0: weight.row(index(i, j)) += tmat(i, j) * input.row(i) */ - void MulGradWeight(const framework::Tensor& tmat, + void MulGradWeight(const phi::DenseTensor& tmat, phi::SelectedRows* weight, - const framework::Tensor& input); + const phi::DenseTensor& input); /* For j < code_length input.row(i) += tmat(i, j) * weight.row(index(i, j)) */ - void MulGradError(const framework::Tensor& tmat, - const framework::Tensor& weight, - framework::Tensor* input); + void MulGradError(const phi::DenseTensor& tmat, + const phi::DenseTensor& weight, + phi::DenseTensor* input); size_t num_classes_; const int64_t* ids_; diff --git a/paddle/fluid/operators/math/maxouting.cc b/paddle/fluid/operators/math/maxouting.cc index 2205ed51e1913..91ae7d472d931 100644 --- a/paddle/fluid/operators/math/maxouting.cc +++ b/paddle/fluid/operators/math/maxouting.cc @@ -23,8 +23,8 @@ namespace math { // All tensors are in NCHW or NHWC format, and the groups must be greater than 1 template void MaxOutFunctor::operator()(const DeviceContext& context, - const framework::Tensor& input, - framework::Tensor* output, + const phi::DenseTensor& input, + phi::DenseTensor* output, const int groups, const int axis) { const int batch_size = input.dims()[0]; @@ -66,10 +66,10 @@ void MaxOutFunctor::operator()(const DeviceContext& context, template void MaxOutGradFunctor::operator()( const DeviceContext& context, - const framework::Tensor& input, - framework::Tensor* input_grad, - const framework::Tensor& output, - const framework::Tensor& output_grad, + const phi::DenseTensor& input, + phi::DenseTensor* input_grad, + const phi::DenseTensor& output, + const phi::DenseTensor& output_grad, const int groups, const int axis) { const int batch_size = input.dims()[0]; diff --git a/paddle/fluid/operators/math/maxouting.cu b/paddle/fluid/operators/math/maxouting.cu index c84d90897220e..df115fd16966d 100644 --- a/paddle/fluid/operators/math/maxouting.cu +++ b/paddle/fluid/operators/math/maxouting.cu @@ -107,8 +107,8 @@ __global__ void KernelMaxoutGrad(const int nthreads, */ template void MaxOutFunctor::operator()(const DeviceContext& context, - const framework::Tensor& input, - framework::Tensor* output, + const phi::DenseTensor& input, + phi::DenseTensor* output, const int groups, const int axis) { const int batch_size = input.dims()[0]; @@ -140,10 +140,10 @@ void MaxOutFunctor::operator()(const DeviceContext& context, template void MaxOutGradFunctor::operator()( const DeviceContext& context, - const framework::Tensor& input, - framework::Tensor* input_grad, - const framework::Tensor& output, - const framework::Tensor& output_grad, + const phi::DenseTensor& input, + phi::DenseTensor* input_grad, + const phi::DenseTensor& output, + const phi::DenseTensor& output_grad, const int groups, const int axis) { 
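MatrixBitCodeFunctor's public surface, listed above, now works on phi::DenseTensor throughout. A construction sketch under assumed inputs; the parameter names are placeholders, not the argument list of any operator in this patch:

#include "paddle/fluid/operators/math/matrix_bit_code.h"

// Hypothetical wiring, for illustration only.
void BitCodeSketch(const phi::DenseTensor& path_table,
                   const phi::DenseTensor& path_code,
                   const phi::DenseTensor& label,
                   const phi::DenseTensor& bias,
                   phi::DenseTensor* pre_out) {
  const int64_t* ids = label.data<int64_t>();
  paddle::operators::math::MatrixBitCodeFunctor<float> bit_code(
      path_table, path_code, ids);
  bit_code.Add(bias, pre_out);  // pre_out(i, j) += bias(0, index(i, j))
  bit_code.Sub(pre_out);        // pre_out(i, j) -= bit(i, j)
}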
const int batch_size = input.dims()[0]; diff --git a/paddle/fluid/operators/math/maxouting.h b/paddle/fluid/operators/math/maxouting.h index d1a6f92185cba..f42bbdb0e38ee 100644 --- a/paddle/fluid/operators/math/maxouting.h +++ b/paddle/fluid/operators/math/maxouting.h @@ -26,8 +26,8 @@ template class MaxOutFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& input, - framework::Tensor* output, + const phi::DenseTensor& input, + phi::DenseTensor* output, const int groups, const int axis = 1); }; @@ -36,10 +36,10 @@ template class MaxOutGradFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& input, - framework::Tensor* input_grad, - const framework::Tensor& output, - const framework::Tensor& output_grad, + const phi::DenseTensor& input, + phi::DenseTensor* input_grad, + const phi::DenseTensor& output, + const phi::DenseTensor& output_grad, const int groups, const int axis = 1); }; diff --git a/paddle/fluid/operators/math/sample_prob.cu b/paddle/fluid/operators/math/sample_prob.cu index f18053e297e55..e3cc5a5741b02 100644 --- a/paddle/fluid/operators/math/sample_prob.cu +++ b/paddle/fluid/operators/math/sample_prob.cu @@ -31,7 +31,7 @@ namespace paddle { namespace operators { namespace math { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template __device__ T gpu_adjust_prob(const T prob, @@ -129,9 +129,9 @@ void GPUSampleWithProb::operator()(const phi::GPUContext& context, const int dict_size, const bool uniq, const std::size_t num_samples, - const Tensor* L, - Tensor* S, - Tensor* P) { + const phi::DenseTensor* L, + phi::DenseTensor* S, + phi::DenseTensor* P) { // UNDERSTAND: dimension issues const auto lbl_dim = L->dims(); const int batch_size = lbl_dim[0]; diff --git a/paddle/fluid/operators/math/sample_prob.h b/paddle/fluid/operators/math/sample_prob.h index ad4d3489c21fe..2464ac25186f0 100644 --- a/paddle/fluid/operators/math/sample_prob.h +++ b/paddle/fluid/operators/math/sample_prob.h @@ -27,7 +27,7 @@ namespace paddle { namespace operators { namespace math { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; /* UNDERSTAND: utility function to adjust probability for unique sampling, return whatever as it is if not using unique samping */ @@ -46,9 +46,9 @@ class SampleWithProb { void operator()(const DeviceContext& context, const Sampler& sampler, const std::size_t num_samples, - const Tensor* L, - Tensor* S, - Tensor* P) { + const phi::DenseTensor* L, + phi::DenseTensor* S, + phi::DenseTensor* P) { // UNDERSTAND: dimension issues const auto& lbl_dim = L->dims(); const int batch_size = lbl_dim[0]; @@ -117,9 +117,9 @@ class GPUSampleWithProb { const int dict_size, const bool uniq, const std::size_t num_samples, - const Tensor* L, - Tensor* S, - Tensor* P); + const phi::DenseTensor* L, + phi::DenseTensor* S, + phi::DenseTensor* P); }; #endif } // namespace math diff --git a/paddle/fluid/operators/math/selected_rows_functor.cc b/paddle/fluid/operators/math/selected_rows_functor.cc index 354af32beabee..c1b57899cf7ef 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cc +++ b/paddle/fluid/operators/math/selected_rows_functor.cc @@ -113,8 +113,8 @@ template struct SelectedRowsAddTensor { void operator()(const phi::CPUContext& context, const phi::SelectedRows& input1, - const framework::Tensor& input2, - framework::Tensor* output) { + const phi::DenseTensor& input2, + phi::DenseTensor* output) { auto in1_height = input1.height(); const auto& in2_dims = 
input2.dims(); const auto& out_dims = output->dims(); @@ -280,7 +280,7 @@ template struct SelectedRowsAddToTensor { void operator()(const phi::CPUContext& context, const phi::SelectedRows& input1, - framework::Tensor* input2) { + phi::DenseTensor* input2) { if (UNLIKELY(input1.rows().size() == 0)) { LOG(WARNING) << "input selected rows is empty!"; return; @@ -851,7 +851,7 @@ struct UpdateToTensor { void operator()(const phi::CPUContext& context, const ScatterOps& op, const phi::SelectedRows& input1, - framework::Tensor* input2) { + phi::DenseTensor* input2) { auto in1_height = input1.height(); const auto& in2_dims = input2->dims(); PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/math/selected_rows_functor.cu b/paddle/fluid/operators/math/selected_rows_functor.cu index 7fa9dc27db9cd..27ee703ac5a79 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cu +++ b/paddle/fluid/operators/math/selected_rows_functor.cu @@ -137,8 +137,8 @@ template struct SelectedRowsAddTensor { void operator()(const phi::GPUContext& context, const phi::SelectedRows& input1, - const framework::Tensor& input2, - framework::Tensor* output) { + const phi::DenseTensor& input2, + phi::DenseTensor* output) { auto in1_height = input1.height(); auto in2_dims = input2.dims(); auto out_dims = output->dims(); @@ -289,7 +289,7 @@ template struct SelectedRowsAddToTensor { void operator()(const phi::GPUContext& context, const phi::SelectedRows& input1, - framework::Tensor* input2) { + phi::DenseTensor* input2) { auto in1_height = input1.height(); auto in2_dims = input2->dims(); PADDLE_ENFORCE_EQ( @@ -591,7 +591,7 @@ struct UpdateToTensor { void operator()(const phi::GPUContext& context, const ScatterOps& op, const phi::SelectedRows& input1, - framework::Tensor* input2) { + phi::DenseTensor* input2) { // NOTE: Use SelectedRowsAddToTensor for better performance // no additional MergeAdd called. 
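SelectedRowsAddTensor and SelectedRowsAddToTensor keep their SelectedRows argument and take the dense side as phi::DenseTensor. A CPU sketch in the style of the cpu_add test below; the shapes are illustrative and `rows` is assumed to already hold matching height/row_numel data:

#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/kernels/funcs/math_function.h"

// Sketch only; assumes `ctx` is an initialized phi::CPUContext.
void AddRowsSketch(const phi::CPUContext& ctx, const phi::SelectedRows& rows) {
  paddle::platform::CPUPlace cpu_place;
  const int64_t height = 10;
  const int64_t row_numel = 10;

  phi::DenseTensor dense, out;
  dense.mutable_data<float>(phi::make_ddim({height, row_numel}), cpu_place);
  out.mutable_data<float>(phi::make_ddim({height, row_numel}), cpu_place);

  phi::funcs::SetConstant<phi::CPUContext, float> set_const;
  set_const(ctx, &dense, 3.0f);

  // out = rows (scattered by row index) + dense
  paddle::operators::math::SelectedRowsAddTensor<phi::CPUContext, float>
      add_tensor;
  add_tensor(ctx, rows, dense, &out);
}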
MergeAdd merge_func; diff --git a/paddle/fluid/operators/math/selected_rows_functor.h b/paddle/fluid/operators/math/selected_rows_functor.h index cf64b5d77e5be..76df85f0a6807 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.h +++ b/paddle/fluid/operators/math/selected_rows_functor.h @@ -44,8 +44,8 @@ template struct SelectedRowsAddTensor { void operator()(const DeviceContext& context, const phi::SelectedRows& input1, - const framework::Tensor& input2, - framework::Tensor* output); + const phi::DenseTensor& input2, + phi::DenseTensor* output); }; // input2 = input1 + input2 @@ -73,7 +73,7 @@ template struct SelectedRowsAddToTensor { void operator()(const DeviceContext& context, const phi::SelectedRows& input1, - framework::Tensor* input2); + phi::DenseTensor* input2); }; namespace scatter { @@ -115,7 +115,7 @@ struct UpdateToTensor { void operator()(const DeviceContext& context, const ScatterOps& op, const phi::SelectedRows& input1, - framework::Tensor* input2); + phi::DenseTensor* input2); }; } // namespace scatter diff --git a/paddle/fluid/operators/math/selected_rows_functor_test.cc b/paddle/fluid/operators/math/selected_rows_functor_test.cc index ecb8aa7824724..700050420826d 100644 --- a/paddle/fluid/operators/math/selected_rows_functor_test.cc +++ b/paddle/fluid/operators/math/selected_rows_functor_test.cc @@ -79,13 +79,11 @@ TEST(selected_rows_functor, cpu_add) { EXPECT_EQ(out_data[5 * row_numel + 7], 2.0); EXPECT_EQ(out_data[6 * row_numel + 9], 2.0); - std::unique_ptr tensor1{ - new paddle::framework::Tensor()}; + std::unique_ptr tensor1{new phi::DenseTensor()}; tensor1->mutable_data(phi::make_ddim({height, row_numel}), cpu_place); functor(ctx, tensor1.get(), 3.0); - std::unique_ptr tensor2{ - new paddle::framework::Tensor()}; + std::unique_ptr tensor2{new phi::DenseTensor()}; tensor2->mutable_data(phi::make_ddim({height, row_numel}), cpu_place); paddle::operators::math::SelectedRowsAddTensor @@ -174,8 +172,7 @@ TEST(selected_rows_functor, cpu_add_to) { EXPECT_EQ(out_data[5 * row_numel + 7], 2.0); EXPECT_EQ(out_data[6 * row_numel + 9], 2.0); - std::unique_ptr tensor1{ - new paddle::framework::Tensor()}; + std::unique_ptr tensor1{new phi::DenseTensor()}; tensor1->mutable_data(phi::make_ddim({height, row_numel}), cpu_place); functor(ctx, tensor1.get(), 3.0); @@ -475,8 +472,7 @@ TEST(selected_rows_functor, cpu_sum_to) { EXPECT_EQ(out_data[4 * row_numel + 4], 2.0); EXPECT_EQ(out_data[5 * row_numel + 7], 2.0); EXPECT_EQ(out_data[6 * row_numel + 9], 2.0); - std::unique_ptr tensor1{ - new paddle::framework::Tensor()}; + std::unique_ptr tensor1{new phi::DenseTensor()}; tensor1->mutable_data(phi::make_ddim({height, row_numel}), cpu_place); functor(ctx, tensor1.get(), 3.0); paddle::operators::math::SelectedRowsAddToTensor diff --git a/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc index 746a64ff58cde..7c04b466b006d 100644 --- a/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc +++ b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc @@ -79,7 +79,7 @@ TEST(selected_rows_functor, gpu_add) { EXPECT_EQ(out_rows[5], 7); EXPECT_EQ(out_rows[6], 9); - paddle::framework::Tensor out_cpu; + phi::DenseTensor out_cpu; paddle::framework::TensorCopy(*out_value, cpu_place, ctx, &out_cpu); ctx.Wait(); @@ -96,20 +96,18 @@ TEST(selected_rows_functor, gpu_add) { EXPECT_EQ(out_cpu_data[5 * row_numel + 7], 2.0); EXPECT_EQ(out_cpu_data[6 * row_numel + 9], 2.0); - std::unique_ptr tensor1{ - new 
paddle::framework::Tensor()}; + std::unique_ptr tensor1{new phi::DenseTensor()}; tensor1->mutable_data(phi::make_ddim({height, row_numel}), gpu_place); functor(ctx, tensor1.get(), 3.0); - std::unique_ptr tensor2{ - new paddle::framework::Tensor()}; + std::unique_ptr tensor2{new phi::DenseTensor()}; tensor2->mutable_data(phi::make_ddim({height, row_numel}), gpu_place); paddle::operators::math::SelectedRowsAddTensor add_tensor_functor; add_tensor_functor(ctx, *output, *tensor1, tensor2.get()); - paddle::framework::Tensor tensor2_cpu; + phi::DenseTensor tensor2_cpu; paddle::framework::TensorCopy(*tensor2, cpu_place, ctx, &tensor2_cpu); ctx.Wait(); @@ -184,7 +182,7 @@ TEST(selected_rows_functor, gpu_add_to) { EXPECT_EQ(out_rows[5], 7); EXPECT_EQ(out_rows[6], 9); - paddle::framework::Tensor out_cpu; + phi::DenseTensor out_cpu; paddle::framework::TensorCopy(*out_value, cpu_place, ctx, &out_cpu); ctx.Wait(); @@ -201,8 +199,7 @@ TEST(selected_rows_functor, gpu_add_to) { EXPECT_EQ(out_cpu_data[5 * row_numel + 7], 2.0); EXPECT_EQ(out_cpu_data[6 * row_numel + 9], 2.0); - std::unique_ptr tensor1{ - new paddle::framework::Tensor()}; + std::unique_ptr tensor1{new phi::DenseTensor()}; tensor1->mutable_data(phi::make_ddim({height, row_numel}), gpu_place); functor(ctx, tensor1.get(), 3.0); @@ -210,7 +207,7 @@ TEST(selected_rows_functor, gpu_add_to) { add_to_tensor_functor; add_to_tensor_functor(ctx, *output, tensor1.get()); - paddle::framework::Tensor tensor1_cpu; + phi::DenseTensor tensor1_cpu; paddle::framework::TensorCopy(*tensor1, cpu_place, ctx, &tensor1_cpu); ctx.Wait(); @@ -269,7 +266,7 @@ TEST(selected_rows_functor, gpu_merge_add) { inputs.push_back(selected_rows2.get()); merge_add_functor(ctx, inputs, output.get()); - paddle::framework::Tensor output_cpu; + phi::DenseTensor output_cpu; paddle::framework::TensorCopy(output->value(), cpu_place, ctx, &output_cpu); ctx.Wait(); diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc index 273f99a5f9691..9575f4e6e2466 100644 --- a/paddle/fluid/operators/math/sequence_padding.cc +++ b/paddle/fluid/operators/math/sequence_padding.cc @@ -26,8 +26,8 @@ namespace operators { namespace math { template -void CopyValidData(framework::Tensor* dst_tensor, - const framework::Tensor* src_tensor, +void CopyValidData(phi::DenseTensor* dst_tensor, + const phi::DenseTensor* src_tensor, const framework::Vector& seq_offsets, int pad_seq_len, int step_width, diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc index a600c37a89108..bcd683d6ec137 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cc +++ b/paddle/fluid/operators/math/sequence_pooling.cc @@ -24,7 +24,7 @@ namespace paddle { namespace operators { namespace math { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template dims(); auto idx_dims = index->dims(); @@ -121,7 +121,7 @@ class MaxSeqPoolFunctor { const framework::LoDTensor& input, T pad_value, framework::LoDTensor* output, - framework::Tensor* index) { + phi::DenseTensor* index) { auto in_dims = input.dims(); auto out_dims = output->dims(); PADDLE_ENFORCE_GT(in_dims.size(), @@ -180,7 +180,7 @@ class MaxSeqPoolGradFunctor { public: void operator()(const phi::CPUContext& context, const framework::LoDTensor& out_grad, - const framework::Tensor& index, + const phi::DenseTensor& index, framework::LoDTensor* in_grad) { auto og_dims = out_grad.dims(); auto ig_dims = in_grad->dims(); 
@@ -352,7 +352,7 @@ class SequencePoolFunctor { const framework::LoDTensor& input, framework::LoDTensor* output, bool is_test, - framework::Tensor* index = nullptr) { + phi::DenseTensor* index = nullptr) { if (pooltype == "MAX") { if (is_test) { math::MaxSeqPoolFunctor max_pool; @@ -442,7 +442,7 @@ class SequencePoolGradFunctor { const framework::LoDTensor& out_grad, framework::LoDTensor* in_grad, /* max pool has index */ - const framework::Tensor* index = nullptr) { + const phi::DenseTensor* index = nullptr) { if (pooltype == "MAX") { math::MaxSeqPoolGradFunctor max_pool_grad; max_pool_grad(context, out_grad, *index, in_grad); diff --git a/paddle/fluid/operators/math/sequence_pooling.cu b/paddle/fluid/operators/math/sequence_pooling.cu index a5edb1db95c3f..41b322ba23179 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cu +++ b/paddle/fluid/operators/math/sequence_pooling.cu @@ -197,7 +197,7 @@ class SequencePoolFunctor { const framework::LoDTensor& input, framework::LoDTensor* output, bool is_test, - framework::Tensor* index = nullptr) { + phi::DenseTensor* index = nullptr) { auto lod_level = input.lod().size(); auto& lod = input.lod()[lod_level - 1]; const size_t item_dim = output->numel() / output->dims()[0]; @@ -415,7 +415,7 @@ class SequencePoolGradFunctor { const framework::LoDTensor& out_grad, framework::LoDTensor* in_grad, /* max pool has index */ - const framework::Tensor* index = nullptr) { + const phi::DenseTensor* index = nullptr) { auto lod_level = in_grad->lod().size(); auto& lod = in_grad->lod()[lod_level - 1]; const size_t item_dim = in_grad->numel() / in_grad->dims()[0]; diff --git a/paddle/fluid/operators/math/sequence_pooling.h b/paddle/fluid/operators/math/sequence_pooling.h index 378fb3a172add..a82d7ad4f802d 100644 --- a/paddle/fluid/operators/math/sequence_pooling.h +++ b/paddle/fluid/operators/math/sequence_pooling.h @@ -33,7 +33,7 @@ class SequencePoolFunctor { const framework::LoDTensor& input, framework::LoDTensor* output, bool is_test = false, - framework::Tensor* index = nullptr); + phi::DenseTensor* index = nullptr); }; template @@ -44,7 +44,7 @@ class SequencePoolGradFunctor { const framework::LoDTensor& out_grad, framework::LoDTensor* in_grad, /* max pool has index */ - const framework::Tensor* index = nullptr); + const phi::DenseTensor* index = nullptr); }; } // namespace math diff --git a/paddle/fluid/operators/math/sequence_pooling_test.cc b/paddle/fluid/operators/math/sequence_pooling_test.cc index 9cff64f75607b..422e52351c235 100644 --- a/paddle/fluid/operators/math/sequence_pooling_test.cc +++ b/paddle/fluid/operators/math/sequence_pooling_test.cc @@ -92,7 +92,7 @@ void TestSequencePoolingSum(const DeviceContext &context, for (size_t i = 0; i < in_grad.lod()[0].size() - 1; ++i) { int64_t begin = in_grad.lod()[0][i]; int64_t end = in_grad.lod()[0][i + 1]; - paddle::framework::Tensor tmp = in_grad.Slice(begin, end); + phi::DenseTensor tmp = in_grad.Slice(begin, end); for (int64_t j = 0; j != tmp.numel() / second_dim; ++j) { for (int64_t m = 0; m != second_dim; ++m) { EXPECT_EQ(tmp.data()[m + j * second_dim], @@ -104,7 +104,7 @@ void TestSequencePoolingSum(const DeviceContext &context, for (size_t i = 0; i < cpu_in_grad.lod()[0].size() - 1; ++i) { int64_t begin = cpu_in_grad.lod()[0][i]; int64_t end = cpu_in_grad.lod()[0][i + 1]; - paddle::framework::Tensor tmp = cpu_in_grad.Slice(begin, end); + phi::DenseTensor tmp = cpu_in_grad.Slice(begin, end); for (int64_t j = 0; j != tmp.numel() / second_dim; ++j) { for (int64_t m = 0; m != second_dim; ++m) 
{ EXPECT_EQ(tmp.data()[m + j * second_dim], diff --git a/paddle/fluid/operators/math/softmax.cu b/paddle/fluid/operators/math/softmax.cu index 6729b962f2af2..c70e1e3e7405a 100644 --- a/paddle/fluid/operators/math/softmax.cu +++ b/paddle/fluid/operators/math/softmax.cu @@ -23,7 +23,7 @@ namespace paddle { namespace operators { namespace math { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using ScopedTensorDescriptor = platform::ScopedTensorDescriptor; using DataLayout = platform::DataLayout; template @@ -32,8 +32,8 @@ using CudnnDataType = platform::CudnnDataType; template void SoftmaxCUDNNFunctor::operator()( const DeviceContext& context, - const framework::Tensor* X, - framework::Tensor* Y) { + const phi::DenseTensor* X, + phi::DenseTensor* Y) { // ------------------- cudnn descriptors --------------------- ScopedTensorDescriptor xDesc; ScopedTensorDescriptor yDesc; @@ -83,9 +83,9 @@ void SoftmaxCUDNNFunctor::operator()( template void SoftmaxGradCUDNNFunctor::operator()( const DeviceContext& context, - const framework::Tensor* Y, - const framework::Tensor* YGrad, - framework::Tensor* XGrad) { + const phi::DenseTensor* Y, + const phi::DenseTensor* YGrad, + phi::DenseTensor* XGrad) { // ------------------- cudnn descriptors --------------------- ScopedTensorDescriptor yDesc; ScopedTensorDescriptor dyDesc; diff --git a/paddle/fluid/operators/math/softmax.h b/paddle/fluid/operators/math/softmax.h index 958244bdbb208..9d25309d146a8 100644 --- a/paddle/fluid/operators/math/softmax.h +++ b/paddle/fluid/operators/math/softmax.h @@ -24,8 +24,8 @@ class SoftmaxFunctor { public: void operator()(const DeviceContext& context, const int axis_dim, - const framework::Tensor* X, - framework::Tensor* Y); + const phi::DenseTensor* X, + phi::DenseTensor* Y); }; template @@ -33,9 +33,9 @@ class SoftmaxGradFunctor { public: void operator()(const DeviceContext& context, const int axis_dim, - const framework::Tensor* y, - const framework::Tensor* y_grad, - framework::Tensor* x_grad); + const phi::DenseTensor* y, + const phi::DenseTensor* y_grad, + phi::DenseTensor* x_grad); }; #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -43,17 +43,17 @@ template class SoftmaxCUDNNFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor* X, - framework::Tensor* Y); + const phi::DenseTensor* X, + phi::DenseTensor* Y); }; template class SoftmaxGradCUDNNFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor* Y, - const framework::Tensor* y_grad, - framework::Tensor* x_grad); + const phi::DenseTensor* Y, + const phi::DenseTensor* y_grad, + phi::DenseTensor* x_grad); }; #endif diff --git a/paddle/fluid/operators/math/softmax_impl.h b/paddle/fluid/operators/math/softmax_impl.h index 8a0eb2ad7a91c..3ce7374e4d39f 100644 --- a/paddle/fluid/operators/math/softmax_impl.h +++ b/paddle/fluid/operators/math/softmax_impl.h @@ -47,8 +47,8 @@ class SoftmaxEigen { public: void operator()(const DeviceContext& context, const int axis_dim, - const framework::Tensor* X, - framework::Tensor* Y) { + const phi::DenseTensor* X, + phi::DenseTensor* Y) { constexpr int kBatchDim = 0; constexpr int kClassDim = 1; constexpr int kAxisDim = 1; @@ -108,8 +108,8 @@ class SoftmaxEigen { public: void operator()(const DeviceContext& context, const int axis_dim, - const framework::Tensor* X, - framework::Tensor* Y) { + const phi::DenseTensor* X, + phi::DenseTensor* Y) { constexpr int kBatchDim = 0; constexpr int kClassDim = 1; constexpr int kAxisDim = 1; @@ 
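The softmax functors keep their device/type (and is_test) template parameters; X, Y and the gradient tensors simply become phi::DenseTensor. An illustrative CPU call; the batch/class sizes are made up and `logits` should be filled before use:

#include "paddle/fluid/operators/math/softmax.h"

// Sketch only; assumes `ctx` is an initialized phi::CPUContext.
void SoftmaxSketch(const phi::CPUContext& ctx) {
  paddle::platform::CPUPlace place;
  const int batch = 4;
  const int classes = 8;

  phi::DenseTensor logits, out;
  logits.mutable_data<float>(phi::make_ddim({batch, classes}), place);
  out.mutable_data<float>(phi::make_ddim({batch, classes}), place);

  paddle::operators::math::SoftmaxFunctor<phi::CPUContext, float,
                                          /*is_test=*/false>
      softmax;
  softmax(ctx, /*axis_dim=*/classes, &logits, &out);
}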
-166,8 +166,8 @@ class SoftmaxEigen { public: void operator()(const DeviceContext& context, const int axis_dim, - const framework::Tensor* X, - framework::Tensor* Y) { + const phi::DenseTensor* X, + phi::DenseTensor* Y) { constexpr int kBatchDim = 0; constexpr int kClassDim = 1; constexpr int kAxisDim = 1; @@ -223,8 +223,8 @@ template void SoftmaxFunctor::operator()( const DeviceContext& context, const int axis_dim, - const framework::Tensor* X, - framework::Tensor* Y) { + const phi::DenseTensor* X, + phi::DenseTensor* Y) { SoftmaxEigen()(context, axis_dim, X, Y); } @@ -237,8 +237,8 @@ class SoftmaxFunctor> { public: void operator()(const DeviceContext& context, const int axis_dim, - const framework::Tensor* X, - framework::Tensor* Y) { + const phi::DenseTensor* X, + phi::DenseTensor* Y) { const auto& in_dims = X->dims(); constexpr int kBatchDim = 0; constexpr int kClassDim = 1; @@ -277,9 +277,9 @@ class SoftmaxGradEigen { public: void operator()(const DeviceContext& context, const int axis_dim, - const framework::Tensor* y, - const framework::Tensor* y_grad, - framework::Tensor* x_grad) { + const phi::DenseTensor* y, + const phi::DenseTensor* y_grad, + phi::DenseTensor* x_grad) { auto softmax = EigenMatrix::From(*y); auto softmax_grad = EigenMatrix::From(*y_grad); auto logits_grad = EigenMatrix::From(*x_grad); @@ -312,9 +312,9 @@ class SoftmaxGradEigen { public: void operator()(const DeviceContext& context, const int axis_dim, - const framework::Tensor* y, - const framework::Tensor* y_grad, - framework::Tensor* x_grad) { + const phi::DenseTensor* y, + const phi::DenseTensor* y_grad, + phi::DenseTensor* x_grad) { auto softmax = EigenMatrix::From(*y); auto softmax_grad = EigenMatrix::From(*y_grad); auto logits_grad = EigenMatrix::From(*x_grad); @@ -346,9 +346,9 @@ class SoftmaxGradEigen { public: void operator()(const DeviceContext& context, const int axis_dim, - const framework::Tensor* y, - const framework::Tensor* y_grad, - framework::Tensor* x_grad) { + const phi::DenseTensor* y, + const phi::DenseTensor* y_grad, + phi::DenseTensor* x_grad) { auto softmax = EigenMatrix::From(*y); auto softmax_grad = EigenMatrix::From(*y_grad); auto logits_grad = EigenMatrix::From(*x_grad); @@ -379,9 +379,9 @@ template void SoftmaxGradFunctor::operator()( const DeviceContext& context, const int axis_dim, - const framework::Tensor* y, - const framework::Tensor* y_grad, - framework::Tensor* x_grad) { + const phi::DenseTensor* y, + const phi::DenseTensor* y_grad, + phi::DenseTensor* x_grad) { SoftmaxGradEigen()(context, axis_dim, y, y_grad, x_grad); } @@ -390,9 +390,9 @@ class SoftmaxGradFunctor> { public: void operator()(const DeviceContext& context, const int axis_dim, - const framework::Tensor* y, - const framework::Tensor* y_grad, - framework::Tensor* x_grad) { + const phi::DenseTensor* y, + const phi::DenseTensor* y_grad, + phi::DenseTensor* x_grad) { const auto& out_dims = y->dims(); constexpr int kBatchDim = 0; constexpr int kClassDim = 1; diff --git a/paddle/fluid/operators/math/tree2col.cc b/paddle/fluid/operators/math/tree2col.cc index 70f377e42e59f..1bf20c9cc75a1 100644 --- a/paddle/fluid/operators/math/tree2col.cc +++ b/paddle/fluid/operators/math/tree2col.cc @@ -51,7 +51,7 @@ std::vector Tree2ColUtil::construct_patch( return patch; } -void Tree2ColUtil::construct_tree(const framework::Tensor &EdgeSet, +void Tree2ColUtil::construct_tree(const phi::DenseTensor &EdgeSet, std::vector> *tr, size_t *node_count) { const auto &edge_set_dims = EdgeSet.dims(); @@ -87,9 +87,9 @@ template class 
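Note: files such as softmax.cu and tree2col.cu keep a local alias instead of rewriting every occurrence, so unqualified `Tensor` still compiles but now names the phi type. A minimal sketch of that pattern (the helper function is illustrative):

#include "paddle/phi/core/dense_tensor.h"

namespace paddle {
namespace operators {

// Old spellings of `Tensor` inside this namespace now resolve to
// phi::DenseTensor rather than framework::Tensor.
using Tensor = phi::DenseTensor;

inline int64_t ElementCount(const Tensor& t) { return t.numel(); }

}  // namespace operators
}  // namespace paddle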
Tree2ColFunctor { public: void operator()(const phi::CPUContext &context, - const framework::Tensor &EdgeSet, - const framework::Tensor &node_features, - framework::Tensor *patch, + const phi::DenseTensor &EdgeSet, + const phi::DenseTensor &node_features, + phi::DenseTensor *patch, int max_depth) { std::vector> tr; const auto &feature_dims = node_features.dims(); @@ -141,9 +141,9 @@ template class Col2TreeFunctor { public: void operator()(const phi::CPUContext &context, - const framework::Tensor &EdgeSet, - const framework::Tensor &out_grad, - framework::Tensor *in_grad, + const phi::DenseTensor &EdgeSet, + const phi::DenseTensor &out_grad, + phi::DenseTensor *in_grad, int max_depth) { std::vector> tr; const auto &output_dims = out_grad.dims(); diff --git a/paddle/fluid/operators/math/tree2col.cu b/paddle/fluid/operators/math/tree2col.cu index 3aceceac32de2..3b467448ac09d 100644 --- a/paddle/fluid/operators/math/tree2col.cu +++ b/paddle/fluid/operators/math/tree2col.cu @@ -20,7 +20,7 @@ namespace paddle { namespace operators { namespace math { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using Node = paddle::operators::math::TreeNode; template __global__ void tree2col(const T* eta, @@ -54,9 +54,9 @@ template class Tree2ColFunctor { public: void operator()(const phi::GPUContext& context, - const framework::Tensor& EdgeSet, - const framework::Tensor& node_features, - framework::Tensor* patch, + const phi::DenseTensor& EdgeSet, + const phi::DenseTensor& node_features, + phi::DenseTensor* patch, int max_depth) { std::vector> tr; auto gpu_place = context.GetPlace(); @@ -131,9 +131,9 @@ template class Col2TreeFunctor { public: void operator()(const phi::GPUContext& context, - const framework::Tensor& EdgeSet, - const framework::Tensor& patch_grad, - framework::Tensor* embedding_grad, + const phi::DenseTensor& EdgeSet, + const phi::DenseTensor& patch_grad, + phi::DenseTensor* embedding_grad, int max_depth) { std::vector> tr; auto gpu_place = context.GetPlace(); diff --git a/paddle/fluid/operators/math/tree2col.h b/paddle/fluid/operators/math/tree2col.h index 9509a5cf3b745..154d6c2a90a43 100644 --- a/paddle/fluid/operators/math/tree2col.h +++ b/paddle/fluid/operators/math/tree2col.h @@ -65,7 +65,7 @@ class Tree2ColUtil { static std::vector construct_patch( size_t root, int max_depth, const std::vector> &tr); - static void construct_tree(const framework::Tensor &EdgeSet, + static void construct_tree(const phi::DenseTensor &EdgeSet, std::vector> *tr, size_t *node_count); }; @@ -74,18 +74,18 @@ template class Tree2ColFunctor { public: void operator()(const DeviceContext &context, - const framework::Tensor &EdgeSet, - const framework::Tensor &node_features, - framework::Tensor *patch, + const phi::DenseTensor &EdgeSet, + const phi::DenseTensor &node_features, + phi::DenseTensor *patch, int max_depth); }; template class Col2TreeFunctor { public: void operator()(const DeviceContext &context, - const framework::Tensor &EdgeSet, - const framework::Tensor &out_grad, - framework::Tensor *in_grad, + const phi::DenseTensor &EdgeSet, + const phi::DenseTensor &out_grad, + phi::DenseTensor *in_grad, int max_depth); }; } // namespace math diff --git a/paddle/fluid/operators/math/unpooling.cc b/paddle/fluid/operators/math/unpooling.cc index d119e814585b5..bcfdc876b4b26 100644 --- a/paddle/fluid/operators/math/unpooling.cc +++ b/paddle/fluid/operators/math/unpooling.cc @@ -21,9 +21,9 @@ template class Unpool2dMaxFunctor { public: void operator()(const phi::CPUContext& context, - const 
framework::Tensor& input, - const framework::Tensor& indices, - framework::Tensor* output) { + const phi::DenseTensor& input, + const phi::DenseTensor& indices, + phi::DenseTensor* output) { const int batch_size = input.dims()[0]; const int input_height = input.dims()[2]; const int input_width = input.dims()[3]; @@ -64,11 +64,11 @@ template class Unpool2dMaxGradFunctor { public: void operator()(const phi::CPUContext& context, - const framework::Tensor& input, - const framework::Tensor& indices, - const framework::Tensor& output, - const framework::Tensor& output_grad, - framework::Tensor* input_grad) { + const phi::DenseTensor& input, + const phi::DenseTensor& indices, + const phi::DenseTensor& output, + const phi::DenseTensor& output_grad, + phi::DenseTensor* input_grad) { const int batch_size = input.dims()[0]; const int input_height = input.dims()[2]; const int input_width = input.dims()[3]; @@ -110,9 +110,9 @@ template class Unpool3dMaxFunctor { public: void operator()(const phi::CPUContext& context, - const framework::Tensor& input, - const framework::Tensor& indices, - framework::Tensor* output) { + const phi::DenseTensor& input, + const phi::DenseTensor& indices, + phi::DenseTensor* output) { const int batch_size = input.dims()[0]; const int input_depth = input.dims()[2]; const int input_height = input.dims()[3]; @@ -156,11 +156,11 @@ template class Unpool3dMaxGradFunctor { public: void operator()(const phi::CPUContext& context, - const framework::Tensor& input, - const framework::Tensor& indices, - const framework::Tensor& output, - const framework::Tensor& output_grad, - framework::Tensor* input_grad) { + const phi::DenseTensor& input, + const phi::DenseTensor& indices, + const phi::DenseTensor& output, + const phi::DenseTensor& output_grad, + phi::DenseTensor* input_grad) { const int batch_size = input.dims()[0]; const int input_depth = input.dims()[2]; const int input_height = input.dims()[3]; diff --git a/paddle/fluid/operators/math/unpooling.cu b/paddle/fluid/operators/math/unpooling.cu index 253f4cb027938..e3d7abb6e0d71 100644 --- a/paddle/fluid/operators/math/unpooling.cu +++ b/paddle/fluid/operators/math/unpooling.cu @@ -114,9 +114,9 @@ template class Unpool2dMaxFunctor { public: void operator()(const phi::GPUContext& context, - const framework::Tensor& input, - const framework::Tensor& indices, - framework::Tensor* output) { + const phi::DenseTensor& input, + const phi::DenseTensor& indices, + phi::DenseTensor* output) { const int batch_size = input.dims()[0]; const int input_height = input.dims()[2]; const int input_width = input.dims()[3]; @@ -151,11 +151,11 @@ template class Unpool2dMaxGradFunctor { public: void operator()(const phi::GPUContext& context, - const framework::Tensor& input, - const framework::Tensor& indices, - const framework::Tensor& output, - const framework::Tensor& output_grad, - framework::Tensor* input_grad) { + const phi::DenseTensor& input, + const phi::DenseTensor& indices, + const phi::DenseTensor& output, + const phi::DenseTensor& output_grad, + phi::DenseTensor* input_grad) { const int batch_size = input.dims()[0]; const int input_height = input.dims()[2]; const int input_width = input.dims()[3]; @@ -192,9 +192,9 @@ template class Unpool3dMaxFunctor { public: void operator()(const phi::GPUContext& context, - const framework::Tensor& input, - const framework::Tensor& indices, - framework::Tensor* output) { + const phi::DenseTensor& input, + const phi::DenseTensor& indices, + phi::DenseTensor* output) { const int batch_size = input.dims()[0]; 
const int input_depth = input.dims()[2]; const int input_height = input.dims()[3]; @@ -233,11 +233,11 @@ template class Unpool3dMaxGradFunctor { public: void operator()(const phi::GPUContext& context, - const framework::Tensor& input, - const framework::Tensor& indices, - const framework::Tensor& output, - const framework::Tensor& output_grad, - framework::Tensor* input_grad) { + const phi::DenseTensor& input, + const phi::DenseTensor& indices, + const phi::DenseTensor& output, + const phi::DenseTensor& output_grad, + phi::DenseTensor* input_grad) { const int batch_size = input.dims()[0]; const int input_depth = input.dims()[2]; const int input_height = input.dims()[3]; diff --git a/paddle/fluid/operators/math/unpooling.h b/paddle/fluid/operators/math/unpooling.h index 1b0f52dacd970..11d6f14a2ece3 100644 --- a/paddle/fluid/operators/math/unpooling.h +++ b/paddle/fluid/operators/math/unpooling.h @@ -23,38 +23,38 @@ template class Unpool2dMaxFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& input, - const framework::Tensor& indices, - framework::Tensor* output); + const phi::DenseTensor& input, + const phi::DenseTensor& indices, + phi::DenseTensor* output); }; template class Unpool2dMaxGradFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& input, - const framework::Tensor& indices, - const framework::Tensor& output, - const framework::Tensor& output_grad, - framework::Tensor* input_grad); + const phi::DenseTensor& input, + const phi::DenseTensor& indices, + const phi::DenseTensor& output, + const phi::DenseTensor& output_grad, + phi::DenseTensor* input_grad); }; template class Unpool3dMaxFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& input, - const framework::Tensor& indices, - framework::Tensor* output); + const phi::DenseTensor& input, + const phi::DenseTensor& indices, + phi::DenseTensor* output); }; template class Unpool3dMaxGradFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& input, - const framework::Tensor& indices, - const framework::Tensor& output, - const framework::Tensor& output_grad, - framework::Tensor* input_grad); + const phi::DenseTensor& input, + const phi::DenseTensor& indices, + const phi::DenseTensor& output, + const phi::DenseTensor& output_grad, + phi::DenseTensor* input_grad); }; } // namespace math } // namespace operators diff --git a/paddle/fluid/operators/math/vol2col.cc b/paddle/fluid/operators/math/vol2col.cc index 680cd6a344579..041d79ee1f175 100644 --- a/paddle/fluid/operators/math/vol2col.cc +++ b/paddle/fluid/operators/math/vol2col.cc @@ -30,11 +30,11 @@ template class Vol2ColFunctor { public: void operator()(const phi::CPUContext& context, - const framework::Tensor& vol, + const phi::DenseTensor& vol, const std::vector& dilations, const std::vector& strides, const std::vector& paddings, - framework::Tensor* col, + phi::DenseTensor* col, const DataLayout data_layout) const { PADDLE_ENFORCE_EQ(vol.dims().size(), 4, @@ -156,11 +156,11 @@ template class Col2VolFunctor { public: void operator()(const phi::CPUContext& context, - const framework::Tensor& col, + const phi::DenseTensor& col, const std::vector& dilations, const std::vector& strides, const std::vector& paddings, - framework::Tensor* vol, + phi::DenseTensor* vol, const DataLayout data_layout) const { PADDLE_ENFORCE_EQ(vol->dims().size(), 4, diff --git a/paddle/fluid/operators/math/vol2col.cu b/paddle/fluid/operators/math/vol2col.cu index 
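Note: the unpooling functors keep their argument order; only the tensor type changes. A minimal call-site sketch, assuming float data, the usual <DeviceContext, T> template parameters, and already-initialized input/indices/output tensors (variable names are illustrative):

#include "paddle/fluid/operators/math/unpooling.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/dense_tensor.h"

void RunUnpool(const phi::CPUContext& ctx,
               const phi::DenseTensor& input,    // pooled values
               const phi::DenseTensor& indices,  // argmax positions from pooling
               phi::DenseTensor* output) {       // pre-sized unpooled result
  paddle::operators::math::Unpool2dMaxFunctor<phi::CPUContext, float> unpool;
  unpool(ctx, input, indices, output);
}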
90c2fcf6e27df..765f31eba34f0 100644 --- a/paddle/fluid/operators/math/vol2col.cu +++ b/paddle/fluid/operators/math/vol2col.cu @@ -104,11 +104,11 @@ __global__ void vol2col(int num_kernels, template void Vol2ColFunctor::operator()( const DeviceContext& context, - const framework::Tensor& vol, + const phi::DenseTensor& vol, const std::vector& dilations, const std::vector& strides, const std::vector& paddings, - framework::Tensor* col, + phi::DenseTensor* col, const DataLayout data_layout) const { PADDLE_ENFORCE_EQ(vol.dims().size(), 4, @@ -310,11 +310,11 @@ __global__ void col2vol(int num_kernels, template void Col2VolFunctor::operator()( const DeviceContext& context, - const framework::Tensor& col, + const phi::DenseTensor& col, const std::vector& dilations, const std::vector& strides, const std::vector& paddings, - framework::Tensor* vol, + phi::DenseTensor* vol, const DataLayout data_layout) const { PADDLE_ENFORCE_EQ(vol->dims().size(), 4, diff --git a/paddle/fluid/operators/math/vol2col.h b/paddle/fluid/operators/math/vol2col.h index 92ac7b66a0f5d..a5df8f93382ce 100644 --- a/paddle/fluid/operators/math/vol2col.h +++ b/paddle/fluid/operators/math/vol2col.h @@ -72,11 +72,11 @@ template class Vol2ColFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& vol, + const phi::DenseTensor& vol, const std::vector& dilations, const std::vector& strides, const std::vector& paddings, - framework::Tensor* col, + phi::DenseTensor* col, const DataLayout data_layout = DataLayout::kNCHW) const; }; @@ -84,11 +84,11 @@ template class Col2VolFunctor { public: void operator()(const DeviceContext& context, - const framework::Tensor& col, + const phi::DenseTensor& col, const std::vector& dilations, const std::vector& strides, const std::vector& paddings, - framework::Tensor* vol, + phi::DenseTensor* vol, const DataLayout data_layout = DataLayout::kNCHW) const; }; diff --git a/paddle/fluid/operators/math/vol2col_test.cc b/paddle/fluid/operators/math/vol2col_test.cc index c0c4ed5bb5d69..65db94752b987 100644 --- a/paddle/fluid/operators/math/vol2col_test.cc +++ b/paddle/fluid/operators/math/vol2col_test.cc @@ -21,10 +21,10 @@ limitations under the License. 
*/ template void testVol2col() { - paddle::framework::Tensor input; - paddle::framework::Tensor input_tmp; - paddle::framework::Tensor output; - paddle::framework::Tensor output_tmp; + phi::DenseTensor input; + phi::DenseTensor input_tmp; + phi::DenseTensor output; + phi::DenseTensor output_tmp; auto* place = new Place(); DeviceContext* context = new DeviceContext(*place); @@ -133,10 +133,10 @@ void testVol2col() { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) template <> void testVol2col() { - paddle::framework::Tensor input; - paddle::framework::Tensor input_tmp; - paddle::framework::Tensor output; - paddle::framework::Tensor output_tmp; + phi::DenseTensor input; + phi::DenseTensor input_tmp; + phi::DenseTensor output; + phi::DenseTensor output_tmp; auto* place = new paddle::platform::CUDAPlace(); auto* context = new phi::GPUContext(*place); diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index a49ceb42559c5..58fac7b69925b 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -61,10 +61,10 @@ class MatMulKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { auto &x = GET_DATA_SAFELY( - context.Input("X"), "Input", "X", "MatMul"); + context.Input("X"), "Input", "X", "MatMul"); auto &y = GET_DATA_SAFELY( - context.Input("Y"), "Input", "Y", "MatMul"); - auto *out = context.Output("Out"); + context.Input("Y"), "Input", "Y", "MatMul"); + auto *out = context.Output("Out"); auto &dev_ctx = context.template device_context(); dev_ctx.template Alloc(out, out->numel() * sizeof(T)); @@ -116,7 +116,7 @@ class MatMulKernel : public framework::OpKernel { // Reshape a rank-3 tensor from P x M x N to (P * M) x N. // Identity op if the tensor is not of rank 3. -static framework::Tensor FoldInitDims(const framework::Tensor &input) { +static phi::DenseTensor FoldInitDims(const phi::DenseTensor &input) { auto output = input; auto in_dims = input.dims(); if (in_dims.size() == 3) { @@ -129,13 +129,13 @@ static framework::Tensor FoldInitDims(const framework::Tensor &input) { // (Warning: This requires transposing data and writes into new memory.) // Identity op if the tensor is not of rank 3. template -static framework::Tensor FoldHeadAndLastDims(const DeviceContext &context, - const framework::Tensor &input) { +static phi::DenseTensor FoldHeadAndLastDims(const DeviceContext &context, + const phi::DenseTensor &input) { auto in_dims = input.dims(); if (in_dims.size() != 3) { return input; } - framework::Tensor output; + phi::DenseTensor output; output.Resize({in_dims[1], in_dims[0], in_dims[2]}); output.mutable_data(context.GetPlace()); std::vector axis = {1, 0, 2}; @@ -153,7 +153,7 @@ static framework::Tensor FoldHeadAndLastDims(const DeviceContext &context, * If transposed, `H,W` will be swapped. */ static void ReshapeTensorIntoMatrixSequence( - framework::Tensor *x, const phi::funcs::MatDescriptor &descriptor) { + phi::DenseTensor *x, const phi::funcs::MatDescriptor &descriptor) { int64_t h, w; h = descriptor.height_; w = descriptor.width_; @@ -181,9 +181,9 @@ static void ReshapeTensorIntoMatrixSequence( * If any of `X` and `Y` has batch size BatchSize, the out will have the * BatchSize. 
*/ -static void ReshapeXYOutIntoMatrixSequence(framework::Tensor *x, - framework::Tensor *y, - framework::Tensor *out, +static void ReshapeXYOutIntoMatrixSequence(phi::DenseTensor *x, + phi::DenseTensor *y, + phi::DenseTensor *out, bool trans_x, bool trans_y) { auto x_dim = RowMatrixFromVector(x->dims()); @@ -231,11 +231,11 @@ template class MatMulGradKernel : public framework::OpKernel { public: void MatMul(const framework::ExecutionContext &context, - const framework::Tensor &a, + const phi::DenseTensor &a, bool trans_a, - const framework::Tensor &b, + const phi::DenseTensor &b, bool trans_b, - framework::Tensor *out) const { + phi::DenseTensor *out) const { out->mutable_data(context.GetPlace()); auto blas = phi::funcs::GetBlas(context); auto mat_dim_a = phi::funcs::CreateMatrixDescriptor(a.dims(), 0, trans_a); @@ -266,13 +266,13 @@ class MatMulGradKernel : public framework::OpKernel { } void CalcInputGrad(const framework::ExecutionContext &context, - const framework::Tensor &a, + const phi::DenseTensor &a, bool trans_a, bool is_fold_init_dims_a, - const framework::Tensor &b, + const phi::DenseTensor &b, bool trans_b, bool is_fold_init_dims_b, - framework::Tensor *out) const { + phi::DenseTensor *out) const { if (out == nullptr) return; bool need_combine = (a.dims().size() == 3 || b.dims().size() == 3) && out->dims().size() == 2; @@ -293,12 +293,11 @@ class MatMulGradKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext &context) const override { - auto x = *context.Input("X"); - auto y = *context.Input("Y"); - auto dout = - *context.Input(framework::GradVarName("Out")); - auto *dx = context.Output(framework::GradVarName("X")); - auto *dy = context.Output(framework::GradVarName("Y")); + auto x = *context.Input("X"); + auto y = *context.Input("Y"); + auto dout = *context.Input(framework::GradVarName("Out")); + auto *dx = context.Output(framework::GradVarName("X")); + auto *dy = context.Output(framework::GradVarName("Y")); bool transpose_x = context.Attr("transpose_X"); bool transpose_y = context.Attr("transpose_Y"); @@ -370,12 +369,12 @@ template class MatMulDoubleGradKernel : public framework::OpKernel { public: void MatMul(const framework::ExecutionContext &context, - const framework::Tensor &a, + const phi::DenseTensor &a, bool trans_a, - const framework::Tensor &b, + const phi::DenseTensor &b, bool trans_b, bool flag, - framework::Tensor *out) const { + phi::DenseTensor *out) const { out->mutable_data(context.GetPlace()); auto blas = phi::funcs::GetBlas(context); auto mat_dim_a = phi::funcs::CreateMatrixDescriptor(a.dims(), 0, trans_a); @@ -404,14 +403,14 @@ class MatMulDoubleGradKernel : public framework::OpKernel { } void CalcInputGrad(const framework::ExecutionContext &context, - const framework::Tensor &a, + const phi::DenseTensor &a, bool trans_a, bool is_fold_init_dims_a, - const framework::Tensor &b, + const phi::DenseTensor &b, bool trans_b, bool is_fold_init_dims_b, bool flag, - framework::Tensor *out) const { + phi::DenseTensor *out) const { if (out == nullptr) return; bool need_combine = (a.dims().size() == 3 || b.dims().size() == 3) && out->dims().size() == 2; @@ -433,8 +432,8 @@ class MatMulDoubleGradKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext &context) const override { - auto x = *context.Input("X"); - auto y = *context.Input("Y"); + auto x = *context.Input("X"); + auto y = *context.Input("Y"); auto dout = *context.Input("DOut"); auto *ddx = context.Input("DDX"); auto *ddy = context.Input("DDY"); 
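Note: the matmul kernels show the accessor pattern that repeats throughout this patch: ExecutionContext::Input/Output are now instantiated with phi::DenseTensor. A minimal sketch using a hypothetical ExampleKernel (not an operator touched by the patch):

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/dense_tensor.h"

namespace paddle {
namespace operators {

template <typename T>
class ExampleKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // Previously ctx.Input<framework::Tensor>("X"); the accessor itself is
    // unchanged, only the template argument moves to the phi type.
    auto* x = ctx.Input<phi::DenseTensor>("X");
    auto* out = ctx.Output<phi::DenseTensor>("Out");
    out->Resize(x->dims());
    out->mutable_data<T>(ctx.GetPlace());
  }
};

}  // namespace operators
}  // namespace paddle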
@@ -713,7 +712,7 @@ class MatMulOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input diff --git a/paddle/fluid/operators/matmul_op_mlu.cc b/paddle/fluid/operators/matmul_op_mlu.cc index c5484e2d0406f..e55996903a7d1 100644 --- a/paddle/fluid/operators/matmul_op_mlu.cc +++ b/paddle/fluid/operators/matmul_op_mlu.cc @@ -18,13 +18,13 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template static void Mul(const framework::ExecutionContext& ctx, - const Tensor& X, - const Tensor& Y, - Tensor* Out, + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out, const float alpha) { Out->mutable_data(ctx.GetPlace()); @@ -48,9 +48,9 @@ static void Mul(const framework::ExecutionContext& ctx, template static void MatMul2D(const framework::ExecutionContext& ctx, - const Tensor& X, - const Tensor& Y, - Tensor* Out, + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out, const bool trans_x, const bool trans_y, const float alpha) { @@ -81,9 +81,9 @@ static void MatMul2D(const framework::ExecutionContext& ctx, template static void MatMulND(const framework::ExecutionContext& ctx, - const Tensor& X, - const Tensor& Y, - Tensor* Out, + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out, const bool trans_x, const bool trans_y, const float alpha) { @@ -118,8 +118,8 @@ template static void ReduceDims(const framework::ExecutionContext& ctx, const std::vector& dims, const std::vector& bcast_dims, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { std::vector axes; int64_t size = bcast_dims.size(); int64_t diff = bcast_dims.size() - dims.size(); @@ -162,9 +162,9 @@ template class MatMulMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Y = ctx.Input("Y"); - auto* Out = ctx.Output("Out"); + auto* X = ctx.Input("X"); + auto* Y = ctx.Input("Y"); + auto* Out = ctx.Output("Out"); bool transpose_x = ctx.Attr("transpose_X"); bool transpose_y = ctx.Attr("transpose_Y"); float alpha = static_cast(ctx.Attr("alpha")); @@ -253,11 +253,11 @@ template class MatMulGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Y = ctx.Input("Y"); - auto* dOut = ctx.Input(framework::GradVarName("Out")); - auto* dX = ctx.Output(framework::GradVarName("X")); - auto* dY = ctx.Output(framework::GradVarName("Y")); + auto* X = ctx.Input("X"); + auto* Y = ctx.Input("Y"); + auto* dOut = ctx.Input(framework::GradVarName("Out")); + auto* dX = ctx.Output(framework::GradVarName("X")); + auto* dY = ctx.Output(framework::GradVarName("Y")); bool transpose_x = ctx.Attr("transpose_X"); bool transpose_y = ctx.Attr("transpose_Y"); float alpha = static_cast(ctx.Attr("alpha")); diff --git a/paddle/fluid/operators/matmul_op_npu.cc b/paddle/fluid/operators/matmul_op_npu.cc index e99b21fc696ba..31b352b90f6a8 100644 --- a/paddle/fluid/operators/matmul_op_npu.cc +++ b/paddle/fluid/operators/matmul_op_npu.cc @@ -19,15 +19,15 @@ 
limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template static void Mul(const framework::ExecutionContext& ctx, const aclrtStream& stream, - const Tensor& X, - const Tensor& Y, - Tensor* Out, + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out, const float alpha) { Out->mutable_data(ctx.GetPlace()); @@ -49,9 +49,9 @@ static void Mul(const framework::ExecutionContext& ctx, template static void Dot(const framework::ExecutionContext& ctx, const aclrtStream& stream, - const Tensor& X, - const Tensor& Y, - Tensor* Out, + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out, const float alpha) { Out->mutable_data(ctx.GetPlace()); @@ -73,9 +73,9 @@ static void Dot(const framework::ExecutionContext& ctx, template static void MatMul2D(const framework::ExecutionContext& ctx, const aclrtStream& stream, - const Tensor& X, - const Tensor& Y, - Tensor* Out, + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out, const bool trans_x, const bool trans_y, const float alpha) { @@ -107,9 +107,9 @@ static void MatMul2D(const framework::ExecutionContext& ctx, template static void MatMulND(const framework::ExecutionContext& ctx, const aclrtStream& stream, - const Tensor& X, - const Tensor& Y, - Tensor* Out, + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out, const bool trans_x, const bool trans_y, const float alpha) { @@ -143,8 +143,8 @@ static void ReduceDims(const framework::ExecutionContext& ctx, const aclrtStream& stream, const std::vector& dims, const std::vector& brd_dims, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { std::vector axes; int64_t size = brd_dims.size(); int64_t diff = brd_dims.size() - dims.size(); @@ -167,9 +167,9 @@ template class MatMulNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Y = ctx.Input("Y"); - auto* Out = ctx.Output("Out"); + auto* X = ctx.Input("X"); + auto* Y = ctx.Input("Y"); + auto* Out = ctx.Output("Out"); bool transpose_x = ctx.Attr("transpose_X"); bool transpose_y = ctx.Attr("transpose_Y"); float alpha = static_cast(ctx.Attr("alpha")); @@ -312,11 +312,11 @@ template class MatMulGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Y = ctx.Input("Y"); - auto* dOut = ctx.Input(framework::GradVarName("Out")); - auto* dX = ctx.Output(framework::GradVarName("X")); - auto* dY = ctx.Output(framework::GradVarName("Y")); + auto* X = ctx.Input("X"); + auto* Y = ctx.Input("Y"); + auto* dOut = ctx.Input(framework::GradVarName("Out")); + auto* dX = ctx.Output(framework::GradVarName("X")); + auto* dY = ctx.Output(framework::GradVarName("Y")); bool transpose_x = ctx.Attr("transpose_X"); bool transpose_y = ctx.Attr("transpose_Y"); float alpha = static_cast(ctx.Attr("alpha")); diff --git a/paddle/fluid/operators/matmul_op_xpu.cc b/paddle/fluid/operators/matmul_op_xpu.cc index 922bf780add0b..5f9e9459800da 100644 --- a/paddle/fluid/operators/matmul_op_xpu.cc +++ b/paddle/fluid/operators/matmul_op_xpu.cc @@ -23,7 +23,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::Tensor; template class MatMulXPUKernel : public framework::OpKernel { @@ -31,9 +30,9 @@ class MatMulXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* y = context.Input("Y"); - auto* out = context.Output("Out"); + auto* x = context.Input("X"); + auto* y = context.Input("Y"); + auto* out = context.Output("Out"); out->mutable_data(context.GetPlace()); bool trans_x = context.Attr("transpose_X"); bool trans_y = context.Attr("transpose_Y"); @@ -86,12 +85,11 @@ class MatMulGradXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto x = *context.Input("X"); - auto y = *context.Input("Y"); - auto dout = - *context.Input(framework::GradVarName("Out")); - auto* dx = context.Output(framework::GradVarName("X")); - auto* dy = context.Output(framework::GradVarName("Y")); + auto x = *context.Input("X"); + auto y = *context.Input("Y"); + auto dout = *context.Input(framework::GradVarName("Out")); + auto* dx = context.Output(framework::GradVarName("X")); + auto* dy = context.Output(framework::GradVarName("Y")); bool transpose_x = context.Attr("transpose_X"); bool transpose_y = context.Attr("transpose_Y"); float alpha = static_cast(context.Attr("alpha")); diff --git a/paddle/fluid/operators/matmul_v2_op.cc b/paddle/fluid/operators/matmul_v2_op.cc index d00004b340d09..876a90e7b9674 100644 --- a/paddle/fluid/operators/matmul_v2_op.cc +++ b/paddle/fluid/operators/matmul_v2_op.cc @@ -149,7 +149,7 @@ class MatMulV2Op : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input @@ -224,7 +224,7 @@ class MatMulV2OpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (framework::IsComplexType(expected_kernel_type.data_type_)) { // only promote inputs’s types when contains complex input diff --git a/paddle/fluid/operators/matmul_v2_op.h b/paddle/fluid/operators/matmul_v2_op.h index 8e436dd6afbfb..70bdd0736bf4e 100644 --- a/paddle/fluid/operators/matmul_v2_op.h +++ b/paddle/fluid/operators/matmul_v2_op.h @@ -39,7 +39,7 @@ namespace operators { // Reshape a rank-3 tensor from P x M x N to (P * M) x N. // Identity op if the tensor is not of rank 3. -static framework::Tensor FoldInitDims(const framework::Tensor& input) { +static phi::DenseTensor FoldInitDims(const phi::DenseTensor& input) { auto output = input; auto in_dims = input.dims(); if (in_dims.size() == 3) { @@ -77,7 +77,7 @@ static framework::DDim ColumnMatrixFromVector(const framework::DDim& y_dim) { * If transposed, `H,W` will be swapped. 
*/ static void ReshapeTensorIntoMatrixSequence( - framework::Tensor* x, const phi::funcs::MatDescriptor& descriptor) { + phi::DenseTensor* x, const phi::funcs::MatDescriptor& descriptor) { int64_t h, w; h = descriptor.height_; w = descriptor.width_; @@ -91,9 +91,9 @@ static void ReshapeTensorIntoMatrixSequence( } } -static void ReshapeXYOutIntoMatrixSequence(framework::Tensor* x, - framework::Tensor* y, - framework::Tensor* out, +static void ReshapeXYOutIntoMatrixSequence(phi::DenseTensor* x, + phi::DenseTensor* y, + phi::DenseTensor* out, bool trans_x, bool trans_y) { auto x_dim = RowMatrixFromVector(x->dims()); diff --git a/paddle/fluid/operators/matmul_v2_op_mlu.cc b/paddle/fluid/operators/matmul_v2_op_mlu.cc index 1ea29500ddc24..134819b7920a0 100644 --- a/paddle/fluid/operators/matmul_v2_op_mlu.cc +++ b/paddle/fluid/operators/matmul_v2_op_mlu.cc @@ -18,13 +18,13 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template static void Mul(const framework::ExecutionContext& ctx, - const Tensor& X, - const Tensor& Y, - Tensor* Out) { + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out) { Out->mutable_data(ctx.GetPlace()); MLUCnnlTensorDesc x_desc(X, CNNL_LAYOUT_ARRAY, ToCnnlDataType()); @@ -46,9 +46,9 @@ static void Mul(const framework::ExecutionContext& ctx, template static void MatMul2D(const framework::ExecutionContext& ctx, - const Tensor& X, - const Tensor& Y, - Tensor* Out, + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out, const bool trans_x, const bool trans_y) { Out->mutable_data(ctx.GetPlace()); @@ -70,9 +70,9 @@ static void MatMul2D(const framework::ExecutionContext& ctx, template static void MatMul2DwithReduceBatch(const framework::ExecutionContext& ctx, - const Tensor& X, - const Tensor& Y, - Tensor* Out, + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out, const bool trans_x, const bool trans_y) { if (!Out->initialized()) { @@ -101,9 +101,9 @@ static void MatMul2DwithReduceBatch(const framework::ExecutionContext& ctx, template static void MatMulND(const framework::ExecutionContext& ctx, - const Tensor& X, - const Tensor& Y, - Tensor* Out, + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out, const bool trans_x, const bool trans_y) { if (!Out->initialized()) { @@ -129,8 +129,8 @@ template static void ReduceDims(const framework::ExecutionContext& ctx, const std::vector& dims, const std::vector& bcast_dims, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { std::vector axes; int64_t size = bcast_dims.size(); int64_t diff = bcast_dims.size() - dims.size(); @@ -173,9 +173,9 @@ template class MatMulV2MLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Y = ctx.Input("Y"); - auto* Out = ctx.Output("Out"); + auto* X = ctx.Input("X"); + auto* Y = ctx.Input("Y"); + auto* Out = ctx.Output("Out"); const bool trans_x = ctx.Attr("trans_x"); const bool trans_y = ctx.Attr("trans_y"); @@ -263,11 +263,11 @@ template class MatMulGradV2MLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Y = ctx.Input("Y"); - auto* dOut = ctx.Input(framework::GradVarName("Out")); - auto* dX = ctx.Output(framework::GradVarName("X")); - auto* dY = 
ctx.Output(framework::GradVarName("Y")); + auto* X = ctx.Input("X"); + auto* Y = ctx.Input("Y"); + auto* dOut = ctx.Input(framework::GradVarName("Out")); + auto* dX = ctx.Output(framework::GradVarName("X")); + auto* dY = ctx.Output(framework::GradVarName("Y")); const bool trans_x = ctx.Attr("trans_x"); const bool trans_y = ctx.Attr("trans_y"); diff --git a/paddle/fluid/operators/matmul_v2_op_npu.cc b/paddle/fluid/operators/matmul_v2_op_npu.cc index 291894bc30ed9..4df3de71134ed 100644 --- a/paddle/fluid/operators/matmul_v2_op_npu.cc +++ b/paddle/fluid/operators/matmul_v2_op_npu.cc @@ -21,15 +21,15 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template static void MatMul2D(const framework::ExecutionContext& ctx, const aclrtStream& stream, - const Tensor& X, - const Tensor& Y, - Tensor* Out, + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out, const bool trans_x, const bool trans_y) { Out->mutable_data(ctx.GetPlace()); @@ -44,9 +44,9 @@ static void MatMul2D(const framework::ExecutionContext& ctx, template static void MatMulND(const framework::ExecutionContext& ctx, const aclrtStream& stream, - const Tensor& X, - const Tensor& Y, - Tensor* Out, + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out, const bool trans_x, const bool trans_y) { Out->mutable_data(ctx.GetPlace()); @@ -61,9 +61,9 @@ static void MatMulND(const framework::ExecutionContext& ctx, template <> void MatMulND(const framework::ExecutionContext& ctx, const aclrtStream& stream, - const Tensor& X, - const Tensor& Y, - Tensor* Out, + const phi::DenseTensor& X, + const phi::DenseTensor& Y, + phi::DenseTensor* Out, const bool trans_x, const bool trans_y) { Out->mutable_data(ctx.GetPlace()); @@ -114,8 +114,8 @@ static void ReduceDims(const framework::ExecutionContext& ctx, const aclrtStream& stream, const std::vector& dims, const std::vector& brd_dims, - const Tensor& in, - Tensor* out) { + const phi::DenseTensor& in, + phi::DenseTensor* out) { std::vector axes; int64_t size = brd_dims.size(); int64_t diff = brd_dims.size() - dims.size(); @@ -138,9 +138,9 @@ template class MatMulV2NPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Y = ctx.Input("Y"); - auto* Out = ctx.Output("Out"); + auto* X = ctx.Input("X"); + auto* Y = ctx.Input("Y"); + auto* Out = ctx.Output("Out"); const bool trans_x = ctx.Attr("trans_x"); const bool trans_y = ctx.Attr("trans_y"); @@ -276,11 +276,11 @@ template class MatMulV2GradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); - auto* Y = ctx.Input("Y"); - auto* dOut = ctx.Input(framework::GradVarName("Out")); - auto* dX = ctx.Output(framework::GradVarName("X")); - auto* dY = ctx.Output(framework::GradVarName("Y")); + auto* X = ctx.Input("X"); + auto* Y = ctx.Input("Y"); + auto* dOut = ctx.Input(framework::GradVarName("Out")); + auto* dX = ctx.Output(framework::GradVarName("X")); + auto* dY = ctx.Output(framework::GradVarName("Y")); const bool trans_x = ctx.Attr("trans_x"); const bool trans_y = ctx.Attr("trans_y"); diff --git a/paddle/fluid/operators/mean_iou_op.cu b/paddle/fluid/operators/mean_iou_op.cu index ee31607c63ad8..3e7f8a5363ac0 100644 --- a/paddle/fluid/operators/mean_iou_op.cu +++ 
b/paddle/fluid/operators/mean_iou_op.cu @@ -95,11 +95,11 @@ class MeanIoUCUDAOpKernel : public framework::OpKernel { auto& dev_ctx = ctx.template device_context(); auto& place = *dev_ctx.eigen_device(); // get input and output tensor - auto* predictions = ctx.Input("Predictions"); - auto* labels = ctx.Input("Labels"); - auto* out_mean_iou = ctx.Output("OutMeanIou"); - auto* out_wrong = ctx.Output("OutWrong"); - auto* out_correct = ctx.Output("OutCorrect"); + auto* predictions = ctx.Input("Predictions"); + auto* labels = ctx.Input("Labels"); + auto* out_mean_iou = ctx.Output("OutMeanIou"); + auto* out_wrong = ctx.Output("OutWrong"); + auto* out_correct = ctx.Output("OutCorrect"); int num_classes = static_cast(ctx.Attr("num_classes")); // Get data ptr @@ -128,16 +128,16 @@ class MeanIoUCUDAOpKernel : public framework::OpKernel { out_mean_iou_t.device(place) = out_mean_iou_t.constant(0.0f); // collect pre wrong, correct and mean_iou - auto in_mean_ious = ctx.MultiInput("InMeanIou"); + auto in_mean_ious = ctx.MultiInput("InMeanIou"); for (int i = 0; i < in_mean_ious.size(); ++i) { out_mean_iou_t.device(place) += EigenTensor::From(*in_mean_ious[i]); } - auto in_wrongs = ctx.MultiInput("InWrongs"); + auto in_wrongs = ctx.MultiInput("InWrongs"); for (int i = 0; i < in_wrongs.size(); ++i) { out_wrong_t.device(place) += EigenTensor::From(*in_wrongs[i]); } - auto in_corrects = ctx.MultiInput("InCorrects"); + auto in_corrects = ctx.MultiInput("InCorrects"); for (int i = 0; i < in_corrects.size(); ++i) { out_correct_t.device(place) += EigenTensor::From(*in_corrects[i]); } diff --git a/paddle/fluid/operators/mean_iou_op.h b/paddle/fluid/operators/mean_iou_op.h index 0ec92251a8e37..7681af011e663 100644 --- a/paddle/fluid/operators/mean_iou_op.h +++ b/paddle/fluid/operators/mean_iou_op.h @@ -20,7 +20,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template { auto& place = *ctx.template device_context().eigen_device(); // get input and output tensor - auto* predictions = ctx.Input("Predictions"); - auto* labels = ctx.Input("Labels"); - auto* out_mean_iou = ctx.Output("OutMeanIou"); - auto* out_wrong = ctx.Output("OutWrong"); - auto* out_correct = ctx.Output("OutCorrect"); + auto* predictions = ctx.Input("Predictions"); + auto* labels = ctx.Input("Labels"); + auto* out_mean_iou = ctx.Output("OutMeanIou"); + auto* out_wrong = ctx.Output("OutWrong"); + auto* out_correct = ctx.Output("OutCorrect"); int num_classes = static_cast(ctx.Attr("num_classes")); // get data ptr @@ -77,16 +77,16 @@ class MeanIoUKernel : public framework::OpKernel { out_mean_iou_t = out_mean_iou_t.constant(0); // collect pre wrong, correct and mean_iou - auto in_mean_ious = ctx.MultiInput("InMeanIou"); + auto in_mean_ious = ctx.MultiInput("InMeanIou"); for (size_t i = 0; i < in_mean_ious.size(); ++i) { out_mean_iou_t.device(place) += EigenTensor::From(*in_mean_ious[i]); } - auto in_wrongs = ctx.MultiInput("InWrongs"); + auto in_wrongs = ctx.MultiInput("InWrongs"); for (size_t i = 0; i < in_wrongs.size(); ++i) { out_wrong_t.device(place) += EigenTensor::From(*in_wrongs[i]); } - auto in_corrects = ctx.MultiInput("InCorrects"); + auto in_corrects = ctx.MultiInput("InCorrects"); for (size_t i = 0; i < in_corrects.size(); ++i) { out_correct_t.device(place) += EigenTensor::From(*in_corrects[i]); } diff --git a/paddle/fluid/operators/mean_op_mlu.cc b/paddle/fluid/operators/mean_op_mlu.cc index 4301cde33e337..8fea989941c88 100644 --- a/paddle/fluid/operators/mean_op_mlu.cc +++ b/paddle/fluid/operators/mean_op_mlu.cc @@ -20,14 +20,14 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class MeanMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* input = context.Input("X"); - auto* output = context.Output("Out"); + auto* input = context.Input("X"); + auto* output = context.Output("Out"); const T* in_data = input->data(); T* out_data = output->mutable_data(context.GetPlace()); @@ -77,14 +77,16 @@ template class MeanMLUGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto output_grad = context.Input(framework::GradVarName("Out")); + auto output_grad = + context.Input(framework::GradVarName("Out")); PADDLE_ENFORCE_EQ(output_grad->numel(), 1, platform::errors::InvalidArgument( "Mean Gradient Input Tensor len should be 1. But " "received Out@Grad's elements num is %d.", output_grad->numel())); - auto input_grad = context.Output(framework::GradVarName("X")); + auto input_grad = + context.Output(framework::GradVarName("X")); input_grad->mutable_data(context.GetPlace()); auto in_data = output_grad->data(); diff --git a/paddle/fluid/operators/mean_op_npu.cc b/paddle/fluid/operators/mean_op_npu.cc index 76f1dcb43a3a2..bee3f8b0696b1 100644 --- a/paddle/fluid/operators/mean_op_npu.cc +++ b/paddle/fluid/operators/mean_op_npu.cc @@ -16,7 +16,7 @@ limitations under the License. 
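Note: mean_iou illustrates the duplicable-input accessors: MultiInput/MultiOutput now take phi::DenseTensor as the template argument, and each element is a const phi::DenseTensor*. A minimal sketch that only accumulates element counts; the "InWrongs" slot name follows the mean_iou hunk, any duplicable slot works the same way:

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/dense_tensor.h"

// Counts the total number of elements across a duplicable input slot.
int64_t TotalElements(const paddle::framework::ExecutionContext& ctx) {
  auto ins = ctx.MultiInput<phi::DenseTensor>("InWrongs");
  int64_t total = 0;
  for (const phi::DenseTensor* t : ins) {
    total += t->numel();
  }
  return total;
}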
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class MeanNPUKernel : public framework::OpKernel { @@ -49,7 +49,7 @@ class MeanGradNPUKernel : public framework::OpKernel { context.template device_context() .stream(); - auto grad = context.Input(framework::GradVarName("Out")); + auto grad = context.Input(framework::GradVarName("Out")); PADDLE_ENFORCE_EQ(grad->numel(), 1, @@ -58,7 +58,7 @@ class MeanGradNPUKernel : public framework::OpKernel { "received Out@Grad's elements num is %d.", grad->numel())); - auto IG = context.Output(framework::GradVarName("X")); + auto IG = context.Output(framework::GradVarName("X")); IG->mutable_data(context.GetPlace()); // ones diff --git a/paddle/fluid/operators/memcpy_d2h_op.cc b/paddle/fluid/operators/memcpy_d2h_op.cc index ed99fd5bf8783..80181779ab347 100644 --- a/paddle/fluid/operators/memcpy_d2h_op.cc +++ b/paddle/fluid/operators/memcpy_d2h_op.cc @@ -39,7 +39,7 @@ class MemcpyD2HOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { return framework::OpKernelType(expected_kernel_type.data_type_, expected_kernel_type.place_, diff --git a/paddle/fluid/operators/memcpy_h2d_op.cc b/paddle/fluid/operators/memcpy_h2d_op.cc index b1126fb12818e..8d2cfcff80768 100644 --- a/paddle/fluid/operators/memcpy_h2d_op.cc +++ b/paddle/fluid/operators/memcpy_h2d_op.cc @@ -40,7 +40,7 @@ class MemcpyH2DOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { return framework::OpKernelType(expected_kernel_type.data_type_, expected_kernel_type.place_, diff --git a/paddle/fluid/operators/memcpy_op.cc b/paddle/fluid/operators/memcpy_op.cc index 273b1fe7c9e70..caa4164ee5bc0 100644 --- a/paddle/fluid/operators/memcpy_op.cc +++ b/paddle/fluid/operators/memcpy_op.cc @@ -56,7 +56,7 @@ class MemcpyOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { return framework::OpKernelType(expected_kernel_type.data_type_, expected_kernel_type.place_, diff --git a/paddle/fluid/operators/meshgrid_op.cc b/paddle/fluid/operators/meshgrid_op.cc index 0b95200c12828..7921e8844c112 100644 --- a/paddle/fluid/operators/meshgrid_op.cc +++ b/paddle/fluid/operators/meshgrid_op.cc @@ -25,8 +25,6 @@ namespace paddle { namespace operators { -using framework::Tensor; - class MeshgridOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -34,7 +32,7 @@ class MeshgridOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - auto inputs = ctx.MultiInput("X"); + auto inputs = ctx.MultiInput("X"); auto input_data_type = framework::proto::VarType::Type(0); bool flag = 0; for (auto* input : inputs) { diff --git a/paddle/fluid/operators/meshgrid_op_mlu.cc b/paddle/fluid/operators/meshgrid_op_mlu.cc index 09aaf695f7556..76beb021bc654 100644 --- 
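Note: the memcpy operators change only the tensor parameter of GetKernelTypeForVar; the override still builds its result from the expected kernel type. A minimal sketch inside a hypothetical operator class; the use of tensor.layout() is an illustrative assumption, not copied from the elided hunk body, and InferShape plus the other required overrides are omitted:

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/dense_tensor.h"

namespace paddle {
namespace operators {

class ExampleOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
  framework::OpKernelType GetKernelTypeForVar(
      const std::string& var_name,
      const phi::DenseTensor& tensor,  // was framework::Tensor before the patch
      const framework::OpKernelType& expected_kernel_type) const override {
    // Keep the expected data type and place; taking the layout from the
    // variable itself is an illustrative choice (see note above).
    return framework::OpKernelType(expected_kernel_type.data_type_,
                                   expected_kernel_type.place_,
                                   tensor.layout());
  }
};

}  // namespace operators
}  // namespace paddle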
a/paddle/fluid/operators/meshgrid_op_mlu.cc +++ b/paddle/fluid/operators/meshgrid_op_mlu.cc @@ -22,8 +22,8 @@ template class MeshgridMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto ins = ctx.MultiInput("X"); - auto outs = ctx.MultiOutput("Out"); + auto ins = ctx.MultiInput("X"); + auto outs = ctx.MultiOutput("Out"); PADDLE_ENFORCE_EQ( (ins.size() > 1) && (ins.size() < 7), true, diff --git a/paddle/fluid/operators/meshgrid_op_npu.cc b/paddle/fluid/operators/meshgrid_op_npu.cc index 86e45fb66284a..8c4e67d787e92 100644 --- a/paddle/fluid/operators/meshgrid_op_npu.cc +++ b/paddle/fluid/operators/meshgrid_op_npu.cc @@ -22,8 +22,8 @@ template class MeshgridNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto ins = context.MultiInput("X"); - auto outs = context.MultiOutput("Out"); + auto ins = context.MultiInput("X"); + auto outs = context.MultiOutput("Out"); PADDLE_ENFORCE_EQ( (ins.size() > 1) && (ins.size() < 7), true, @@ -55,7 +55,7 @@ class MeshgridNPUKernel : public framework::OpKernel { view_shape[i] = shape[i]; framework::DDim out_dims_reshape = phi::make_ddim(view_shape); - framework::Tensor reshape_ins_tensor(ins[i]->dtype()); + phi::DenseTensor reshape_ins_tensor(ins[i]->dtype()); reshape_ins_tensor.ShareDataWith(*ins[i]); reshape_ins_tensor.Resize(out_dims_reshape); diff --git a/paddle/fluid/operators/metrics/accuracy_op_mlu.cc b/paddle/fluid/operators/metrics/accuracy_op_mlu.cc index 96ca608a39b93..ec78fb09eab30 100644 --- a/paddle/fluid/operators/metrics/accuracy_op_mlu.cc +++ b/paddle/fluid/operators/metrics/accuracy_op_mlu.cc @@ -23,12 +23,12 @@ template class AccuracyMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* indices = ctx.Input("Indices"); - auto* label = ctx.Input("Label"); + auto* indices = ctx.Input("Indices"); + auto* label = ctx.Input("Label"); - auto* accuracy = ctx.Output("Accuracy"); - auto* correct = ctx.Output("Correct"); - auto* total = ctx.Output("Total"); + auto* accuracy = ctx.Output("Accuracy"); + auto* correct = ctx.Output("Correct"); + auto* total = ctx.Output("Total"); int num_samples = indices->dims()[0]; if (num_samples == 0) { diff --git a/paddle/fluid/operators/metrics/accuracy_op_npu.cc b/paddle/fluid/operators/metrics/accuracy_op_npu.cc index 2f6c8f5718eff..a53ba79a4c534 100644 --- a/paddle/fluid/operators/metrics/accuracy_op_npu.cc +++ b/paddle/fluid/operators/metrics/accuracy_op_npu.cc @@ -23,13 +23,13 @@ template class AccuracyNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* inference = ctx.Input("Out"); - auto* label = ctx.Input("Label"); - auto* indices = ctx.Input("Indices"); + auto* inference = ctx.Input("Out"); + auto* label = ctx.Input("Label"); + auto* indices = ctx.Input("Indices"); - auto* accuracy = ctx.Output("Accuracy"); - auto* correct = ctx.Output("Correct"); - auto* total = ctx.Output("Total"); + auto* accuracy = ctx.Output("Accuracy"); + auto* correct = ctx.Output("Correct"); + auto* total = ctx.Output("Total"); auto stream = ctx.template device_context() .stream(); diff --git a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc index f2c04d8fbcfc7..f3f39a40fbaea 100644 --- a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc +++ 
b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc @@ -21,17 +21,17 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = paddle::framework::Tensor; +using Tensor = phi::DenseTensor; template class AccuracyXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* inference = ctx.Input("Out"); - auto* indices = ctx.Input("Indices"); - auto* label = ctx.Input("Label"); - auto* accuracy = ctx.Output("Accuracy"); - auto* correct = ctx.Output("Correct"); - auto* total = ctx.Output("Total"); + auto* inference = ctx.Input("Out"); + auto* indices = ctx.Input("Indices"); + auto* label = ctx.Input("Label"); + auto* accuracy = ctx.Output("Accuracy"); + auto* correct = ctx.Output("Correct"); + auto* total = ctx.Output("Total"); int* correct_data = correct->mutable_data(ctx.GetPlace()); int* total_data = total->mutable_data(ctx.GetPlace()); float* accuracy_data = accuracy->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/metrics/precision_recall_op.h b/paddle/fluid/operators/metrics/precision_recall_op.h index b8a5e49ef5a24..55be510dcd237 100644 --- a/paddle/fluid/operators/metrics/precision_recall_op.h +++ b/paddle/fluid/operators/metrics/precision_recall_op.h @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template @@ -31,13 +31,13 @@ template class PrecisionRecallKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in0 = ctx.Input("Indices"); - auto* in1 = ctx.Input("Labels"); - auto* in2 = ctx.Input("Weights"); - auto* in3 = ctx.Input("StatesInfo"); - auto* out0 = ctx.Output("BatchMetrics"); - auto* out1 = ctx.Output("AccumMetrics"); - auto* out2 = ctx.Output("AccumStatesInfo"); + auto* in0 = ctx.Input("Indices"); + auto* in1 = ctx.Input("Labels"); + auto* in2 = ctx.Input("Weights"); + auto* in3 = ctx.Input("StatesInfo"); + auto* out0 = ctx.Output("BatchMetrics"); + auto* out1 = ctx.Output("AccumMetrics"); + auto* out2 = ctx.Output("AccumStatesInfo"); const int* ids_data = in0->data(); const int* labels_data = in1->data(); diff --git a/paddle/fluid/operators/minus_op.h b/paddle/fluid/operators/minus_op.h index e6dc80c7fedb3..0a576e875a458 100644 --- a/paddle/fluid/operators/minus_op.h +++ b/paddle/fluid/operators/minus_op.h @@ -24,9 +24,9 @@ template class MinusKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* left_tensor = context.Input("X"); - auto* right_tensor = context.Input("Y"); - auto* out_tensor = context.Output("Out"); + auto* left_tensor = context.Input("X"); + auto* right_tensor = context.Input("Y"); + auto* out_tensor = context.Output("Out"); out_tensor->mutable_data(context.GetPlace()); auto& dev = diff --git a/paddle/fluid/operators/miopen_lstm_cache.h b/paddle/fluid/operators/miopen_lstm_cache.h index ec13337d8f0bc..a9a6482fd485c 100644 --- a/paddle/fluid/operators/miopen_lstm_cache.h +++ b/paddle/fluid/operators/miopen_lstm_cache.h @@ -51,7 +51,7 @@ class ScopedRNNBase { const std::vector& sequence_length, size_t* workspace_size, size_t* reserve_size, - framework::Tensor* dropout_state) { + phi::DenseTensor* dropout_state) { int numDirections = is_bidirec_ ? 
2 : 1; miopenDataType_t miopen_type = platform::CudnnDataType::type; diff --git a/paddle/fluid/operators/miopen_rnn_cache.h b/paddle/fluid/operators/miopen_rnn_cache.h index b568ffbb09cc8..19255363259b5 100644 --- a/paddle/fluid/operators/miopen_rnn_cache.h +++ b/paddle/fluid/operators/miopen_rnn_cache.h @@ -53,7 +53,7 @@ struct CudnnRNNCache { miopenTensorDescriptor_t dw_desc_; size_t workspace_size_; - framework::Tensor workspace_data_; + phi::DenseTensor workspace_data_; size_t seq_length_; @@ -78,7 +78,7 @@ struct CudnnRNNCache { int seed, int weight_numel, size_t *reserve_size_, - framework::Tensor *dropout_state_, + phi::DenseTensor *dropout_state_, bool initialized, miopenDataType_t miopen_type) { seq_length_ = seq_len; diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc index 728d86cd94e33..c2a055f96bd4a 100644 --- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc @@ -27,7 +27,7 @@ using dnnl::memory; using dnnl::primitive; using dnnl::stream; using framework::DataLayout; -using framework::Tensor; + using platform::GetMKLDNNFormat; using platform::MKLDNNDeviceContext; using platform::to_void_cast; @@ -62,8 +62,8 @@ void eltwise_forward(const framework::ExecutionContext &ctx, auto &dev_ctx = ctx.template device_context(); const auto &mkldnn_engine = dev_ctx.GetEngine(); - const auto *x = ctx.Input("X"); - auto *out = ctx.Output("Out"); + const auto *x = ctx.Input("X"); + auto *out = ctx.Output("Out"); bool is_inplaced = x->IsSharedBufferWith(*out); @@ -94,9 +94,9 @@ void eltwise_grad(const framework::ExecutionContext &ctx, auto &dev_ctx = ctx.template device_context(); const auto &mkldnn_engine = dev_ctx.GetEngine(); - const auto *x = ctx.Input("X"); - const auto *dout = ctx.Input(framework::GradVarName("Out")); - auto *dx = ctx.Output(framework::GradVarName("X")); + const auto *x = ctx.Input("X"); + const auto *dout = ctx.Input(framework::GradVarName("Out")); + auto *dx = ctx.Output(framework::GradVarName("X")); platform::ActivationMKLDNNHandler handler( algorithm, ctx, mkldnn_engine, ctx.GetPlace(), x, dout); @@ -122,9 +122,9 @@ void eltwise_grad_use_out(const framework::ExecutionContext &ctx, auto &dev_ctx = ctx.template device_context(); const auto &mkldnn_engine = dev_ctx.GetEngine(); - const auto *out = ctx.Input("Out"); - const auto *dout = ctx.Input(framework::GradVarName("Out")); - auto *dx = ctx.Output(framework::GradVarName("X")); + const auto *out = ctx.Input("Out"); + const auto *dout = ctx.Input(framework::GradVarName("Out")); + auto *dx = ctx.Output(framework::GradVarName("X")); platform::ActivationMKLDNNHandler handler( algorithm, ctx, mkldnn_engine, ctx.GetPlace(), out, dout); diff --git a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc index f41068dd5f1ae..d7575f0ebf885 100644 --- a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc @@ -128,13 +128,13 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT< } std::shared_ptr AcquireMeanMemory( - const framework::Tensor *mean) { + const phi::DenseTensor *mean) { const T *mean_data = mean->data(); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->mean_desc(), to_void_cast(mean_data)); } - std::shared_ptr AcquireMeanMemory(framework::Tensor *mean) { + std::shared_ptr AcquireMeanMemory(phi::DenseTensor *mean) { T *mean_data = 
mean->mutable_data(this->place_, this->fwd_pd_->mean_desc().get_size()); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->mean_desc(), @@ -142,14 +142,14 @@ class BatchNormMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT< } std::shared_ptr AcquireVarianceMemory( - const framework::Tensor *variance) { + const phi::DenseTensor *variance) { const T *variance_data = variance->data(); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->variance_desc(), to_void_cast(variance_data)); } std::shared_ptr AcquireVarianceMemory( - framework::Tensor *variance) { + phi::DenseTensor *variance) { T *variance_data = variance->mutable_data( this->place_, this->fwd_pd_->variance_desc().get_size()); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->variance_desc(), @@ -170,13 +170,13 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { const bool test_mode = is_test && (!trainable_stats); const bool global_stats = test_mode || use_global_stats; - const auto *x = ctx.Input("X"); - const auto *scale = ctx.Input("Scale"); - const auto *shift = ctx.Input("Bias"); + const auto *x = ctx.Input("X"); + const auto *scale = ctx.Input("Scale"); + const auto *shift = ctx.Input("Bias"); - auto *y = ctx.Output("Y"); - auto *batch_mean = ctx.Output("SavedMean"); - auto *batch_variance = ctx.Output("SavedVariance"); + auto *y = ctx.Output("Y"); + auto *batch_mean = ctx.Output("SavedMean"); + auto *batch_variance = ctx.Output("SavedVariance"); BatchNormMKLDNNHandler handler( ctx, mkldnn_engine, x, global_stats, test_mode); @@ -190,8 +190,8 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { if (global_stats) { // mean and variance are taken from input Tensor - const auto *mean = ctx.Input("Mean"); - const auto *variance = ctx.Input("Variance"); + const auto *mean = ctx.Input("Mean"); + const auto *variance = ctx.Input("Variance"); mean_memory = handler.AcquireMeanMemory(mean); variance_memory = handler.AcquireVarianceMemory(variance); @@ -213,8 +213,8 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { astream.wait(); if (!global_stats) { - auto *mean_out = ctx.Output("MeanOut"); - auto *variance_out = ctx.Output("VarianceOut"); + auto *mean_out = ctx.Output("MeanOut"); + auto *variance_out = ctx.Output("VarianceOut"); const float momentum = ctx.Attr("momentum"); const unsigned int C = phi::vectorize(scale->dims())[0]; @@ -246,15 +246,18 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel { auto &dev_ctx = ctx.template device_context(); auto mkldnn_engine = dev_ctx.GetEngine(); - const auto *x = ctx.Input("X"); - const auto *scale = ctx.Input("Scale"); - const auto *shift = ctx.Input("Bias"); - const auto *batch_mean = ctx.Input("SavedMean"); - const auto *batch_variance = ctx.Input("SavedVariance"); - const auto *diff_y = ctx.Input(framework::GradVarName("Y")); - auto *diff_x = ctx.Output(framework::GradVarName("X")); - auto *diff_scale = ctx.Output(framework::GradVarName("Scale")); - auto *diff_shift = ctx.Output(framework::GradVarName("Bias")); + const auto *x = ctx.Input("X"); + const auto *scale = ctx.Input("Scale"); + const auto *shift = ctx.Input("Bias"); + const auto *batch_mean = ctx.Input("SavedMean"); + const auto *batch_variance = ctx.Input("SavedVariance"); + const auto *diff_y = + ctx.Input(framework::GradVarName("Y")); + auto *diff_x = ctx.Output(framework::GradVarName("X")); + auto *diff_scale = + ctx.Output(framework::GradVarName("Scale")); + auto *diff_shift = + ctx.Output(framework::GradVarName("Bias")); 
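[Editorial note] The hunks in this file, like most hunks in this patch, apply one mechanical substitution: the per-file alias `using Tensor = framework::Tensor;` is dropped or redefined, and every `ctx.Input<Tensor>(...)`, `ctx.Output<Tensor>(...)` and `framework::Tensor*` parameter is rewritten against `phi::DenseTensor`. A minimal sketch of the pattern; the kernel body and input/output names below are illustrative and not taken verbatim from any file in this patch:

    // Before: relied on the per-file alias `using Tensor = framework::Tensor;`
    const auto* x = ctx.Input<Tensor>("X");
    auto* out = ctx.Output<Tensor>("Out");

    // After: names the phi type directly, so the local alias becomes unnecessary
    const auto* x = ctx.Input<phi::DenseTensor>("X");
    auto* out = ctx.Output<phi::DenseTensor>("Out");

Calls that exceed the line-length limit after the substitution are re-wrapped, which is why hunks such as the gradient kernel above touch more lines than the type change alone would require.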
BatchNormMKLDNNHandler handler(ctx, mkldnn_engine, x, scale, diff_y); diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index b16576505dfd3..5df17de25bbe8 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -28,7 +28,7 @@ using dnnl::primitive; using dnnl::stream; using framework::DataLayout; using framework::LoDTensor; -using framework::Tensor; + using platform::to_void_cast; template @@ -37,8 +37,8 @@ class ConcatMKLDNNHandler public: ConcatMKLDNNHandler(const framework::ExecutionContext& ctx, const dnnl::engine mkldnn_engine, - const std::vector& inputs, - Tensor* output) + const std::vector& inputs, + phi::DenseTensor* output) : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, ctx.GetPlace()) { int concat_axis = ctx.Attr("axis"); @@ -53,7 +53,7 @@ class ConcatMKLDNNHandler concat_axis)); if (ctx.HasInput("AxisTensor")) { - auto* axis_tensor = ctx.Input("AxisTensor"); + auto* axis_tensor = ctx.Input("AxisTensor"); concat_axis = GetDataFromTensor(axis_tensor)[0]; auto out_dims = inputs[0]->dims(); for (size_t i = 1; i < inputs.size(); ++i) { @@ -110,14 +110,15 @@ class ConcatMKLDNNHandler dst_md, concat_axis, srcs_md, this->engine_)); } - std::shared_ptr AcquireSrcMemory(const Tensor& input, int i) { + std::shared_ptr AcquireSrcMemory(const phi::DenseTensor& input, + int i) { const T* input_data = input.data(); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i), to_void_cast(input_data)); } }; -static void EnforceLayouts(const std::vector inputs) { +static void EnforceLayouts(const std::vector inputs) { for (auto* input : inputs) { PADDLE_ENFORCE_EQ( input->layout(), @@ -127,13 +128,14 @@ static void EnforceLayouts(const std::vector inputs) { } // From a multi-input, gather only nonempty inputs -static const std::vector ReduceMultiInput( - const std::vector& inputs) { - std::vector reduced(inputs.size()); - auto end_it = std::copy_if( - inputs.begin(), inputs.end(), reduced.begin(), [](const Tensor* t) { - return t->numel() > 0; - }); +static const std::vector ReduceMultiInput( + const std::vector& inputs) { + std::vector reduced(inputs.size()); + auto end_it = + std::copy_if(inputs.begin(), + inputs.end(), + reduced.begin(), + [](const phi::DenseTensor* t) { return t->numel() > 0; }); reduced.resize(std::distance(reduced.begin(), end_it)); return reduced; } @@ -147,9 +149,9 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel { const auto& mkldnn_engine = dev_ctx.GetEngine(); // If any of the multiple inputs of concat has an input size of 0, the // actual size of the multi_input will change - auto multi_input = ReduceMultiInput(ctx.MultiInput("X")); + auto multi_input = ReduceMultiInput(ctx.MultiInput("X")); EnforceLayouts(multi_input); - Tensor* output = ctx.Output("Out"); + phi::DenseTensor* output = ctx.Output("Out"); ConcatMKLDNNHandler handler(ctx, mkldnn_engine, multi_input, output); @@ -187,7 +189,8 @@ class ConcatGradMKLDNNOpKernel : public paddle::framework::OpKernel { auto out_var_names = ctx.OutputNames(framework::GradVarName("X")); const auto x = ctx.MultiInput("X"); - const auto* dout = ctx.Input(framework::GradVarName("Out")); + const auto* dout = + ctx.Input(framework::GradVarName("Out")); auto dx = ctx.MultiOutput(framework::GradVarName("X")); for (size_t i = 0; i < dx.size(); ++i) { @@ -198,7 +201,7 @@ class ConcatGradMKLDNNOpKernel : public paddle::framework::OpKernel { int axis = ctx.Attr("axis"); if 
(ctx.HasInput("AxisTensor")) { - auto* axis_tensor = ctx.Input("AxisTensor"); + auto* axis_tensor = ctx.Input("AxisTensor"); axis = GetDataFromTensor(axis_tensor)[0]; } diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index fc8f29913097c..6b5f1f6a35741 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -34,12 +34,13 @@ inline MKLDNNMemoryFormat GetWeightsFormat(const int groups, } } -static dnnl::memory::data_type GetDstType(bool is_int8, - bool is_bfloat16, - bool force_fp32_output, - std::string fuse_activation, - bool fuse_residual_conn, - const Tensor* residual_param) { +static dnnl::memory::data_type GetDstType( + bool is_int8, + bool is_bfloat16, + bool force_fp32_output, + std::string fuse_activation, + bool fuse_residual_conn, + const phi::DenseTensor* residual_param) { auto dst_dt = dnnl::memory::data_type::f32; if (is_int8) { dst_dt = (fuse_activation == "relu" || fuse_activation == "relu6") @@ -76,10 +77,10 @@ class ConvMKLDNNHandlerT const platform::MKLDNNDeviceContext& dev_ctx, const dnnl::engine mkldnn_engine, platform::Place cpu_place, - const Tensor* input, - const Tensor* filter, - const Tensor* bias, - Tensor* output, + const phi::DenseTensor* input, + const phi::DenseTensor* filter, + const phi::DenseTensor* bias, + phi::DenseTensor* output, const std::string& unique_name) : platform::MKLDNNHandlerTdev_ctx_.GetBlob(key_bs)); if (bias_scale_tuple) return bias_scale_tuple; - const auto* filter = ctx.Input("Filter"); + const auto* filter = ctx.Input("Filter"); const auto& weights_tz = phi::vectorize(filter->dims()); const int groups = std::max(ctx.Attr("groups"), 1); @@ -482,7 +483,7 @@ class ConvMKLDNNHandlerT std::tuple, float> get_int8_scales( const framework::ExecutionContext& ctx) const { - const auto* filter = ctx.Input("Filter"); + const auto* filter = ctx.Input("Filter"); const auto& weights_tz = phi::vectorize(filter->dims()); const bool& force_fp32_output = ctx.Attr("force_fp32_output"); @@ -567,7 +568,7 @@ class ConvMKLDNNHandlerT std::shared_ptr AcquireWeightsMemoryWithReorderFromDataPrimitive( - const framework::Tensor* filter, const int groups, const bool is_conv3d) { + const phi::DenseTensor* filter, const int groups, const bool is_conv3d) { const K* filter_data = filter->data(); auto weights_tz = phi::vectorize(filter->dims()); platform::GetGroupConvWeightsTz(weights_tz, groups); @@ -586,7 +587,7 @@ class ConvMKLDNNHandlerT } std::shared_ptr AcquireSrcMemoryWithReorder( - const framework::Tensor* input) { + const phi::DenseTensor* input) { return this->AcquireMemoryWithReorderPrimitive(input, "@src_mem_p_user", "@src_mem_p_target", @@ -595,7 +596,7 @@ class ConvMKLDNNHandlerT } std::shared_ptr AcquireSrcMemoryWithReorderFromWeightsPrimitive( - const framework::Tensor* input) { + const phi::DenseTensor* input) { return this->AcquireMemoryWithReorderPrimitive(input, "@src_mem_w_p_user", "@src_mem_w_p_target", @@ -605,7 +606,7 @@ class ConvMKLDNNHandlerT std::shared_ptr AcquireDiffDstMemoryWithReorderFromWeightsPrimitive( - const framework::Tensor* out_grad) { + const phi::DenseTensor* out_grad) { return this->AcquireMemoryWithReorderPrimitive( out_grad, "@diff_dst_mem_w_p_user", @@ -616,7 +617,7 @@ class ConvMKLDNNHandlerT std::shared_ptr AcquireDiffDstMemoryWithReorderMemoryFromDataPrimitive( - const framework::Tensor* out_grad) { + const phi::DenseTensor* out_grad) { return this->AcquireMemoryWithReorderPrimitive( out_grad, 
"@diff_dst_mem_p_user", @@ -626,7 +627,7 @@ class ConvMKLDNNHandlerT } std::shared_ptr AcquireMemoryWithReorderPrimitive( - const framework::Tensor* in_mem, + const phi::DenseTensor* in_mem, const char* key_mem_user, const char* key_mem_target, const char* key_mem, @@ -653,7 +654,7 @@ class ConvMKLDNNHandlerT } std::shared_ptr AcquireWeightsMemoryWithReorder( - const framework::Tensor* filter, + const phi::DenseTensor* filter, const int groups, const bool is_conv3d, const bool is_test, @@ -706,7 +707,7 @@ class ConvMKLDNNHandlerT } std::shared_ptr AcquireBiasMemoryWithReorder( - const framework::Tensor* bias, + const phi::DenseTensor* bias, const bool is_test, const std::vector& scale_data = {1.0f}, int mask = 0) { @@ -736,7 +737,7 @@ class ConvMKLDNNHandlerT } std::shared_ptr AcquireResidualMemory( - const framework::Tensor* residual_param) { + const phi::DenseTensor* residual_param) { void* residual_data = framework::TransToProtoVarType(residual_param->dtype()) == framework::DataTypeTrait::DataType() @@ -754,7 +755,7 @@ class ConvMKLDNNHandlerT } std::shared_ptr AcquireDstMemoryWithResidual( - framework::Tensor* output, const framework::Tensor* residual_param) { + phi::DenseTensor* output, const phi::DenseTensor* residual_param) { std::shared_ptr dst_memory_p; if (residual_param->mem_desc() != this->fwd_pd_->dst_desc()) { auto residual_memory_p = this->AcquireResidualMemory(residual_param); @@ -784,7 +785,7 @@ class ConvMKLDNNOpKernel : public framework::OpKernel { bool is_INT8 = std::is_same::value || std::is_same::value; bool is_BFLOAT16 = ctx.Attr("mkldnn_data_type") == "bfloat16"; - auto residual_param = ctx.Input("ResidualData"); + auto residual_param = ctx.Input("ResidualData"); bool fuse_residual_conn = ctx.Attr("fuse_residual_connection"); std::string fuse_activation = ctx.Attr("fuse_activation"); bool force_fp32_output = ctx.Attr("force_fp32_output"); @@ -821,11 +822,11 @@ class ConvMKLDNNOpKernel : public framework::OpKernel { const bool is_conv3d = ctx.Attr>("strides").size() == 3U; const bool fuse_residual_conn = ctx.Attr("fuse_residual_connection"); - const auto* input = ctx.Input("Input"); - const auto* filter = ctx.Input("Filter"); + const auto* input = ctx.Input("Input"); + const auto* filter = ctx.Input("Filter"); const auto* bias = - ctx.HasInput("Bias") ? ctx.Input("Bias") : nullptr; - auto* output = ctx.Output("Output"); + ctx.HasInput("Bias") ? ctx.Input("Bias") : nullptr; + auto* output = ctx.Output("Output"); ConvMKLDNNHandlerT handler( ctx, @@ -845,7 +846,7 @@ class ConvMKLDNNOpKernel : public framework::OpKernel { std::shared_ptr dst_memory_p; if (fuse_residual_conn) { - auto* residual_param = ctx.Input("ResidualData"); + auto* residual_param = ctx.Input("ResidualData"); dst_memory_p = handler.AcquireDstMemoryWithResidual(output, residual_param); } else { @@ -898,10 +899,11 @@ class ConvMKLDNNOpKernel : public framework::OpKernel { platform::errors::Unimplemented( "residual fusion does not support force output with fp32")); - auto* input = ctx.Input("Input"); - auto* filter = ctx.Input("Filter"); - auto* bias = ctx.HasInput("Bias") ? ctx.Input("Bias") : nullptr; - auto* output = ctx.Output("Output"); + auto* input = ctx.Input("Input"); + auto* filter = ctx.Input("Filter"); + auto* bias = + ctx.HasInput("Bias") ? 
ctx.Input("Bias") : nullptr; + auto* output = ctx.Output("Output"); ConvMKLDNNHandlerT handler( ctx, @@ -927,7 +929,7 @@ class ConvMKLDNNOpKernel : public framework::OpKernel { std::shared_ptr dst_memory_p; if (fuse_residual_conn) { - auto* residual_param = ctx.Input("ResidualData"); + auto* residual_param = ctx.Input("ResidualData"); PADDLE_ENFORCE_EQ( output->dims(), residual_param->dims(), @@ -998,14 +1000,16 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel { ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); - const Tensor* input = ctx.Input("Input"); - const Tensor* filter = ctx.Input("Filter"); - const Tensor* bias = - ctx.HasInput("Bias") ? ctx.Input("Bias") : nullptr; - const Tensor* output_grad = - ctx.Input(framework::GradVarName("Output")); - Tensor* input_grad = ctx.Output(framework::GradVarName("Input")); - Tensor* filter_grad = ctx.Output(framework::GradVarName("Filter")); + const phi::DenseTensor* input = ctx.Input("Input"); + const phi::DenseTensor* filter = ctx.Input("Filter"); + const phi::DenseTensor* bias = + ctx.HasInput("Bias") ? ctx.Input("Bias") : nullptr; + const phi::DenseTensor* output_grad = + ctx.Input(framework::GradVarName("Output")); + phi::DenseTensor* input_grad = + ctx.Output(framework::GradVarName("Input")); + phi::DenseTensor* filter_grad = + ctx.Output(framework::GradVarName("Filter")); if (!input_grad && !filter_grad) return; diff --git a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc index 80163389318aa..d2dfc9a9c1ccf 100644 --- a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc @@ -21,10 +21,11 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using framework::DataLayout; -inline dnnl::memory::dims GetWeightsTz(const Tensor* filter, const int groups) { +inline dnnl::memory::dims GetWeightsTz(const phi::DenseTensor* filter, + const int groups) { auto weights_tz = phi::vectorize(filter->dims()); int g = std::max(groups, 1); int g_dim = (g > 1) ? 
1 : 0; @@ -40,10 +41,10 @@ class ConvTransposeMKLDNNHandlerT public: ConvTransposeMKLDNNHandlerT(const framework::ExecutionContext& ctx, const dnnl::engine mkldnn_engine, - const Tensor* input, - const Tensor* filter, - const Tensor* bias, - Tensor* output) + const phi::DenseTensor* input, + const phi::DenseTensor* filter, + const phi::DenseTensor* bias, + phi::DenseTensor* output) : platform::MKLDNNHandlerNoCachingT( mkldnn_engine, ctx.GetPlace()), is_test_(ctx.Attr("is_test")) { @@ -218,7 +219,7 @@ class ConvTransposeMKLDNNHandlerT } std::shared_ptr AcquireSrcMemoryWithReorder( - const framework::Tensor* input) { + const phi::DenseTensor* input) { const T* input_data = input->data(); return platform::MKLDNNHandlerNoCachingT:: AcquireMemoryWithReorder(input->mem_desc(), @@ -229,7 +230,7 @@ class ConvTransposeMKLDNNHandlerT std::shared_ptr AcquireWeightsMemoryWithReorder( const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key, - const framework::Tensor* filter, + const phi::DenseTensor* filter, const int& groups) { const K* filter_data = filter->data(); auto weights_tz = GetWeightsTz(filter, groups); @@ -331,7 +332,7 @@ class ConvTransposeMKLDNNHandlerT std::shared_ptr AcquireBiasMemoryWithReorder( const platform::MKLDNNDeviceContext& dev_ctx, const std::string& key, - const framework::Tensor* bias) { + const phi::DenseTensor* bias) { const K* bias_data = bias->data(); auto user_bias_md = platform::MKLDNNMemDesc(phi::vectorize(bias->dims()), @@ -377,11 +378,11 @@ class ConvTransposeMKLDNNOpKernel : public framework::OpKernel { ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); - const auto* input = ctx.Input("Input"); - const auto* filter = ctx.Input("Filter"); + const auto* input = ctx.Input("Input"); + const auto* filter = ctx.Input("Filter"); const auto* bias = - ctx.HasInput("Bias") ? ctx.Input("Bias") : nullptr; - auto* output = ctx.Output("Output"); + ctx.HasInput("Bias") ? ctx.Input("Bias") : nullptr; + auto* output = ctx.Output("Output"); ConvTransposeMKLDNNHandlerT handler( ctx, mkldnn_engine, input, filter, bias, output); auto src_memory_p = handler.AcquireSrcMemoryWithReorder(input); diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index c25c662d612b1..4ceddf53f9458 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "dnnl.hpp" +#include "dnnl.hpp" // NOLINT #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/dequantize_op.h" @@ -27,7 +27,7 @@ using dnnl::memory; using dnnl::primitive; using dnnl::reorder; using platform::to_void_cast; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using dnnl::stream; using framework::DataLayout; using platform::GetMKLDNNFormat; @@ -36,11 +36,11 @@ template class DeQuantOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("Input"); + auto* x = ctx.Input("Input"); const auto quantization_scale = ctx.Attr("Scale"); const auto quantization_shift = ctx.Attr("Shift"); const bool with_shift = quantization_shift != 0.0f; - auto* out = ctx.Output("Output"); + auto* out = ctx.Output("Output"); PADDLE_ENFORCE(quantization_scale != 0.0f, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc index d477fa0b2bf2c..98ebe42fa1f8d 100644 --- a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc @@ -20,7 +20,7 @@ namespace { using paddle::framework::ExecutionContext; using paddle::framework::GradVarName; -using paddle::framework::Tensor; + using paddle::platform::MKLDNNDeviceContext; using phi::vectorize; @@ -35,8 +35,8 @@ class ExpandMKLDNNKernel : public paddle::framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); - const auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + const auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto x_vec_dims = vectorize(x->dims()); @@ -99,8 +99,8 @@ class ExpandGradMKLDNNKernel : public paddle::framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); - auto* dout = ctx.Input(GradVarName("Out")); - auto* dx = ctx.Output(GradVarName("X")); + auto* dout = ctx.Input(GradVarName("Out")); + auto* dx = ctx.Output(GradVarName("X")); auto dx_vec_dims = vectorize(dx->dims()); auto dout_vec_dims = vectorize(dout->dims()); diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index 7404972ea7cca..273f7b5c932e0 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -33,7 +33,7 @@ using framework::DataLayout; using framework::DDim; using framework::ExecutionContext; using framework::LoDTensor; -using framework::Tensor; + using platform::GetMKLDNNFormat; using platform::MKLDNNDeviceContext; using platform::to_void_cast; @@ -44,8 +44,8 @@ class FCPrimitiveFactory { explicit FCPrimitiveFactory(const dnnl::engine& engine) : engine_(engine) {} void ExecuteFcPrimitive(const LoDTensor* input, - const Tensor* weights, - const Tensor* bias, + const phi::DenseTensor* weights, + const phi::DenseTensor* bias, LoDTensor* output, const MKLDNNDeviceContext& dev_ctx, const ExecutionContext& ctx) { @@ -158,7 +158,7 @@ class FCPrimitiveFactory { // primitive. Therefore, function SetOutputFormat is needed to choose // an appropriate format based on the number of input dimensions and // format of an input tensor. 
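[Editorial note] The comment above motivates SetOutputFormat: the plain output format has to be chosen from the output rank and the input layout rather than taken from the primitive. A rough, self-contained sketch of the rank-based part of that decision, illustrative only and not the code from fc_mkldnn_op.cc:

    #include "dnnl.hpp"

    // Map a tensor rank onto a plain (non-blocked) oneDNN format tag.
    dnnl::memory::format_tag PlainFormatTagForRank(int rank) {
      switch (rank) {
        case 2: return dnnl::memory::format_tag::nc;
        case 3: return dnnl::memory::format_tag::ncw;
        case 4: return dnnl::memory::format_tag::nchw;
        case 5: return dnnl::memory::format_tag::ncdhw;
        default: return dnnl::memory::format_tag::undef;
      }
    }

The real kernel additionally inspects the incoming tensor's format (per the comment, "format of an input tensor"), so this sketch covers only the dimension-count half of the choice.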
- void SetOutputFormat(MKLDNNMemoryFormat in_format, Tensor* out) { + void SetOutputFormat(MKLDNNMemoryFormat in_format, phi::DenseTensor* out) { int dim_num = out->dims().size(); // In case of 2 dims, we set the only possible format, nc if (dim_num == 2) { @@ -184,8 +184,8 @@ class FCPrimitiveFactory { } void UpdateDataPointers(const ExecutionContext& ctx, - Tensor* out, - const Tensor* in) { + phi::DenseTensor* out, + const phi::DenseTensor* in) { input_->set_data_handle(to_void_cast(in->data())); output_->set_data_handle(out->mutable_data(ctx.GetPlace())); // If the primitive exists, but the output tensor has changed its @@ -198,8 +198,8 @@ class FCPrimitiveFactory { dnnl::inner_product_forward::primitive_desc Create2DFcPrimDescriptor( const LoDTensor* input, - const Tensor* weights, - const Tensor* bias, + const phi::DenseTensor* weights, + const phi::DenseTensor* bias, LoDTensor* output, const ExecutionContext& ctx) { auto src_desc = CreateMemDescriptor(input, MKLDNNMemoryFormat::any); @@ -212,7 +212,7 @@ class FCPrimitiveFactory { return CreateFcPrimDesc(src_desc, weights_desc, bias_desc, dst_desc, attrs); } - std::vector Get2DWeightDimsForDNNL(const Tensor* weights) { + std::vector Get2DWeightDimsForDNNL(const phi::DenseTensor* weights) { auto dims = phi::vectorize(weights->dims()); std::swap(dims[0], dims[1]); // swap input dim with output dim return dims; @@ -222,8 +222,8 @@ class FCPrimitiveFactory { dnnl::inner_product_forward::primitive_desc Create3DFcPrimDescriptor( const LoDTensor* input, - const Tensor* weights, - const Tensor* bias, + const phi::DenseTensor* weights, + const phi::DenseTensor* bias, LoDTensor* output, const ExecutionContext& ctx) { auto input_dims = phi::vectorize(input->dims()); @@ -245,20 +245,20 @@ class FCPrimitiveFactory { return CreateFcPrimDesc(src_desc, weights_desc, bias_desc, dst_desc, attrs); } - std::vector Get3DWeightDimsForDNNL(const Tensor* weights) { + std::vector Get3DWeightDimsForDNNL(const phi::DenseTensor* weights) { auto paddle_w_dims = phi::vectorize(weights->dims()); return {paddle_w_dims[1], paddle_w_dims[0], 1}; } - memory::desc Create3DUserWeightsDesc(const Tensor* weights) { + memory::desc Create3DUserWeightsDesc(const phi::DenseTensor* weights) { auto dims = Get3DWeightDimsForDNNL(weights); return CreateMemDescriptor(dims, MKLDNNMemoryFormat::oiw); } dnnl::inner_product_forward::primitive_desc Create4DFcPrimDescriptor( const LoDTensor* input, - const Tensor* weights, - const Tensor* bias, + const phi::DenseTensor* weights, + const phi::DenseTensor* bias, LoDTensor* output, const ExecutionContext& ctx) { auto src_desc = CreateMemDescriptor(input, MKLDNNMemoryFormat::any); @@ -274,7 +274,7 @@ class FCPrimitiveFactory { } std::vector Get4DWeightDimsForDNNL(const LoDTensor* input, - const Tensor* weights) { + const phi::DenseTensor* weights) { auto old_w_dims = phi::vectorize(weights->dims()); auto old_in_dims = phi::vectorize(input->dims()); auto dims = {old_w_dims[1], old_in_dims[1], old_in_dims[2], old_in_dims[3]}; @@ -282,7 +282,7 @@ class FCPrimitiveFactory { } memory::desc Create4DUserWeightsDesc(const LoDTensor* input, - const Tensor* weights) { + const phi::DenseTensor* weights) { auto dims = Get4DWeightDimsForDNNL(input, weights); return CreateMemDescriptor(dims, MKLDNNMemoryFormat::oihw); } @@ -351,7 +351,7 @@ class FCPrimitiveFactory { } template - static dnnl::memory::desc CreateMemDescriptor(const Tensor* tensor, + static dnnl::memory::desc CreateMemDescriptor(const phi::DenseTensor* tensor, MKLDNNMemoryFormat format) { 
auto dims = phi::vectorize(tensor->dims()); return CreateMemDescriptor(dims, format); @@ -359,7 +359,7 @@ class FCPrimitiveFactory { template dnnl::memory CreateMemory(const dnnl::memory::desc& desc, - const Tensor* tensor) { + const phi::DenseTensor* tensor) { return CreateMemory(desc, platform::to_void_cast(tensor->data())); } @@ -369,7 +369,7 @@ class FCPrimitiveFactory { template std::shared_ptr CreateMemoryToBeCached( - const dnnl::memory::desc& desc, const Tensor* tensor) { + const dnnl::memory::desc& desc, const phi::DenseTensor* tensor) { return CreateMemoryToBeCached(desc, platform::to_void_cast(tensor->data())); } @@ -380,7 +380,8 @@ class FCPrimitiveFactory { } // Create weights memory and transform to default MKL-DNN format - std::shared_ptr CreateWeightsMemory(const Tensor* weights) { + std::shared_ptr CreateWeightsMemory( + const phi::DenseTensor* weights) { auto dims = phi::vectorize(weights->dims()); std::swap(dims[0], dims[1]); // Correct output dimensions auto src_desc = CreateMemDescriptor(dims, MKLDNNMemoryFormat::io); @@ -557,10 +558,10 @@ class FCPrimitiveFactory { dnnl::memory CreateDstMemory( const dnnl::inner_product_forward::primitive_desc& fc_prim_desc, const ExecutionContext& ctx, - Tensor* output) { + phi::DenseTensor* output) { if (ctx.HasAttr("fuse_residual_connection") && ctx.Attr("fuse_residual_connection")) { - auto* residual_param = ctx.Output("ResidualData"); + auto* residual_param = ctx.Output("ResidualData"); PADDLE_ENFORCE_EQ( output->dims(), @@ -587,7 +588,7 @@ class FCPrimitiveFactory { void RecomputeOutputDims(const ExecutionContext& ctx, const LoDTensor* input, - const Tensor* w, + const phi::DenseTensor* w, LoDTensor* output) { int in_num_col_dims = ctx.Attr("in_num_col_dims"); bool padding_weights = ctx.Attr("padding_weights"); @@ -638,8 +639,8 @@ GetPrimitiveFactory(const MKLDNNDeviceContext& dev_ctx, template static void ExecuteFc(const ExecutionContext& ctx, const LoDTensor* input, - const Tensor* w, - const Tensor* bias, + const phi::DenseTensor* w, + const phi::DenseTensor* bias, LoDTensor* output, bool fuse_relu, bool force_fp32_output) { @@ -679,8 +680,8 @@ class FCMKLDNNOpKernel : public framework::OpKernel { platform::errors::PreconditionNotMet("FC MKL-DNN must use CPUPlace.")); platform::MKLDNNDeviceContext::tls().log_lib_version(); auto input = ctx.Input("Input"); - auto w = ctx.Input("W"); - auto bias = ctx.Input("Bias"); + auto w = ctx.Input("W"); + auto bias = ctx.Input("Bias"); auto output = ctx.Output("Out"); bool fuse_relu = ctx.Attr("activation_type") == "relu"; diff --git a/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc index e7e45b4b6e426..7673b66455f8f 100644 --- a/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fill_constant_mkldnn_op.cc @@ -18,13 +18,11 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::Tensor; - template class FillConstantMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT { public: - FillConstantMKLDNNHandler(Tensor* out, + FillConstantMKLDNNHandler(phi::DenseTensor* out, dnnl::engine engine, platform::Place cpu_place) : platform::MKLDNNHandlerNoCachingT(engine, cpu_place) { @@ -61,7 +59,7 @@ class FillConstantMKLDNNKernel : public framework::OpKernel { ctx.template device_context(); const auto& dnnl_engine = dev_ctx.GetEngine(); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); T fill_value = CalculateFillValue(ctx); auto shape = GetShape(ctx); @@ -116,7 +114,7 @@ class FillConstantMKLDNNKernel : public framework::OpKernel { } if (ctx.HasInput("ValueTensor")) { - const auto* value_tensor = ctx.Input("ValueTensor"); + const auto* value_tensor = ctx.Input("ValueTensor"); PADDLE_ENFORCE_EQ( value_tensor->numel(), 1, diff --git a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc index 64d7bca4d0646..54c2e3e630a6a 100644 --- a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc @@ -35,8 +35,8 @@ class InterpolateMKLDNNHandler InterpolateMKLDNNHandler(const dnnl::algorithm algo, const dnnl::engine engine, platform::Place cpu_place, - const Tensor* x, - Tensor* out) + const phi::DenseTensor* x, + phi::DenseTensor* out) : platform::MKLDNNHandlerNoCachingT( engine, cpu_place) { const auto dst_tz = phi::vectorize(out->dims()); @@ -51,7 +51,7 @@ template class InterpolateMKLDNNKernel : public framework::OpKernel { std::vector ComputeOutputShape( const framework::ExecutionContext& ctx) const { - const auto* x = ctx.Input("X"); + const auto* x = ctx.Input("X"); const auto& in_dims = x->dims(); const framework::DDim in_dhw_dims = @@ -70,8 +70,8 @@ class InterpolateMKLDNNKernel : public framework::OpKernel { out_dims.push_back(ctx.Attr("out_w")); } - auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); - auto out_size = ctx.Input("OutSize"); + auto list_new_size_tensor = ctx.MultiInput("SizeTensor"); + auto out_size = ctx.Input("OutSize"); if (list_new_size_tensor.size() > 0) { auto new_size = get_new_shape(list_new_size_tensor); if (new_size.size() == out_dims.size()) { @@ -85,7 +85,7 @@ class InterpolateMKLDNNKernel : public framework::OpKernel { } else { std::vector scale; scale.reserve(3); - auto scale_tensor = ctx.Input("Scale"); + auto scale_tensor = ctx.Input("Scale"); if (scale_tensor != nullptr) { auto scale_data = get_new_data_from_tensor(scale_tensor); scale.resize(3, scale_data[0]); @@ -136,8 +136,8 @@ class InterpolateMKLDNNKernel : public framework::OpKernel { ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); - const auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + const auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); const auto interp_method = ctx.Attr("interp_method"); const dnnl::algorithm algo = (interp_method == "nearest") diff --git a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc index 9aa7e26530d74..d69185f4526ec 100644 --- a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc @@ -28,7 +28,7 @@ class LayerNormMKLDNNHandler const float& epsilon, const dnnl::normalization_flags& flags, const bool& is_test, - const Tensor* x, + const phi::DenseTensor* x, const dnnl::engine engine, 
platform::Place cpu_place) : platform::MKLDNNHandlerNoCachingT( @@ -39,8 +39,8 @@ class LayerNormMKLDNNHandler fwd_prop_kind, x->mem_desc(), epsilon, flags); } - std::shared_ptr AcquireScaleShiftMemory(const Tensor* scale, - const Tensor* shift) { + std::shared_ptr AcquireScaleShiftMemory( + const phi::DenseTensor* scale, const phi::DenseTensor* shift) { // OneDNN requires a single piece of memory for scale and shift data const unsigned int C = phi::vectorize(scale->dims())[0]; @@ -55,7 +55,7 @@ class LayerNormMKLDNNHandler return scaleshift_memory; } - std::shared_ptr AcquireMeanMemory(framework::Tensor* mean) { + std::shared_ptr AcquireMeanMemory(phi::DenseTensor* mean) { T* mean_data = mean->mutable_data(this->place_, this->fwd_pd_->mean_desc().get_size()); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->mean_desc(), @@ -63,7 +63,7 @@ class LayerNormMKLDNNHandler } std::shared_ptr AcquireVarianceMemory( - framework::Tensor* variance) { + phi::DenseTensor* variance) { T* variance_data = variance->mutable_data( this->place_, this->fwd_pd_->variance_desc().get_size()); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->variance_desc(), @@ -75,10 +75,10 @@ template class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* scale = ctx.Input("Scale"); - auto* bias = ctx.Input("Bias"); - auto* out = ctx.Output("Y"); + auto* x = ctx.Input("X"); + auto* scale = ctx.Input("Scale"); + auto* bias = ctx.Input("Bias"); + auto* out = ctx.Output("Y"); const float epsilon = ctx.Attr("epsilon"); const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); @@ -116,8 +116,8 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel { {DNNL_ARG_DST, *dst_memory}}; if (!is_test) { - auto* mean = ctx.Output("Mean"); - auto* var = ctx.Output("Variance"); + auto* mean = ctx.Output("Mean"); + auto* var = ctx.Output("Variance"); auto mean_memory = handler.AcquireMeanMemory(mean); auto variance_memory = handler.AcquireVarianceMemory(var); diff --git a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc index 7043b3b4dda0a..12e12ca428a32 100644 --- a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc @@ -17,7 +17,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using paddle::framework::Tensor; using paddle::platform::MKLDNNDeviceContext; template @@ -28,7 +27,7 @@ class LRNMKLDNNHandler LRNMKLDNNHandler(const framework::ExecutionContext& ctx, const dnnl::engine mkldnn_engine, platform::Place cpu_place, - const Tensor* input) + const phi::DenseTensor* input) : platform:: MKLDNNHandlerNoCachingT( @@ -59,9 +58,9 @@ class LRNMKLDNNHandler LRNMKLDNNHandler(const framework::ExecutionContext& ctx, const dnnl::engine mkldnn_engine, platform::Place cpu_place, - const Tensor* in_x, - const Tensor* out_grad, - Tensor* in_x_grad) + const phi::DenseTensor* in_x, + const phi::DenseTensor* out_grad, + phi::DenseTensor* in_x_grad) : platform:: MKLDNNHandlerNoCachingT( mkldnn_engine, cpu_place) { @@ -95,7 +94,8 @@ class LRNMKLDNNHandler k); } - std::shared_ptr AcquireWorkspaceMemory(Tensor* workspace) { + std::shared_ptr AcquireWorkspaceMemory( + phi::DenseTensor* workspace) { T* ptr = workspace->mutable_data( this->place_, this->fwd_pd_->workspace_desc().get_size()); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->workspace_desc(), @@ -103,7 +103,7 @@ class LRNMKLDNNHandler } std::shared_ptr AcquireBackwardWorkspaceMemory( - const Tensor* workspace) { + const phi::DenseTensor* workspace) { const T* workspace_data = workspace->data(); return this->AcquireMemoryFromPrimitive( this->fwd_pd_->workspace_desc(), @@ -128,9 +128,9 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel { ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); - auto x = ctx.Input("X"); - auto out = ctx.Output("Out"); - auto mid = ctx.Output("MidOut"); + auto x = ctx.Input("X"); + auto out = ctx.Output("Out"); + auto mid = ctx.Output("MidOut"); LRNMKLDNNHandler handler(ctx, mkldnn_engine, ctx.GetPlace(), x); @@ -173,11 +173,11 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel { paddle::platform::errors::PreconditionNotMet( "Operator DNNL LRNGrad must use CPUPlace")); - auto in_x = ctx.Input("X"); - auto mid = ctx.Input("MidOut"); + auto in_x = ctx.Input("X"); + auto mid = ctx.Input("MidOut"); - auto out_grad = ctx.Input(framework::GradVarName("Out")); - auto in_x_grad = ctx.Output(framework::GradVarName("X")); + auto out_grad = ctx.Input(framework::GradVarName("Out")); + auto in_x_grad = ctx.Output(framework::GradVarName("X")); auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); diff --git a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.h b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.h index 0abd53a5bb616..53dd177071496 100644 --- a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.h +++ b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.h @@ -24,7 +24,7 @@ namespace operators { using framework::ExecutionContext; using platform::MKLDNNDeviceContext; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class MatMulGradMKLDNNKernel : public framework::OpKernel { @@ -35,13 +35,13 @@ class MatMulGradMKLDNNKernel : public framework::OpKernel { void ExecuteMatMulGrad(const ExecutionContext& ctx, const MKLDNNDeviceContext& dev_ctx, const dnnl::engine& engine, - Tensor* x, + phi::DenseTensor* x, bool trans_x, bool is_fold_init_dims_x, - Tensor* y, + phi::DenseTensor* y, bool trans_y, bool is_fold_init_dims_y, - Tensor* out) const; + phi::DenseTensor* out) const; void RunKernel(const ExecutionContext& ctx) const; }; } // namespace operators diff --git a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc 
b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc index 000e31aad9ac9..44296d12f2bac 100644 --- a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc @@ -24,7 +24,7 @@ using paddle::platform::MKLDNNFormatForSize; using paddle::platform::MKLDNNGetDataType; using paddle::platform::to_void_cast; using phi::vectorize; -using Tensor = paddle::framework::Tensor; +using Tensor = phi::DenseTensor; using paddle::framework::GradVarName; using phi::make_ddim; @@ -106,7 +106,7 @@ static paddle::framework::DDim ColumnMatrixDimsFromVector( phi::DDim GetDimForInput(const ExecutionContext &ctx, std::string input_name) { auto shape = ctx.Attr>("fused_reshape_" + input_name); auto axis = ctx.Attr>("fused_transpose_" + input_name); - auto input_dims = ctx.Input(input_name)->dims(); + auto input_dims = ctx.Input(input_name)->dims(); if (!shape.empty() && !axis.empty()) { return input_dims.reshape(shape).transpose(axis); } @@ -182,9 +182,9 @@ class MatMulMKLDNNHandler } public: - void Execute(const paddle::framework::Tensor *x, - const paddle::framework::Tensor *y, - paddle::framework::Tensor *out) { + void Execute(const phi::DenseTensor *x, + const phi::DenseTensor *y, + phi::DenseTensor *out) { const auto src_memory_p = this->AcquireSrcMemory(x); const auto weights_memory_p = this->AcquireWeightsMemory(y); const auto dst_memory_p = this->AcquireDstMemory(out); @@ -217,8 +217,7 @@ class MatMulMKLDNNHandler out->set_mem_desc(dst_memory_p->get_desc().reshape(out->dims())); } - std::shared_ptr AcquireDstMemory( - paddle::framework::Tensor *output) { + std::shared_ptr AcquireDstMemory(phi::DenseTensor *output) { // We cannot use base AcquireDstMemory as it makes an allocation request // base on DST memory primitive size. This is fine in general, but in MatMul // we have primitive that covers only one batch of Data and then shift @@ -241,7 +240,7 @@ class MatMulMKLDNNHandler const ExecutionContext &ctx, std::string input_name) { auto shape = ctx.Attr>("fused_reshape_" + input_name); auto axis = ctx.Attr>("fused_transpose_" + input_name); - auto input_dims = ctx.Input(input_name)->dims(); + auto input_dims = ctx.Input(input_name)->dims(); auto new_dims = input_dims; if (!shape.empty() && !axis.empty()) { new_dims = input_dims.reshape(shape).transpose(axis); @@ -478,9 +477,9 @@ static void ExecuteMatMul(const ExecutionContext &ctx) { ctx.HasAttr("fuse_activation") ? 
ctx.Attr("fuse_activation") == "relu" : false; - auto *x = ctx.Input("X"); - auto *y = ctx.Input("Y"); - auto *out = ctx.Output("Out"); + auto *x = ctx.Input("X"); + auto *y = ctx.Input("Y"); + auto *out = ctx.Output("Out"); const auto &dev_ctx = ctx.template device_context(); const auto &onednn_engine = dev_ctx.GetEngine(); @@ -551,7 +550,7 @@ std::vector GetInputStrides(const ExecutionContext &ctx, const std::string input_name) { auto shape = ctx.Attr>("fused_reshape_" + input_name); auto axis = ctx.Attr>("fused_transpose_" + input_name); - auto input_dims = ctx.Input(input_name)->dims(); + auto input_dims = ctx.Input(input_name)->dims(); auto new_dims = input_dims; if (!shape.empty() && !axis.empty()) { new_dims = input_dims.reshape(shape).transpose(axis); @@ -639,7 +638,7 @@ void ExecuteMatMulV2(const ExecutionContext &ctx, {DNNL_ARG_DST, *dst_memory_p}}; if (ctx.HasInput("ResidualData")) { - auto *residual_data = ctx.Input("ResidualData"); + auto *residual_data = ctx.Input("ResidualData"); const auto residual_data_memory_p = handler.AcquireSrcMemory(residual_data); matmul_args.insert({DNNL_ARG_ATTR_MULTIPLE_POST_OP(0) | DNNL_ARG_SRC_1, *residual_data_memory_p}); @@ -746,9 +745,9 @@ class MatMulV2MKLDNNKernel : public paddle::framework::OpKernel { const auto &dev_ctx = ctx.template device_context(); const auto &onednn_engine = dev_ctx.GetEngine(); - auto *x = ctx.Input("X"); - auto *y = ctx.Input("Y"); - auto *out = ctx.Output("Out"); + auto *x = ctx.Input("X"); + auto *y = ctx.Input("Y"); + auto *out = ctx.Output("Out"); bool trans_x = ctx.HasAttr("trans_x") ? ctx.Attr("trans_x") : ctx.Attr("transpose_X"); bool trans_y = ctx.HasAttr("trans_y") ? ctx.Attr("trans_y") @@ -858,8 +857,8 @@ class MatMulV2GradMKLDNNKernel : public paddle::framework::OpKernel { const auto &dev_ctx = ctx.template device_context(); const auto &onednn_engine = dev_ctx.GetEngine(); - auto *x = ctx.Input("X"); - auto *y = ctx.Input("Y"); + auto *x = ctx.Input("X"); + auto *y = ctx.Input("Y"); auto x_dims = vectorize(x->dims()); auto y_dims = vectorize(y->dims()); @@ -882,9 +881,9 @@ class MatMulV2GradMKLDNNKernel : public paddle::framework::OpKernel { return; } - auto *dout = ctx.Input(GradVarName("Out")); - auto *dx = ctx.Output(GradVarName("X")); - auto *dy = ctx.Output(GradVarName("Y")); + auto *dout = ctx.Input(GradVarName("Out")); + auto *dx = ctx.Output(GradVarName("X")); + auto *dy = ctx.Output(GradVarName("Y")); bool trans_x = ctx.HasAttr("trans_x") ? ctx.Attr("trans_x") : ctx.Attr("transpose_X"); @@ -1133,11 +1132,11 @@ void MatMulGradMKLDNNKernel::RunKernel(const ExecutionContext &ctx) const { ctx.template device_context(); const auto &onednn_engine = dev_ctx.GetEngine(); - auto x = *ctx.Input("X"); - auto y = *ctx.Input("Y"); - auto dout = *ctx.Input(framework::GradVarName("Out")); - auto *dx = ctx.Output(framework::GradVarName("X")); - auto *dy = ctx.Output(framework::GradVarName("Y")); + auto x = *ctx.Input("X"); + auto y = *ctx.Input("Y"); + auto dout = *ctx.Input(framework::GradVarName("Out")); + auto *dx = ctx.Output(framework::GradVarName("X")); + auto *dy = ctx.Output(framework::GradVarName("Y")); bool transpose_x = ctx.HasAttr("transpose_X") ? 
ctx.Attr("transpose_X") : ctx.Attr("trans_x"); diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc index e9150b0c58f76..29329351de8d6 100644 --- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc @@ -28,7 +28,6 @@ using framework::DataLayout; using framework::DDim; using framework::ExecutionContext; using framework::LoDTensor; -using framework::Tensor; using platform::MatMulV2MKLDNNHandler; using platform::MKLDNNDeviceContext; @@ -378,9 +377,9 @@ class MulMKLDNNINT8Kernel : public framework::OpKernel { auto &dev_ctx = ctx.template device_context(); auto &mkldnn_engine = dev_ctx.GetEngine(); - const Tensor *x = ctx.Input("X"); - const Tensor *y = ctx.Input("Y"); - Tensor *out = ctx.Output("Out"); + const Tensor *x = ctx.Input("X"); + const Tensor *y = ctx.Input("Y"); + Tensor *out = ctx.Output("Out"); auto out_dims = out->dims(); auto mul = GetMulPrimitive(dev_ctx, ctx, x, y, out, mkldnn_engine); @@ -451,9 +450,9 @@ class MulMKLDNNKernel : public framework::OpKernel { const auto &dev_ctx = ctx.template device_context(); const auto &onednn_engine = dev_ctx.GetEngine(); - const auto *x = ctx.Input("X"); - const auto *y = ctx.Input("Y"); - auto *out = ctx.Output("Out"); + const auto *x = ctx.Input("X"); + const auto *y = ctx.Input("Y"); + auto *out = ctx.Output("Out"); int x_num_col_dims = ctx.Attr("x_num_col_dims"); int y_num_col_dims = ctx.Attr("y_num_col_dims"); @@ -502,7 +501,8 @@ class MulGradMKLDNNKernel : public MulMKLDNNKernel { const auto *x = ctx.Input("X"); const auto *y = ctx.Input("Y"); - const auto *dout = ctx.Input(framework::GradVarName("Out")); + const auto *dout = + ctx.Input(framework::GradVarName("Out")); auto *dx = ctx.Output(framework::GradVarName("X")); auto *dy = ctx.Output(framework::GradVarName("Y")); diff --git a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc index e7a528c452b8d..39af6d780ba86 100644 --- a/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pad3d_mkldnn_op.cc @@ -17,8 +17,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - /* Pad3D is done by using up to 7 reorders. Following example is done on 2D data for simplicity, but it is straightforward to extend it to 3D case. 
@@ -72,9 +70,9 @@ class PadMKLDNNKernel : public framework::OpKernel { const auto& onednn_engine = dev_ctx.GetEngine(); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - auto* paddings_tensor = ctx.Input("Paddings"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); + auto* paddings_tensor = ctx.Input("Paddings"); std::vector paddings(ctx.Attr>("paddings")); if (paddings_tensor) { std::copy(paddings_tensor->data(), diff --git a/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc index da401a4947f55..e3b9d3ffd7c6a 100644 --- a/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc @@ -19,7 +19,7 @@ namespace paddle { namespace operators { using dnnl::memory; -using framework::Tensor; + using platform::GetMKLDNNFormat; using platform::MKLDNNDeviceContext; using platform::MKLDNNGetDataType; @@ -34,8 +34,8 @@ class PReluMKLDNNHandler PReluMKLDNNHandler(const MKLDNNDeviceContext& dev_ctx, const dnnl::engine engine, platform::Place cpu_place, - const Tensor* x, - const Tensor* weights, + const phi::DenseTensor* x, + const phi::DenseTensor* weights, const std::string& uniq_name, const std::string& mode, const std::string& data_format, @@ -70,7 +70,7 @@ class PReluMKLDNNHandler } std::shared_ptr AcquireWeightsMemoryPossiblyWithReorder( - const Tensor* weights, const bool is_test) { + const phi::DenseTensor* weights, const bool is_test) { const T* weights_data = weights->data(); // if weights are 1D, every format tag is correct, so we accept @@ -88,7 +88,7 @@ class PReluMKLDNNHandler is_test); } - std::shared_ptr AcquireDiffWeightsMemory(Tensor* output) { + std::shared_ptr AcquireDiffWeightsMemory(phi::DenseTensor* output) { T* output_data = output->mutable_data( this->place_, this->bwd_pd_->diff_weights_desc().get_size()); return this->AcquireMemoryFromPrimitive( @@ -108,9 +108,9 @@ class PReluMKLDNNKernel : public framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); - const auto* x = ctx.Input("X"); - const auto* alpha = ctx.Input("Alpha"); - auto* out = ctx.Output("Out"); + const auto* x = ctx.Input("X"); + const auto* alpha = ctx.Input("Alpha"); + auto* out = ctx.Output("Out"); const bool is_test = ctx.Attr("is_test"); const auto mode = ctx.Attr("mode"); const auto data_format = ctx.Attr("data_format"); @@ -153,11 +153,12 @@ class PReluGradMKLDNNKernel : public framework::OpKernel { const auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); - auto* x = ctx.Input("X"); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dalpha = ctx.Output(framework::GradVarName("Alpha")); - auto* alpha = ctx.Input("Alpha"); + auto* x = ctx.Input("X"); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dalpha = + ctx.Output(framework::GradVarName("Alpha")); + auto* alpha = ctx.Input("Alpha"); const bool is_test = ctx.Attr("is_test"); const auto mode = ctx.Attr("mode"); const auto data_format = ctx.Attr("data_format"); diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc index 54827a9dd904b..af8843c74179e 100644 --- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc +++ 
b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc @@ -26,7 +26,7 @@ using dnnl::memory; using dnnl::primitive; using dnnl::reorder; using platform::to_void_cast; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using dnnl::stream; using framework::DataLayout; using platform::GetMKLDNNFormat; @@ -35,8 +35,8 @@ template class QuantOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("Input"); - auto* out = ctx.Output("Output"); + auto* x = ctx.Input("Input"); + auto* out = ctx.Output("Output"); const auto quantization_scale = ctx.Attr("Scale"); const auto quantization_shift = ctx.Attr("Shift"); diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc index ea30d7a6c5fc2..abfef00ae1678 100644 --- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "dnnl.hpp" +#include "dnnl.hpp" // NOLINT #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/requantize_op.h" @@ -24,7 +24,7 @@ namespace operators { using dnnl::memory; using dnnl::reorder; using platform::to_void_cast; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; namespace { @@ -38,13 +38,13 @@ template class ReQuantOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); + auto* input = ctx.Input("Input"); auto scale_in = ctx.Attr("Scale_in"); auto shift_in = ctx.Attr("Shift_in"); auto scale_out = ctx.Attr("Scale_out"); auto shift_out = ctx.Attr("Shift_out"); bool with_shift = shift_in != 0.0f || shift_out != 0.0f; - auto* output = ctx.Output("Output"); + auto* output = ctx.Output("Output"); PADDLE_ENFORCE_NE( scale_in, diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc index ea56b84c90889..dea6abd0c02b4 100644 --- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc @@ -35,7 +35,7 @@ using platform::GetMKLDNNFormat; using platform::to_void_cast; static std::vector extract_shape( - const std::vector& list_new_shape_tensor) { + const std::vector& list_new_shape_tensor) { std::vector vec_new_shape; vec_new_shape.reserve(list_new_shape_tensor.size()); @@ -158,7 +158,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { const framework::ExecutionContext& ctx, framework::DDim& x_dims, // NOLINT framework::DDim& out_dims) const { // NOLINT - auto list_new_shape_tensor = ctx.MultiInput("ShapeTensor"); + auto list_new_shape_tensor = + ctx.MultiInput("ShapeTensor"); if (list_new_shape_tensor.size() > 0) { auto new_shape = extract_shape(list_new_shape_tensor); out_dims = ValidateShape(new_shape, x_dims); @@ -202,7 +203,8 @@ class ReshapeMKLDNNKernel : public framework::OpKernel { } protected: - static dnnl::memory::format_tag getPlainFormatTag(const Tensor* tensor) { + static dnnl::memory::format_tag getPlainFormatTag( + const phi::DenseTensor* tensor) { auto tensor_dims_size = tensor->dims().size(); PADDLE_ENFORCE_EQ( tensor_dims_size <= 6 && tensor_dims_size >= 1, diff --git 
a/paddle/fluid/operators/mkldnn/shape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/shape_mkldnn_op.cc index 6a05585a37c6f..0e0e77e33a6d1 100644 --- a/paddle/fluid/operators/mkldnn/shape_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/shape_mkldnn_op.cc @@ -18,7 +18,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using SelectedRows = phi::SelectedRows; @@ -43,7 +43,7 @@ class ShapeMKLDNNKernel : public framework::OpKernel { in_dims = phi::make_ddim(rdims); } } - auto* out_t = ctx.Output("Out"); + auto* out_t = ctx.Output("Out"); out_t->Resize({in_dims.size()}); auto out_data = out_t->mutable_data(platform::CPUPlace()); for (int i = 0; i < in_dims.size(); ++i) { diff --git a/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc index 97c8184ebec28..fd1b1927f5fbb 100644 --- a/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/shuffle_channel_mkldnn_op.cc @@ -17,13 +17,12 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; using platform::MKLDNNGetDataType; template class ShuffleChannelMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT { public: - ShuffleChannelMKLDNNHandler(const Tensor* x, + ShuffleChannelMKLDNNHandler(const phi::DenseTensor* x, const int group, const dnnl::engine engine, platform::Place cpu_place) @@ -43,8 +42,8 @@ class ShuffleChannelMKLDNNKernel : public framework::OpKernel { ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); - const auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + const auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); // oneDNN handles group using C/g instead of g const int group = x->dims()[1] / ctx.Attr("group"); diff --git a/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc index a7c6bd28486f8..05d05ab995a4b 100644 --- a/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc @@ -18,8 +18,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using paddle::framework::Tensor; - template class SliceMKLDNNKernel : public framework::OpKernel { public: @@ -32,8 +30,8 @@ class SliceMKLDNNKernel : public framework::OpKernel { ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); - auto* x = ctx.Input("Input"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("Input"); + auto* out = ctx.Output("Out"); auto x_vec_dims = phi::vectorize(x->dims()); @@ -48,18 +46,21 @@ class SliceMKLDNNKernel : public framework::OpKernel { std::vector ends(ctx.Attr>("ends").begin(), ctx.Attr>("ends").end()); - auto starts_tensor_list = ctx.MultiInput("StartsTensorList"); + auto starts_tensor_list = + ctx.MultiInput("StartsTensorList"); if (ctx.HasInput("StartsTensor")) { - starts = GetDataFromTensor(ctx.Input("StartsTensor")); + starts = GetDataFromTensor( + ctx.Input("StartsTensor")); } else if (starts_tensor_list.size() > 0) { starts = GetDataFromTensorList(starts_tensor_list); } auto decrease_axis = ctx.Attr>("decrease_axis"); - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); + auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); if (ctx.HasInput("EndsTensor")) { - ends = GetDataFromTensor(ctx.Input("EndsTensor")); + ends = + GetDataFromTensor(ctx.Input("EndsTensor")); } else if (ends_tensor_list.size() > 0) { ends = GetDataFromTensorList(ends_tensor_list); } @@ -141,8 +142,8 @@ class SliceGradMKLDNNKernel : public framework::OpKernel { ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("Input")); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("Input")); auto dx_vec_dims = phi::vectorize(dx->dims()); auto dout_vec_dims = phi::vectorize(dout->dims()); @@ -158,16 +159,19 @@ class SliceGradMKLDNNKernel : public framework::OpKernel { std::vector ends(ctx.Attr>("ends").begin(), ctx.Attr>("ends").end()); - auto starts_tensor_list = ctx.MultiInput("StartsTensorList"); + auto starts_tensor_list = + ctx.MultiInput("StartsTensorList"); if (ctx.HasInput("StartsTensor")) { - starts = GetDataFromTensor(ctx.Input("StartsTensor")); + starts = GetDataFromTensor( + ctx.Input("StartsTensor")); } else if (starts_tensor_list.size() > 0) { starts = GetDataFromTensorList(starts_tensor_list); } - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); + auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); if (ctx.HasInput("EndsTensor")) { - ends = GetDataFromTensor(ctx.Input("EndsTensor")); + ends = + GetDataFromTensor(ctx.Input("EndsTensor")); } else if (ends_tensor_list.size() > 0) { ends = GetDataFromTensorList(ends_tensor_list); } diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index 2bb82186483da..644998ea5ecdb 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -19,7 +19,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using paddle::framework::Tensor; using paddle::platform::MKLDNNDeviceContext; using paddle::platform::MKLDNNMemDesc; @@ -39,8 +38,8 @@ class SoftmaxMKLDNNHandler public: SoftmaxMKLDNNHandler(const dnnl::engine mkldnn_engine, platform::Place cpu_place, - const Tensor* input, - Tensor* output, + const phi::DenseTensor* input, + phi::DenseTensor* output, const int axis) : platform::MKLDNNHandlerNoCachingT { auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); - const Tensor* input = ctx.Input("X"); - Tensor* output = ctx.Output("Out"); + const phi::DenseTensor* input = ctx.Input("X"); + phi::DenseTensor* output = ctx.Output("Out"); bool is_inplaced = input->IsSharedBufferWith(*output); const int axis = @@ -143,9 +142,11 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel { "Operator DNNL SoftmaxGrad must use CPUPlace")); auto& dev_ctx = ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); - const Tensor* output = ctx.Input("Out"); - auto* out_grad = ctx.template Input(framework::GradVarName("Out")); - auto* in_x_grad = ctx.template Output(framework::GradVarName("X")); + const phi::DenseTensor* output = ctx.Input("Out"); + auto* out_grad = + ctx.template Input(framework::GradVarName("Out")); + auto* in_x_grad = + ctx.template Output(framework::GradVarName("X")); SoftmaxMKLDNNHandler handler(ctx, mkldnn_engine, diff --git a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h index c41864ee26f55..25886c5791fea 100644 --- a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h +++ b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h @@ -18,14 +18,12 @@ limitations under the License. */ namespace paddle { namespace operators { -using paddle::framework::Tensor; - template class SoftplusMKLDNNHandler : public platform::MKLDNNHandlerNoCachingT { public: SoftplusMKLDNNHandler(const framework::ExecutionContext& ctx, - const Tensor* x, + const phi::DenseTensor* x, const float beta, const dnnl::engine engine) : platform::MKLDNNHandlerNoCachingT(engine, @@ -70,8 +68,8 @@ void custom_softplus_eltwise_forward(const framework::ExecutionContext& ctx) { ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); - const auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + const auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); bool is_inplaced = x->IsSharedBufferWith(*out); diff --git a/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc index f71931ad1ecc7..33c8c563a9f03 100644 --- a/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/split_mkldnn_op.cc @@ -18,8 +18,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using paddle::framework::Tensor; - static inline std::vector> CalculateOutsDims( const framework::DDim& in_dims, const size_t num, @@ -63,8 +61,8 @@ class SplitMKLDNNKernel : public framework::OpKernel { ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); - const auto* x = ctx.Input("X"); - auto outs = ctx.MultiOutput("Out"); + const auto* x = ctx.Input("X"); + auto outs = ctx.MultiOutput("Out"); int num = ctx.Attr("num"); auto sections = ctx.Attr>("sections"); @@ -74,12 +72,13 @@ class SplitMKLDNNKernel : public framework::OpKernel { bool need_resize = false; if (ctx.HasInput("AxisTensor")) { - auto* axis_tensor = ctx.Input("AxisTensor"); + auto* axis_tensor = ctx.Input("AxisTensor"); axis = GetDataFromTensor(axis_tensor)[0]; need_resize = true; } - auto sections_tensor_list = ctx.MultiInput("SectionsTensorList"); + auto sections_tensor_list = + ctx.MultiInput("SectionsTensorList"); if (sections_tensor_list.size() > 0) { sections = GetDataFromTensorList(sections_tensor_list); need_resize = true; diff --git a/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc index 1e546e44fa241..4426f820b64d0 100644 --- a/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc @@ -23,7 +23,7 @@ using dnnl::primitive; using dnnl::stream; using framework::DataLayout; using framework::LoDTensor; -using framework::Tensor; + using platform::to_void_cast; template @@ -32,8 +32,8 @@ class StackMKLDNNHandler public: StackMKLDNNHandler(const framework::ExecutionContext& ctx, const dnnl::engine mkldnn_engine, - const std::vector& inputs, - Tensor* output) + const std::vector& inputs, + phi::DenseTensor* output) : platform::MKLDNNHandlerNoCachingT(mkldnn_engine, ctx.GetPlace()) { int stack_axis = ctx.Attr("axis"); @@ -93,7 +93,8 @@ class StackMKLDNNHandler dst_md, stack_axis, srcs_md, this->engine_)); } - std::shared_ptr AcquireSrcMemory(const Tensor& input, int i) { + std::shared_ptr AcquireSrcMemory(const phi::DenseTensor& input, + int i) { const T* input_data = input.data(); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i), to_void_cast(input_data)); @@ -108,9 +109,9 @@ class StackMKLDNNOpKernel : public paddle::framework::OpKernel { ctx.template device_context(); const auto& mkldnn_engine = dev_ctx.GetEngine(); - auto multi_input = ctx.MultiInput("X"); + auto multi_input = ctx.MultiInput("X"); - Tensor* output = ctx.Output("Y"); + phi::DenseTensor* output = ctx.Output("Y"); StackMKLDNNHandler handler(ctx, mkldnn_engine, multi_input, output); diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc index 072016d729cdb..ab415ff47a0ec 100644 --- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc @@ -38,7 +38,7 @@ namespace operators { using paddle::platform::MKLDNNDeviceContext; using phi::CPUContext; using platform::to_void_cast; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using LoDTensor = framework::LoDTensor; @@ -84,7 +84,7 @@ class SumMKLDNNHandler new dnnl::sum::primitive_desc(dst_md, scales, srcs_md, this->engine_)); } - std::shared_ptr AcquireSrcMemory(const framework::Tensor& input, + std::shared_ptr AcquireSrcMemory(const phi::DenseTensor& input, int i) { const T* input_data = input.data(); return 
this->AcquireMemoryFromPrimitive(this->fwd_pd_->src_desc(i), diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index a01901950bc41..b7b0f33ade85c 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -21,7 +21,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using framework::DataLayout; template @@ -50,7 +50,7 @@ class TransposeMKLDNNHandler { return std::make_shared(src_md, engine_, ptr); } - std::shared_ptr AcquireDstMemory(framework::Tensor* output, + std::shared_ptr AcquireDstMemory(phi::DenseTensor* output, platform::Place place) { auto dst_md = Axis2MemoryDesc(dims_, axis_); auto dst_data = output->mutable_data(place, dst_md.get_size()); @@ -101,8 +101,8 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel { const auto& mkldnn_engine = dev_ctx.GetEngine(); std::vector axis = ctx.Attr>("axis"); int ndims = axis.size(); - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); const T* input_data = input->data(); if (ndims == 1) { @@ -140,9 +140,8 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel { true, paddle::platform::errors::PreconditionNotMet( "Operator DNNL TransposeGrad must use CPUPlace")); - auto* out_grad = - ctx.Input(framework::GradVarName("Out")); - auto* x_grad = ctx.Output(framework::GradVarName("X")); + auto* out_grad = ctx.Input(framework::GradVarName("Out")); + auto* x_grad = ctx.Output(framework::GradVarName("X")); if (!x_grad) return; auto& dev_ctx = ctx.template device_context(); diff --git a/paddle/fluid/operators/mlu/mlu_baseop.cc b/paddle/fluid/operators/mlu/mlu_baseop.cc index 4cd754775d9c0..a9da6ea2abb56 100644 --- a/paddle/fluid/operators/mlu/mlu_baseop.cc +++ b/paddle/fluid/operators/mlu/mlu_baseop.cc @@ -206,7 +206,7 @@ MLUCnnlTensorDesc::MLUCnnlTensorDesc(const int tensor_dim, cnnlSetTensorDescriptorPosition(raw_tensor_desc, position)); } -MLUCnnlTensorDesc::MLUCnnlTensorDesc(const Tensor& tensor, +MLUCnnlTensorDesc::MLUCnnlTensorDesc(const phi::DenseTensor& tensor, const cnnlTensorLayout_t layout, const cnnlDataType_t tensor_dtype) { auto dims = phi::vectorize(tensor.dims()); @@ -227,11 +227,11 @@ MLUCnnlTensorDesc::MLUCnnlTensorDesc(const Tensor& tensor, } } -MLUCnnlTensorDesc::MLUCnnlTensorDesc(const Tensor& tensor) +MLUCnnlTensorDesc::MLUCnnlTensorDesc(const phi::DenseTensor& tensor) : MLUCnnlTensorDesc( tensor, CNNL_LAYOUT_ARRAY, ToCnnlDataType(tensor.dtype())) {} -MLUCnnlTensorDesc::MLUCnnlTensorDesc(const Tensor& tensor, +MLUCnnlTensorDesc::MLUCnnlTensorDesc(const phi::DenseTensor& tensor, cnnlTensorLayout_t layout, const cnnlDataType_t tensor_dtype, int position) @@ -240,7 +240,7 @@ MLUCnnlTensorDesc::MLUCnnlTensorDesc(const Tensor& tensor, cnnlSetTensorDescriptorPosition(raw_tensor_desc, position)); } -MLUCnnlTensorDesc::MLUCnnlTensorDesc(const Tensor& tensor, +MLUCnnlTensorDesc::MLUCnnlTensorDesc(const phi::DenseTensor& tensor, cnnlTensorLayout_t layout, const cnnlDataType_t tensor_dtype, int position, diff --git a/paddle/fluid/operators/mlu/mlu_baseop.h b/paddle/fluid/operators/mlu/mlu_baseop.h index e56331b2728c4..f8d5bfd205c7c 100644 --- a/paddle/fluid/operators/mlu/mlu_baseop.h +++ b/paddle/fluid/operators/mlu/mlu_baseop.h @@ -28,7 +28,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DataLayout = framework::DataLayout; using ExecutionContext = framework::ExecutionContext; using DeviceContextPool = platform::DeviceContextPool; @@ -86,9 +86,9 @@ inline cnnlInterpBackwardMode_t GetMLUCnnlInterpBackwardMode( "Not support interp mode of MLU Device: %s", interp_mode)); } -inline const void* GetBasePtr(const Tensor* t) { return t->data(); } +inline const void* GetBasePtr(const phi::DenseTensor* t) { return t->data(); } -inline void* GetBasePtr(Tensor* t) { return t->data(); } +inline void* GetBasePtr(phi::DenseTensor* t) { return t->data(); } inline cnnlDataType_t ToCnnlDataType( const paddle::experimental::DataType& dtype) { @@ -256,18 +256,18 @@ class MLUCnnlTensorDesc { const cnnlDataType_t tensor_dtype, int position); - MLUCnnlTensorDesc(const Tensor& tensor, + MLUCnnlTensorDesc(const phi::DenseTensor& tensor, const cnnlTensorLayout_t layout, const cnnlDataType_t tensor_dtype); - explicit MLUCnnlTensorDesc(const Tensor& tensor); + explicit MLUCnnlTensorDesc(const phi::DenseTensor& tensor); - MLUCnnlTensorDesc(const Tensor& tensor, + MLUCnnlTensorDesc(const phi::DenseTensor& tensor, cnnlTensorLayout_t layout, const cnnlDataType_t tensor_dtype, int position); - MLUCnnlTensorDesc(const Tensor& tensor, + MLUCnnlTensorDesc(const phi::DenseTensor& tensor, cnnlTensorLayout_t layout, const cnnlDataType_t tensor_dtype, int position, @@ -2211,8 +2211,8 @@ inline void SetMLUTransposePerm(const framework::DDim& dims, template inline void TransposeFromMLUTensor(const ExecutionContext& ctx, const std::vector perm, - const Tensor* transformed_input, - Tensor* transformed_output, + const phi::DenseTensor* transformed_input, + phi::DenseTensor* transformed_output, bool need_reshape_or_alloc) { const int dim_size = perm.size(); if (need_reshape_or_alloc) { @@ -2241,7 +2241,7 @@ inline void TransposeFromMLUTensor(const ExecutionContext& ctx, template inline void FillMLUTensorWithHostValue(const ExecutionContext& ctx, T value, - Tensor* out) { + phi::DenseTensor* out) { MLUCnnlTensorDesc out_desc(*out); MLUCnnl::Fill( ctx, CNNL_POINTER_MODE_HOST, &value, out_desc.get(), GetBasePtr(out)); diff --git a/paddle/fluid/operators/modified_huber_loss_op.cu b/paddle/fluid/operators/modified_huber_loss_op.cu index 67c3a5d90da9a..330f4ca3596bd 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.cu +++ b/paddle/fluid/operators/modified_huber_loss_op.cu @@ -23,7 +23,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; struct ModifiedHuberLossBackward { template @@ -45,10 +45,10 @@ template class ModifiedHuberLossGradGPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in0 = context.Input("Y"); - auto* in1 = context.Input("IntermediateVal"); - auto* in2 = context.Input(framework::GradVarName("Out")); - auto* out0 = context.Output(framework::GradVarName("X")); + auto* in0 = context.Input("Y"); + auto* in1 = context.Input("IntermediateVal"); + auto* in2 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); if (out0) { auto counts = phi::product(in1->dims()); diff --git a/paddle/fluid/operators/modified_huber_loss_op.h b/paddle/fluid/operators/modified_huber_loss_op.h index cde9c818dd6af..50d5a14548e35 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.h +++ b/paddle/fluid/operators/modified_huber_loss_op.h @@ -21,7 +21,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template @@ -57,10 +57,10 @@ template class ModifiedHuberLossKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in0 = context.Input("X"); - auto* in1 = context.Input("Y"); - auto* out0 = context.Output("IntermediateVal"); - auto* out1 = context.Output("Out"); + auto* in0 = context.Input("X"); + auto* in1 = context.Input("Y"); + auto* out0 = context.Output("IntermediateVal"); + auto* out1 = context.Output("Out"); out0->mutable_data(context.GetPlace()); out1->mutable_data(context.GetPlace()); @@ -84,10 +84,10 @@ template class ModifiedHuberLossGradCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in0 = context.Input("Y"); - auto* in1 = context.Input("IntermediateVal"); - auto* in2 = context.Input(framework::GradVarName("Out")); - auto* out0 = context.Output(framework::GradVarName("X")); + auto* in0 = context.Input("Y"); + auto* in1 = context.Input("IntermediateVal"); + auto* in2 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); if (out0) { const T* y_ptr = in0->data(); diff --git a/paddle/fluid/operators/mul_op.cc b/paddle/fluid/operators/mul_op.cc index 2d4ca62955eb1..9a3d540ea2ee2 100644 --- a/paddle/fluid/operators/mul_op.cc +++ b/paddle/fluid/operators/mul_op.cc @@ -30,7 +30,6 @@ namespace paddle { namespace operators { using framework::OpKernelType; -using framework::Tensor; constexpr int kMULMKLDNNINT8 = 1; constexpr int kMULMKLDNNFP32 = 2; diff --git a/paddle/fluid/operators/mul_op_npu.cc b/paddle/fluid/operators/mul_op_npu.cc index 6617cb277a791..ab8334909edcc 100644 --- a/paddle/fluid/operators/mul_op_npu.cc +++ b/paddle/fluid/operators/mul_op_npu.cc @@ -25,9 +25,9 @@ template class MulNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Output("Out"); int x_num_col_dims = ctx.Attr("x_num_col_dims"); int y_num_col_dims = ctx.Attr("y_num_col_dims"); auto stream = @@ -120,11 +120,11 @@ template class MulGradNPUKernel : public framework::OpKernel { public: 
void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); int x_num_col_dims = ctx.Attr("x_num_col_dims"); int y_num_col_dims = ctx.Attr("y_num_col_dims"); auto stream = diff --git a/paddle/fluid/operators/multi_dot_op.cc b/paddle/fluid/operators/multi_dot_op.cc index eeeda2bcb93ff..b83bc8ea6541b 100644 --- a/paddle/fluid/operators/multi_dot_op.cc +++ b/paddle/fluid/operators/multi_dot_op.cc @@ -27,7 +27,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class MultiDotOpMaker : public framework::OpProtoAndCheckerMaker { public: diff --git a/paddle/fluid/operators/multinomial_op_npu.cc b/paddle/fluid/operators/multinomial_op_npu.cc index 305d7cc5cd70c..206c7b041a9b3 100644 --- a/paddle/fluid/operators/multinomial_op_npu.cc +++ b/paddle/fluid/operators/multinomial_op_npu.cc @@ -22,14 +22,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class NPUMultinomialKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const auto x = ctx.Input("X"); - auto out = ctx.Output("Out"); + const auto x = ctx.Input("X"); + auto out = ctx.Output("Out"); const int64_t num_samples = ctx.Attr("num_samples"); const bool replacement = ctx.Attr("replacement"); diff --git a/paddle/fluid/operators/multiplex_op.cc b/paddle/fluid/operators/multiplex_op.cc index 5931e8d301439..749849a333f3d 100644 --- a/paddle/fluid/operators/multiplex_op.cc +++ b/paddle/fluid/operators/multiplex_op.cc @@ -23,7 +23,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class MultiplexOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/nccl/nccl_op.cu.cc b/paddle/fluid/operators/nccl/nccl_op.cu.cc index 04fb6957580d6..52c3aa57604f8 100644 --- a/paddle/fluid/operators/nccl/nccl_op.cu.cc +++ b/paddle/fluid/operators/nccl/nccl_op.cu.cc @@ -20,7 +20,7 @@ namespace paddle { namespace operators { using framework::LoDTensor; -using framework::Tensor; + using platform::Communicator; template diff --git a/paddle/fluid/operators/nce_op.cc b/paddle/fluid/operators/nce_op.cc index c9c4d1a4c74f3..4020dfb9afc71 100644 --- a/paddle/fluid/operators/nce_op.cc +++ b/paddle/fluid/operators/nce_op.cc @@ -21,8 +21,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class NCEOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/nce_op.h b/paddle/fluid/operators/nce_op.h index 2141ad0f50c76..8e7e02b9667b5 100644 --- a/paddle/fluid/operators/nce_op.h +++ b/paddle/fluid/operators/nce_op.h @@ -31,7 +31,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using SelectedRows = phi::SelectedRows; using Sampler = math::Sampler; @@ -46,7 +46,7 @@ template void PrepareSamples(const framework::ExecutionContext &context, Sampler *sampler, Tensor *sample_labels) { - auto label = context.Input("Label"); + auto label = context.Input("Label"); const int64_t *label_data = label->data(); auto label_dims = label->dims(); // for unitest @@ -98,9 +98,10 @@ class NCEKernel : public framework::OpKernel { break; } case 2: { - auto dist_probs = context.Input("CustomDistProbs"); - auto dist_alias = context.Input("CustomDistAlias"); - auto dist_alias_probs = context.Input("CustomDistAliasProbs"); + auto dist_probs = context.Input("CustomDistProbs"); + auto dist_alias = context.Input("CustomDistAlias"); + auto dist_alias_probs = + context.Input("CustomDistAliasProbs"); PADDLE_ENFORCE_EQ( dist_probs->numel(), @@ -153,14 +154,15 @@ class NCEKernel : public framework::OpKernel { } std::vector sample_out_dims; - auto label = context.Input("Label"); + auto label = context.Input("Label"); Tensor *sample_labels; Tensor *sample_out; Tensor sample_labels_tmp, sample_out_tmp; if (is_test) { // set dims of output(SampleOut) int num_true_classes = label->dims().size() == 2 ? label->dims()[1] : 1; - sample_out_dims.push_back((context.Input("Input"))->dims()[0]); + sample_out_dims.push_back( + (context.Input("Input"))->dims()[0]); sample_out_dims.push_back( (num_true_classes == -1) ? -1 : (num_neg_samples + num_true_classes)); @@ -170,8 +172,8 @@ class NCEKernel : public framework::OpKernel { sample_out = &sample_out_tmp; sample_out->Resize(phi::make_ddim(sample_out_dims)); } else { - sample_labels = context.Output("SampleLabels"); - sample_out = context.Output("SampleLogits"); + sample_labels = context.Output("SampleLabels"); + sample_out = context.Output("SampleLogits"); } PrepareSamples(context, sampler, sample_labels); @@ -189,12 +191,12 @@ class NCEKernel : public framework::OpKernel { } T *sample_out_data = sample_out->mutable_data(context.GetPlace()); - auto sample_weight = context.Input("SampleWeight"); + auto sample_weight = context.Input("SampleWeight"); const T *sample_weight_data = nullptr; if (sample_weight != nullptr) { sample_weight_data = sample_weight->data(); } - auto out = context.Output("Cost"); + auto out = context.Output("Cost"); T *out_data = out->mutable_data(context.GetPlace()); int64_t num_true_class = 1; if (label != nullptr) { @@ -203,7 +205,7 @@ class NCEKernel : public framework::OpKernel { int64_t sampled_labels_num = sample_labels->dims()[1]; // T b = 1. 
/ num_total_classes * num_neg_samples; // forward bias - auto bias = context.Input("Bias"); + auto bias = context.Input("Bias"); if (bias != nullptr) { const T *bias_data = bias->data(); for (int64_t i = 0; i < sample_labels->numel(); ++i) { @@ -215,9 +217,11 @@ class NCEKernel : public framework::OpKernel { } } // forward mul - auto input_mat = EigenMatrix::From(*(context.Input("Input"))); + auto input_mat = + EigenMatrix::From(*(context.Input("Input"))); - auto weight_mat = EigenMatrix::From(*(context.Input("Weight"))); + auto weight_mat = + EigenMatrix::From(*(context.Input("Weight"))); for (int64_t i = 0; i < sample_labels->numel(); ++i) { Eigen::Tensor result = (input_mat.chip(static_cast(i / sample_labels->dims()[1]), 0) * @@ -247,14 +251,15 @@ template class NCEGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto d_out = context.Input(framework::GradVarName("Cost")); + auto d_out = + context.Input(framework::GradVarName("Cost")); const T *d_out_data = d_out->data(); - auto label = context.Input("Label"); - auto sample_out = context.Input("SampleLogits"); + auto label = context.Input("Label"); + auto sample_out = context.Input("SampleLogits"); const T *sample_out_data = sample_out->data(); - auto sample_labels = context.Input("SampleLabels"); + auto sample_labels = context.Input("SampleLabels"); const int64_t *sample_labels_data = sample_labels->data(); - auto sample_weight = context.Input("SampleWeight"); + auto sample_weight = context.Input("SampleWeight"); const T *sample_weight_data = nullptr; if (sample_weight != nullptr) { sample_weight_data = sample_weight->data(); @@ -279,9 +284,10 @@ class NCEGradKernel : public framework::OpKernel { break; } case 2: { - auto dist_probs = context.Input("CustomDistProbs"); - auto dist_alias = context.Input("CustomDistAlias"); - auto dist_alias_probs = context.Input("CustomDistAliasProbs"); + auto dist_probs = context.Input("CustomDistProbs"); + auto dist_alias = context.Input("CustomDistAlias"); + auto dist_alias_probs = + context.Input("CustomDistAliasProbs"); PADDLE_ENFORCE_EQ( dist_probs->numel(), @@ -351,7 +357,8 @@ class NCEGradKernel : public framework::OpKernel { } // get d_bias - auto d_bias = context.Output(framework::GradVarName("Bias")); + auto d_bias = + context.Output(framework::GradVarName("Bias")); if (d_bias != nullptr) { T *d_bias_data = d_bias->mutable_data(context.GetPlace()); std::fill(d_bias_data, d_bias_data + d_bias->numel(), 0.0); @@ -364,12 +371,14 @@ class NCEGradKernel : public framework::OpKernel { if (!is_sparse) { // get d_w - auto d_w = context.Output(framework::GradVarName("Weight")); + auto d_w = + context.Output(framework::GradVarName("Weight")); if (d_w != nullptr) { auto d_w_data = d_w->mutable_data(context.GetPlace()); std::fill(d_w_data, d_w_data + d_w->numel(), 0.0); auto d_w_matrix = EigenMatrix::From(*d_w); - auto x_matrix = EigenMatrix::From(*(context.Input("Input"))); + auto x_matrix = + EigenMatrix::From(*(context.Input("Input"))); for (int64_t i = 0; i < sample_labels->numel(); ++i) { d_w_matrix.chip(sample_labels_data[i], 0) += x_matrix.chip(static_cast(i / sample_labels->dims()[1]), 0) * @@ -410,7 +419,8 @@ class NCEGradKernel : public framework::OpKernel { std::fill(d_w_data, d_w_data + d_table_value->numel(), 0.0); auto d_w_matrix = EigenMatrix::From(*d_table_value); - auto x_matrix = EigenMatrix::From(*(context.Input("Input"))); + auto x_matrix = + EigenMatrix::From(*(context.Input("Input"))); for (int64_t i = 0; i < 
sample_labels->numel(); ++i) { d_w_matrix.chip(d_w->Index(sample_labels_data[i]), 0) += x_matrix.chip(static_cast(i / sample_labels->dims()[1]), 0) * @@ -419,12 +429,14 @@ class NCEGradKernel : public framework::OpKernel { } // get d_x - auto d_x = context.Output(framework::GradVarName("Input")); + auto d_x = + context.Output(framework::GradVarName("Input")); if (d_x != nullptr) { auto *d_x_data = d_x->mutable_data(context.GetPlace()); std::fill(d_x_data, d_x_data + d_x->numel(), 0.0); auto d_x_matrix = EigenMatrix::From(*d_x); - auto w_matrix = EigenMatrix::From(*(context.Input("Weight"))); + auto w_matrix = + EigenMatrix::From(*(context.Input("Weight"))); for (int64_t i = 0; i < sample_labels->numel(); ++i) { d_x_matrix.chip(static_cast(i / sample_labels->dims()[1]), 0) += w_matrix.chip(sample_labels_data[i], 0) * sample_grad_data[i]; diff --git a/paddle/fluid/operators/norm_op_npu.cc b/paddle/fluid/operators/norm_op_npu.cc index e2b6875eeeaef..c5f0749227e23 100644 --- a/paddle/fluid/operators/norm_op_npu.cc +++ b/paddle/fluid/operators/norm_op_npu.cc @@ -16,7 +16,7 @@ namespace paddle { namespace operators { using DDim = framework::DDim; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; void CheckAxis(int axis, int rank) { // check the axis is in [-rank, rank-1] @@ -34,9 +34,9 @@ class NormNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { VLOG(4) << "Launch Norm Op Kernel on NPU." << std::endl; - auto *in_x = ctx.Input("X"); - auto *out_y = ctx.Output("Out"); - auto *out_norm = ctx.Output("Norm"); + auto *in_x = ctx.Input("X"); + auto *out_y = ctx.Output("Out"); + auto *out_norm = ctx.Output("Norm"); out_y->mutable_data(ctx.GetPlace()); out_norm->mutable_data(ctx.GetPlace()); auto xdim = in_x->dims(); @@ -67,10 +67,10 @@ class NormGradNPUKernel : public framework::OpKernel { float epsilon = ctx.Attr("epsilon"); int axis = ctx.Attr("axis"); - auto *x = ctx.Input("X"); - auto *y = ctx.Input("Out"); - auto *dy = ctx.Input(framework::GradVarName("Out")); - auto *dx = ctx.Output(framework::GradVarName("X")); + auto *x = ctx.Input("X"); + auto *y = ctx.Input("Out"); + auto *dy = ctx.Input(framework::GradVarName("Out")); + auto *dx = ctx.Output(framework::GradVarName("X")); auto xdim = x->dims(); CheckAxis(axis, xdim.size()); diff --git a/paddle/fluid/operators/norm_utils.cu.h b/paddle/fluid/operators/norm_utils.cu.h index 88f9ca02ff100..b331ef2529ac5 100644 --- a/paddle/fluid/operators/norm_utils.cu.h +++ b/paddle/fluid/operators/norm_utils.cu.h @@ -37,7 +37,7 @@ namespace cub = hipcub; namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DataLayout = framework::DataLayout; // math: dx = scale * ((x - mean) * inv_var / NxHxW * (np.mean(ddx, diff --git a/paddle/fluid/operators/number_count_op.cu b/paddle/fluid/operators/number_count_op.cu index 330163b1f9350..b9d46a8559bc6 100644 --- a/paddle/fluid/operators/number_count_op.cu +++ b/paddle/fluid/operators/number_count_op.cu @@ -38,7 +38,7 @@ static inline int GET_BLOCKS(const int N) { } using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template __global__ void initialize_zero_kernel(T* data, const int length) { diff --git a/paddle/fluid/operators/one_hot_op.cc b/paddle/fluid/operators/one_hot_op.cc index 59842249adcdd..8e1a07975e2da 100644 --- a/paddle/fluid/operators/one_hot_op.cc +++ b/paddle/fluid/operators/one_hot_op.cc @@ -65,7 
+65,7 @@ class OneHotOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "depth_tensor") { return expected_kernel_type; diff --git a/paddle/fluid/operators/one_hot_op.cu b/paddle/fluid/operators/one_hot_op.cu index 85594ff05742e..c91f2995af042 100644 --- a/paddle/fluid/operators/one_hot_op.cu +++ b/paddle/fluid/operators/one_hot_op.cu @@ -70,9 +70,9 @@ class OneHotCUDAKernel : public framework::OpKernel { int depth = -1; if (context.HasInput("depth_tensor")) { - auto* depth_tensor = context.Input("depth_tensor"); + auto* depth_tensor = context.Input("depth_tensor"); if (platform::is_gpu_place(depth_tensor->place())) { - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync( *depth_tensor, platform::CPUPlace(), &temp); depth = *temp.data(); diff --git a/paddle/fluid/operators/one_hot_op.h b/paddle/fluid/operators/one_hot_op.h index 95d767fed805c..6e139c94880be 100644 --- a/paddle/fluid/operators/one_hot_op.h +++ b/paddle/fluid/operators/one_hot_op.h @@ -77,7 +77,7 @@ struct OneHotOpFunctor { }; using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class OneHotKernel : public framework::OpKernel { public: @@ -87,7 +87,7 @@ class OneHotKernel : public framework::OpKernel { int depth = context.Attr("depth"); bool allow_out_of_range = context.Attr("allow_out_of_range"); if (context.HasInput("depth_tensor")) { - auto* depth_tensor = context.Input("depth_tensor"); + auto* depth_tensor = context.Input("depth_tensor"); auto* depth_data = depth_tensor->data(); depth = depth_data[0]; auto in_dims = in->dims(); diff --git a/paddle/fluid/operators/one_hot_op_npu.cc b/paddle/fluid/operators/one_hot_op_npu.cc index 5d6fe0d50bdd6..2ca74cac0a051 100644 --- a/paddle/fluid/operators/one_hot_op_npu.cc +++ b/paddle/fluid/operators/one_hot_op_npu.cc @@ -17,7 +17,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class OneHotNPUKernel : public framework::OpKernel { @@ -30,7 +30,7 @@ class OneHotNPUKernel : public framework::OpKernel { int depth = ctx.Attr("depth"); if (ctx.HasInput("depth_tensor")) { - auto* depth_tensor = ctx.Input("depth_tensor"); + auto* depth_tensor = ctx.Input("depth_tensor"); std::vector depth_data; framework::TensorToVector(*depth_tensor, dev_ctx, &depth_data); depth = depth_data[0]; diff --git a/paddle/fluid/operators/one_hot_op_xpu.cc b/paddle/fluid/operators/one_hot_op_xpu.cc index 7c213956bfde4..6812a2415ed53 100644 --- a/paddle/fluid/operators/one_hot_op_xpu.cc +++ b/paddle/fluid/operators/one_hot_op_xpu.cc @@ -23,7 +23,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class OneHotXPUKernel : public framework::OpKernel { @@ -37,7 +37,7 @@ class OneHotXPUKernel : public framework::OpKernel { // get depth from input tensor if (context.HasInput("depth_tensor")) { - auto* depth_tensor = context.Input("depth_tensor"); + auto* depth_tensor = context.Input("depth_tensor"); auto* depth_data = depth_tensor->data(); if (platform::is_xpu_place(depth_tensor->place())) { xpu_memcpy(static_cast(&depth), diff --git a/paddle/fluid/operators/one_hot_v2_op.cc b/paddle/fluid/operators/one_hot_v2_op.cc index daf491c64b6d4..55cb5d1a53b2f 100644 --- a/paddle/fluid/operators/one_hot_v2_op.cc +++ b/paddle/fluid/operators/one_hot_v2_op.cc @@ -38,7 +38,7 @@ class OneHotV2Op : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "depth_tensor") { return expected_kernel_type; diff --git a/paddle/fluid/operators/one_hot_v2_op_mlu.cc b/paddle/fluid/operators/one_hot_v2_op_mlu.cc index f574cc525f142..a7b1a30afe567 100644 --- a/paddle/fluid/operators/one_hot_v2_op_mlu.cc +++ b/paddle/fluid/operators/one_hot_v2_op_mlu.cc @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template @@ -33,7 +33,8 @@ class OneHotV2MLUKernel : public framework::OpKernel { int depth = ctx.Attr("depth"); if (ctx.HasInput("depth_tensor")) { std::vector depth_data; - depth_data = GetDataFromTensor(ctx.Input("depth_tensor")); + depth_data = + GetDataFromTensor(ctx.Input("depth_tensor")); depth = depth_data[0]; auto out_dims = out->dims(); diff --git a/paddle/fluid/operators/one_hot_v2_op_npu.cc b/paddle/fluid/operators/one_hot_v2_op_npu.cc index 8399d41050399..1ea952cfcb7e6 100644 --- a/paddle/fluid/operators/one_hot_v2_op_npu.cc +++ b/paddle/fluid/operators/one_hot_v2_op_npu.cc @@ -17,7 +17,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template @@ -31,7 +31,7 @@ class OneHotV2NPUKernel : public framework::OpKernel { int depth = ctx.Attr("depth"); if (ctx.HasInput("depth_tensor")) { - auto* depth_tensor = ctx.Input("depth_tensor"); + auto* depth_tensor = ctx.Input("depth_tensor"); std::vector depth_data; framework::TensorToVector(*depth_tensor, dev_ctx, &depth_data); depth = depth_data[0]; diff --git a/paddle/fluid/operators/optimizers/adadelta_op.cc b/paddle/fluid/operators/optimizers/adadelta_op.cc index ef37c21496e55..4390da3c4e479 100644 --- a/paddle/fluid/operators/optimizers/adadelta_op.cc +++ b/paddle/fluid/operators/optimizers/adadelta_op.cc @@ -20,7 +20,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class AdadeltaOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/optimizers/adagrad_op.cc b/paddle/fluid/operators/optimizers/adagrad_op.cc index ae05070692fb0..e122b4c92822b 100644 --- a/paddle/fluid/operators/optimizers/adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/adagrad_op.cc @@ -25,7 +25,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class AdagradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/adam_op.h b/paddle/fluid/operators/optimizers/adam_op.h index cdbd8c4b9dfd2..aa331df4cbd0c 100644 --- a/paddle/fluid/operators/optimizers/adam_op.h +++ b/paddle/fluid/operators/optimizers/adam_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class AdamOp : public framework::OperatorWithKernel { public: @@ -34,7 +34,7 @@ class AdamOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const { if (var_name == "Beta1Pow" || var_name == "Beta2Pow" || var_name == "SkipUpdate") { diff --git a/paddle/fluid/operators/optimizers/adam_op_functor.h b/paddle/fluid/operators/optimizers/adam_op_functor.h index 15dee861b874e..7be2ab055cd41 100644 --- a/paddle/fluid/operators/optimizers/adam_op_functor.h +++ b/paddle/fluid/operators/optimizers/adam_op_functor.h @@ -23,9 +23,9 @@ namespace operators { namespace scatter = paddle::operators::math::scatter; -static inline float GetAttrFromTensor(const framework::Tensor* tensor) { +static inline float GetAttrFromTensor(const phi::DenseTensor* tensor) { const float* tensor_data = tensor->data(); - framework::Tensor cpu_tensor; + phi::DenseTensor cpu_tensor; if (platform::is_gpu_place(tensor->place())) { paddle::framework::TensorCopySync( *tensor, platform::CPUPlace(), &cpu_tensor); diff --git a/paddle/fluid/operators/optimizers/adam_op_mlu.cc b/paddle/fluid/operators/optimizers/adam_op_mlu.cc index 6ee63354fbff4..af912a5acab6e 100644 --- a/paddle/fluid/operators/optimizers/adam_op_mlu.cc +++ b/paddle/fluid/operators/optimizers/adam_op_mlu.cc @@ -19,7 +19,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template @@ -48,8 +48,8 @@ class AdamMLUKernel : public framework::OpKernel { auto* mom2 = ctx.Input("Moment2"); auto* lr = ctx.Input("LearningRate"); - auto* beta1_pow = ctx.Input("Beta1Pow"); - auto* beta2_pow = ctx.Input("Beta2Pow"); + auto* beta1_pow = ctx.Input("Beta1Pow"); + auto* beta2_pow = ctx.Input("Beta2Pow"); auto* param_out = ctx.Output("ParamOut"); auto* mom1_out = ctx.Output("Moment1Out"); @@ -59,7 +59,7 @@ class AdamMLUKernel : public framework::OpKernel { bool skip_update = false; if (ctx.HasInput("SkipUpdate")) { - auto* skip_update_tensor = ctx.Input("SkipUpdate"); + auto* skip_update_tensor = ctx.Input("SkipUpdate"); PADDLE_ENFORCE_EQ(skip_update_tensor->numel(), 1, platform::errors::InvalidArgument( @@ -153,16 +153,16 @@ class AdamMLUKernel : public framework::OpKernel { "value is:%d.", beta2_pow_out->numel())); - const Tensor* beta1_tensor = nullptr; - const Tensor* beta2_tensor = nullptr; - const Tensor* epsilon_tensor = nullptr; + const phi::DenseTensor* beta1_tensor = nullptr; + const phi::DenseTensor* beta2_tensor = nullptr; + const phi::DenseTensor* epsilon_tensor = nullptr; Tensor beta1_tmp(experimental::DataType::FLOAT32); Tensor beta2_tmp(experimental::DataType::FLOAT32); Tensor epsilon_tmp(experimental::DataType::FLOAT32); if (ctx.HasInput("Beta1Tensor")) { - beta1_tensor = ctx.Input("Beta1Tensor"); + beta1_tensor = ctx.Input("Beta1Tensor"); PADDLE_ENFORCE_EQ(beta1_tensor->numel(), 1, platform::errors::InvalidArgument( @@ -181,7 +181,7 @@ class AdamMLUKernel : public framework::OpKernel { } if (ctx.HasInput("Beta2Tensor")) { - beta2_tensor = ctx.Input("Beta2Tensor"); + beta2_tensor = ctx.Input("Beta2Tensor"); PADDLE_ENFORCE_EQ(beta2_tensor->numel(), 1, platform::errors::InvalidArgument( @@ -200,7 +200,7 @@ class AdamMLUKernel : public framework::OpKernel { } if (ctx.HasInput("EpsilonTensor")) { - epsilon_tensor = ctx.Input("EpsilonTensor"); + epsilon_tensor = ctx.Input("EpsilonTensor"); PADDLE_ENFORCE_EQ(epsilon_tensor->numel(), 1, platform::errors::InvalidArgument( @@ -278,7 +278,7 @@ class AdamWMLUKernel : public AdamMLUKernel { bool skip_update = false; if (ctx.HasInput("SkipUpdate")) { VLOG(3) << "Has SkipUpdate"; - auto* skip_update_tensor = ctx.Input("SkipUpdate"); + auto* skip_update_tensor = ctx.Input("SkipUpdate"); PADDLE_ENFORCE_EQ(skip_update_tensor->numel(), 1, platform::errors::InvalidArgument( @@ -338,19 +338,19 @@ class MergedAdamMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { // Get inputs and outputs - auto params = ctx.MultiInput("Param"); - auto grads = ctx.MultiInput("Grad"); - auto lrs = ctx.MultiInput("LearningRate"); - auto mom1s = ctx.MultiInput("Moment1"); - auto mom2s = ctx.MultiInput("Moment2"); - auto beta1_pows = ctx.MultiInput("Beta1Pow"); - auto beta2_pows = ctx.MultiInput("Beta2Pow"); - auto master_params = ctx.MultiInput("MasterParam"); - auto param_outs = ctx.MultiOutput("ParamOut"); - auto mom1_outs = ctx.MultiOutput("Moment1Out"); - auto mom2_outs = ctx.MultiOutput("Moment2Out"); - auto beta1_pow_outs = ctx.MultiOutput("Beta1PowOut"); - auto beta2_pow_outs = ctx.MultiOutput("Beta2PowOut"); + auto params = ctx.MultiInput("Param"); + auto grads = ctx.MultiInput("Grad"); + auto lrs = ctx.MultiInput("LearningRate"); + auto mom1s = ctx.MultiInput("Moment1"); + auto mom2s = ctx.MultiInput("Moment2"); + auto 
beta1_pows = ctx.MultiInput("Beta1Pow"); + auto beta2_pows = ctx.MultiInput("Beta2Pow"); + auto master_params = ctx.MultiInput("MasterParam"); + auto param_outs = ctx.MultiOutput("ParamOut"); + auto mom1_outs = ctx.MultiOutput("Moment1Out"); + auto mom2_outs = ctx.MultiOutput("Moment2Out"); + auto beta1_pow_outs = ctx.MultiOutput("Beta1PowOut"); + auto beta2_pow_outs = ctx.MultiOutput("Beta2PowOut"); // Check validation of inputs and outputs size_t param_num = params.size(); @@ -365,7 +365,7 @@ class MergedAdamMLUKernel : public framework::OpKernel { bool skip_update = false; if (ctx.HasInput("SkipUpdate")) { - auto* skip_update_tensor = ctx.Input("SkipUpdate"); + auto* skip_update_tensor = ctx.Input("SkipUpdate"); PADDLE_ENFORCE_EQ(skip_update_tensor->numel(), 1, platform::errors::InvalidArgument( @@ -416,9 +416,9 @@ class MergedAdamMLUKernel : public framework::OpKernel { VLOG(4) << "use_global_beta_pow:" << use_global_beta_pow; // Get beta1, beta2 and epsilon from attribute. - const Tensor* beta1_tensor = nullptr; - const Tensor* beta2_tensor = nullptr; - const Tensor* epsilon_tensor = nullptr; + const phi::DenseTensor* beta1_tensor = nullptr; + const phi::DenseTensor* beta2_tensor = nullptr; + const phi::DenseTensor* epsilon_tensor = nullptr; Tensor beta1_tmp(experimental::DataType::FLOAT32); Tensor beta2_tmp(experimental::DataType::FLOAT32); diff --git a/paddle/fluid/operators/optimizers/adam_op_npu.cc b/paddle/fluid/operators/optimizers/adam_op_npu.cc index 3642c45ba6aab..d7850cae972d1 100644 --- a/paddle/fluid/operators/optimizers/adam_op_npu.cc +++ b/paddle/fluid/operators/optimizers/adam_op_npu.cc @@ -22,7 +22,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template @@ -51,8 +51,8 @@ class AdamNPUKernel : public framework::OpKernel { auto* mom2 = ctx.Input("Moment2"); auto* lr = ctx.Input("LearningRate"); - auto* beta1_pow = ctx.Input("Beta1Pow"); - auto* beta2_pow = ctx.Input("Beta2Pow"); + auto* beta1_pow = ctx.Input("Beta1Pow"); + auto* beta2_pow = ctx.Input("Beta2Pow"); auto* param_out = ctx.Output("ParamOut"); auto* mom1_out = ctx.Output("Moment1Out"); @@ -62,7 +62,7 @@ class AdamNPUKernel : public framework::OpKernel { bool skip_update = false; if (ctx.HasInput("SkipUpdate")) { - auto* skip_update_tensor = ctx.Input("SkipUpdate"); + auto* skip_update_tensor = ctx.Input("SkipUpdate"); PADDLE_ENFORCE_EQ(skip_update_tensor->numel(), 1, platform::errors::InvalidArgument( @@ -129,16 +129,16 @@ class AdamNPUKernel : public framework::OpKernel { beta2_pow = &beta2_pow_tmp; } - const Tensor* beta1_tensor = nullptr; - const Tensor* beta2_tensor = nullptr; - const Tensor* epsilon_tensor = nullptr; + const phi::DenseTensor* beta1_tensor = nullptr; + const phi::DenseTensor* beta2_tensor = nullptr; + const phi::DenseTensor* epsilon_tensor = nullptr; Tensor beta1_tmp(experimental::DataType::FLOAT32); Tensor beta2_tmp(experimental::DataType::FLOAT32); Tensor epsilon_tmp(experimental::DataType::FLOAT32); if (ctx.HasInput("Beta1Tensor")) { - beta1_tensor = ctx.Input("Beta1Tensor"); + beta1_tensor = ctx.Input("Beta1Tensor"); PADDLE_ENFORCE_EQ(beta1_tensor->numel(), 1, platform::errors::InvalidArgument( @@ -152,7 +152,7 @@ class AdamNPUKernel : public framework::OpKernel { } if (ctx.HasInput("Beta2Tensor")) { - beta2_tensor = ctx.Input("Beta2Tensor"); + beta2_tensor = ctx.Input("Beta2Tensor"); PADDLE_ENFORCE_EQ(beta2_tensor->numel(), 1, 
platform::errors::InvalidArgument( @@ -166,7 +166,7 @@ class AdamNPUKernel : public framework::OpKernel { } if (ctx.HasInput("EpsilonTensor")) { - epsilon_tensor = ctx.Input("EpsilonTensor"); + epsilon_tensor = ctx.Input("EpsilonTensor"); PADDLE_ENFORCE_EQ(epsilon_tensor->numel(), 1, platform::errors::InvalidArgument( @@ -264,7 +264,7 @@ class AdamWNPUKernel : public AdamNPUKernel { bool skip_update = false; if (ctx.HasInput("SkipUpdate")) { VLOG(3) << "Has SkipUpdate"; - auto* skip_update_tensor = ctx.Input("SkipUpdate"); + auto* skip_update_tensor = ctx.Input("SkipUpdate"); PADDLE_ENFORCE_EQ(skip_update_tensor->numel(), 1, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/optimizers/adamax_op.cc b/paddle/fluid/operators/optimizers/adamax_op.cc index 75f9e25796ea0..5298030f17a04 100644 --- a/paddle/fluid/operators/optimizers/adamax_op.cc +++ b/paddle/fluid/operators/optimizers/adamax_op.cc @@ -20,7 +20,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class AdamaxOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc index 90ce98c4dc316..94a52d9765bfa 100644 --- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc @@ -17,7 +17,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class DecayedAdagradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.h b/paddle/fluid/operators/optimizers/decayed_adagrad_op.h index 741a12ded2e0a..98d807b9e9977 100644 --- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.h +++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.h @@ -40,8 +40,8 @@ class DecayedAdagradOpKernel : public framework::OpKernel { ctx.InputNames("Grad").front(), framework::ToTypeName(grad_var->Type()))); - auto param_out_tensor = ctx.Output("ParamOut"); - auto moment_out_tensor = ctx.Output("MomentOut"); + auto param_out_tensor = ctx.Output("ParamOut"); + auto moment_out_tensor = ctx.Output("MomentOut"); param_out_tensor->mutable_data(ctx.GetPlace()); moment_out_tensor->mutable_data(ctx.GetPlace()); @@ -50,13 +50,13 @@ class DecayedAdagradOpKernel : public framework::OpKernel { float epsilon = ctx.Attr("epsilon"); auto param = framework::EigenVector::Flatten( - *ctx.Input("Param")); + *ctx.Input("Param")); auto grad = framework::EigenVector::Flatten( - *ctx.Input("Grad")); + *ctx.Input("Grad")); auto moment = framework::EigenVector::Flatten( - *ctx.Input("Moment")); + *ctx.Input("Moment")); auto lr = framework::EigenVector::Flatten( - *ctx.Input("LearningRate")); + *ctx.Input("LearningRate")); auto param_out = framework::EigenVector::Flatten(*param_out_tensor); auto moment_out = framework::EigenVector::Flatten(*moment_out_tensor); diff --git a/paddle/fluid/operators/optimizers/dgc_momentum_op.cc b/paddle/fluid/operators/optimizers/dgc_momentum_op.cc index 09847ff216f5a..2b4b1c1a109bd 100644 --- a/paddle/fluid/operators/optimizers/dgc_momentum_op.cc +++ b/paddle/fluid/operators/optimizers/dgc_momentum_op.cc @@ -37,7 +37,7 @@ class DGCMomentumOp : public MomentumOp { framework::OpKernelType 
GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "current_step" || var_name == "nranks") { VLOG(10) << "var_name:" << var_name << " need not to transform"; diff --git a/paddle/fluid/operators/optimizers/dgc_momentum_op.h b/paddle/fluid/operators/optimizers/dgc_momentum_op.h index 5ea3a4cc808d9..86e069fe45e63 100644 --- a/paddle/fluid/operators/optimizers/dgc_momentum_op.h +++ b/paddle/fluid/operators/optimizers/dgc_momentum_op.h @@ -34,11 +34,11 @@ class DGCMomentumKernel : public framework::OpKernel { return; } - auto current_step_tensor = context.Input("current_step"); + auto current_step_tensor = context.Input("current_step"); auto* current_step = current_step_tensor->data(); // nranks - auto nranks_tensor = context.Input("nranks"); + auto nranks_tensor = context.Input("nranks"); const int nranks = static_cast(*nranks_tensor->data()); PADDLE_ENFORCE_GT( nranks, @@ -47,8 +47,8 @@ class DGCMomentumKernel : public framework::OpKernel { "DGC is not useful when num_trainers <= 1, but now nranks=%d", nranks)); - const framework::Tensor* g = context.Input("Grad"); - framework::Tensor* g_out = context.Output("Grad_out"); + const phi::DenseTensor* g = context.Input("Grad"); + phi::DenseTensor* g_out = context.Output("Grad_out"); auto g_e = framework::EigenVector::Flatten(*g); auto g_out_e = framework::EigenVector::Flatten(*g_out); @@ -64,16 +64,16 @@ class DGCMomentumKernel : public framework::OpKernel { const auto* grad_var = context.InputVar("Grad"); if (static_cast(*current_step) < static_cast(rampup_begin_step)) { VLOG(10) << " so use momentum optimizer"; - auto* learning_rate = context.Input("LearningRate"); + auto* learning_rate = context.Input("LearningRate"); bool multi_precision = context.Attr("multi_precision"); - auto* param = context.Input("Param"); - auto* velocity = context.Input("Velocity"); - auto* param_out = context.Output("ParamOut"); - auto* velocity_out = context.Output("VelocityOut"); + auto* param = context.Input("Param"); + auto* velocity = context.Input("Velocity"); + auto* param_out = context.Output("ParamOut"); + auto* velocity_out = context.Output("VelocityOut"); auto* master_param_out = - context.Output("MasterParamOut"); - paddle::optional master_param_opt(paddle::none); + context.Output("MasterParamOut"); + paddle::optional master_param_opt(paddle::none); float mu = context.Attr("mu"); bool use_nesterov = context.Attr("use_nesterov"); std::string regularization_method = @@ -81,9 +81,9 @@ class DGCMomentumKernel : public framework::OpKernel { float regularization_coeff = context.Attr("regularization_coeff"); float rescale_grad = context.Attr("rescale_grad"); - if (grad_var->IsType()) { + if (grad_var->IsType()) { // sgd_dense - auto* grad = context.Input("Grad"); + auto* grad = context.Input("Grad"); phi::MomentumDenseKernel( static_cast::TYPE&>(dev_ctx), @@ -130,22 +130,22 @@ class DGCMomentumKernel : public framework::OpKernel { const auto* param_var = context.InputVar("Param"); - auto* learning_rate = context.Input("LearningRate"); + auto* learning_rate = context.Input("LearningRate"); bool multi_precision = context.Attr("multi_precision"); if (param_var->IsType()) { - auto* param = context.Input("Param"); - auto* param_out = context.Output("ParamOut"); + auto* param = context.Input("Param"); + auto* param_out = context.Output("ParamOut"); auto* master_param_out = - context.Output("MasterParamOut"); - 
paddle::optional master_param_opt(paddle::none); + context.Output("MasterParamOut"); + paddle::optional master_param_opt(paddle::none); if (multi_precision) { - auto* master_param = context.Input("MasterParam"); + auto* master_param = context.Input("MasterParam"); master_param_opt = *master_param; } - if (grad_var->IsType()) { + if (grad_var->IsType()) { // sgd_dense - auto* grad = context.Input("Grad"); + auto* grad = context.Input("Grad"); phi::SGDDenseKernel( static_cast::TYPE&>(dev_ctx), diff --git a/paddle/fluid/operators/optimizers/distributed_fused_lamb_init_op.cu b/paddle/fluid/operators/optimizers/distributed_fused_lamb_init_op.cu index d922b2a30cf90..2ad50781ad985 100644 --- a/paddle/fluid/operators/optimizers/distributed_fused_lamb_init_op.cu +++ b/paddle/fluid/operators/optimizers/distributed_fused_lamb_init_op.cu @@ -28,8 +28,8 @@ using phi::funcs::FlattenToString; using phi::funcs::ToVector; struct ParamGradInfo { - framework::Tensor *param_t{nullptr}; - framework::Tensor *grad_t{nullptr}; + phi::DenseTensor *param_t{nullptr}; + phi::DenseTensor *grad_t{nullptr}; size_t idx{0}; size_t numel{0}; size_t numel_with_padding{0}; @@ -182,7 +182,7 @@ static size_t FillAlignmentPaddingInfo(std::vector *infos, template static T *TensorFillConstant(const phi::GPUContext &dev_ctx, - framework::Tensor *tensor, + phi::DenseTensor *tensor, const framework::DDim &dims, T value) { tensor->Resize(dims); @@ -192,10 +192,10 @@ static T *TensorFillConstant(const phi::GPUContext &dev_ctx, return ptr; } -static framework::Tensor CastDataForInitedTensor(const phi::GPUContext &dev_ctx, - framework::Tensor *origin, - framework::Tensor *fused_out, - size_t numel_offset) { +static phi::DenseTensor CastDataForInitedTensor(const phi::GPUContext &dev_ctx, + phi::DenseTensor *origin, + phi::DenseTensor *fused_out, + size_t numel_offset) { PADDLE_ENFORCE_EQ(origin->IsInitialized(), true, platform::errors::InvalidArgument( @@ -224,9 +224,9 @@ static framework::Tensor CastDataForInitedTensor(const phi::GPUContext &dev_ctx, return sliced_tensor; } -static framework::Tensor CopyAndShareBufferForInitedTensor( - framework::Tensor *origin, - framework::Tensor *fused_out, +static phi::DenseTensor CopyAndShareBufferForInitedTensor( + phi::DenseTensor *origin, + phi::DenseTensor *fused_out, size_t numel_offset, gpuStream_t stream) { PADDLE_ENFORCE_EQ( @@ -271,8 +271,8 @@ static framework::Tensor CopyAndShareBufferForInitedTensor( return sliced_tensor; } -static void ShareBufferForNonInitedTensor(framework::Tensor *origin, - framework::Tensor *fused_out, +static void ShareBufferForNonInitedTensor(phi::DenseTensor *origin, + phi::DenseTensor *fused_out, size_t numel_offset, const framework::DDim &dims) { PADDLE_ENFORCE_EQ( @@ -295,7 +295,7 @@ static void ShareBufferForNonInitedTensor(framework::Tensor *origin, template static void CopyVectorToCPUTensor(const std::vector &src, - framework::Tensor *dst) { + phi::DenseTensor *dst) { dst->Resize({static_cast(src.size())}); T *dst_ptr = dst->mutable_data(platform::CPUPlace()); const T *src_ptr = src.data(); @@ -351,9 +351,9 @@ class DistributedFusedLambInitOpKernel // Step 1: Check Input(Param) and Output(ParamOut), Input(Grad) and // Output(GradOut) - auto params = ctx.MultiInput("Param"); - auto grads = ctx.MultiInput("Grad"); - auto master_params = ctx.MultiOutput("MasterParamOut"); + auto params = ctx.MultiInput("Param"); + auto grads = ctx.MultiInput("Grad"); + auto master_params = ctx.MultiOutput("MasterParamOut"); std::vector fp32_infos, fp16_infos; { 
PADDLE_ENFORCE_EQ(params.size(), @@ -362,8 +362,8 @@ class DistributedFusedLambInitOpKernel "The parameter number and parameter gradient " "number should be the same.")); - auto params_out = ctx.MultiOutput("ParamOut"); - auto grads_out = ctx.MultiOutput("GradOut"); + auto params_out = ctx.MultiOutput("ParamOut"); + auto grads_out = ctx.MultiOutput("GradOut"); PADDLE_ENFORCE_EQ( params.size(), params_out.size(), @@ -469,7 +469,7 @@ class DistributedFusedLambInitOpKernel size_t fp16_wd_end_idx = ReorderParamGradInfoList(apply_weight_decay, &fp16_infos); - auto *param_order_t = ctx.Output("ParamOrder"); + auto *param_order_t = ctx.Output("ParamOrder"); auto param_num = fp32_infos.size() + fp16_infos.size(); param_order_t->Resize({static_cast(param_num)}); auto *param_order = param_order_t->mutable_data(platform::CPUPlace()); @@ -535,30 +535,30 @@ class DistributedFusedLambInitOpKernel // Step 3: allocate output tensor and do initialization float *fused_fp32_param = nullptr, *fused_fp32_grad = nullptr; platform::float16 *fused_fp16_param = nullptr, *fused_fp16_grad = nullptr; - framework::Tensor *fp32_p_t = nullptr, *fp16_p_t = nullptr, - *fp32_g_t = nullptr, *fp16_g_t = nullptr; - std::vector fp16_master_params; + phi::DenseTensor *fp32_p_t = nullptr, *fp16_p_t = nullptr, + *fp32_g_t = nullptr, *fp16_g_t = nullptr; + std::vector fp16_master_params; if (total_numel > 0) { - fp32_p_t = ctx.Output("FP32FusedParam"); + fp32_p_t = ctx.Output("FP32FusedParam"); fused_fp32_param = TensorFillConstant( dev_ctx, fp32_p_t, {static_cast(total_numel)}, 0.0f); } if (fp32_numel > 0) { - fp32_g_t = ctx.Output("FP32FusedGrad"); + fp32_g_t = ctx.Output("FP32FusedGrad"); fused_fp32_grad = TensorFillConstant( dev_ctx, fp32_g_t, {static_cast(fp32_numel)}, 0.0f); } if (fp16_numel > 0) { - fp16_p_t = ctx.Output("FP16FusedParam"); + fp16_p_t = ctx.Output("FP16FusedParam"); fused_fp16_param = TensorFillConstant( dev_ctx, fp16_p_t, {static_cast(fp16_numel)}, static_cast(0)); - fp16_g_t = ctx.Output("FP16FusedGrad"); + fp16_g_t = ctx.Output("FP16FusedGrad"); fused_fp16_grad = TensorFillConstant( dev_ctx, fp16_g_t, @@ -622,19 +622,19 @@ class DistributedFusedLambInitOpKernel // Step 4: For Moment1, Moment2, Beta1Pow, Beta2Pow, just fill constant TensorFillConstant(dev_ctx, - ctx.Output("Moment1"), + ctx.Output("Moment1"), {static_cast(numel_each_device)}, 0.0f); TensorFillConstant(dev_ctx, - ctx.Output("Moment2"), + ctx.Output("Moment2"), {static_cast(numel_each_device)}, 0.0f); TensorFillConstant(dev_ctx, - ctx.Output("Beta1Pow"), + ctx.Output("Beta1Pow"), {1}, ctx.Attr("beta1")); TensorFillConstant(dev_ctx, - ctx.Output("Beta2Pow"), + ctx.Output("Beta2Pow"), {1}, ctx.Attr("beta2")); VLOG(10) << "Init Moment and BetaPow ends"; @@ -665,7 +665,7 @@ class DistributedFusedLambInitOpKernel size_t total_local_param_num = fp32_local_param_num + fp16_local_param_num; VLOG(10) << "Found the sharding arguments"; - auto *param_info_t = ctx.Output("ParamInfo"); + auto *param_info_t = ctx.Output("ParamInfo"); param_info_t->Resize({8}); auto *param_info = param_info_t->mutable_data(platform::CPUPlace()); param_info[0] = static_cast(fp32_start_idx); @@ -760,22 +760,22 @@ class DistributedFusedLambInitOpKernel } CopyVectorToCPUTensor(numel_offsets, - ctx.Output("FusedParamOffsets")); + ctx.Output("FusedParamOffsets")); CopyVectorToCPUTensor( fp32_partial_numel_offsets, - ctx.Output("FP32ShardFusedParamOffsets")); + ctx.Output("FP32ShardFusedParamOffsets")); CopyVectorToCPUTensor( fp16_partial_numel_offsets, - 
ctx.Output("FP16ShardFusedParamOffsets")); + ctx.Output("FP16ShardFusedParamOffsets")); - auto *global_scale = ctx.Output("GlobalScale"); + auto *global_scale = ctx.Output("GlobalScale"); if (!global_scale->IsInitialized()) { TensorFillConstant(dev_ctx, global_scale, {1}, 1.0f); } VLOG(10) << "Init global scale ends"; TensorFillConstant(dev_ctx, - ctx.Output("Step"), + ctx.Output("Step"), {1}, static_cast(0)); diff --git a/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cc b/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cc index 9f286fef47773..d810f8df7370a 100644 --- a/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cc +++ b/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cc @@ -32,7 +32,7 @@ class DistributedFusedLambOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { return expected_kernel_type; } diff --git a/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu b/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu index 8a799f2bdc83c..908be3cd41d21 100644 --- a/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu +++ b/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu @@ -225,10 +225,10 @@ static void LogParamAndTrustRatioDivSquareNorm( const float *trust_ratio_div_square_norm) { if (!VLOG_IS_ON(LogLevel)) return; - auto tensors = ctx.MultiInput("Param"); + auto tensors = ctx.MultiInput("Param"); if (tensors.empty()) return; - const auto *order = ctx.Input("ParamOrder")->data(); + const auto *order = ctx.Input("ParamOrder")->data(); size_t n = tensors.size(); auto place = tensors[0]->place(); @@ -264,7 +264,7 @@ template static const T *GetInputTensorPtr(const framework::ExecutionContext &ctx, const char *in_name, int64_t *numel = nullptr) { - const auto *in_tensor = ctx.Input(in_name); + const auto *in_tensor = ctx.Input(in_name); PADDLE_ENFORCE_NOT_NULL( in_tensor, platform::errors::InvalidArgument("Input(%s) cannot be NULL.", in_name)); @@ -283,7 +283,7 @@ static T *GetSameInOutTensorPtr(const framework::ExecutionContext &ctx, const char *in_name, const char *out_name, int64_t *numel = nullptr) { - const auto *in_tensor = ctx.Input(in_name); + const auto *in_tensor = ctx.Input(in_name); if (in_tensor == nullptr || !in_tensor->IsInitialized()) { PADDLE_ENFORCE_EQ(AllowNotExist, true, @@ -293,7 +293,7 @@ static T *GetSameInOutTensorPtr(const framework::ExecutionContext &ctx, return nullptr; } - auto *out_tensor = ctx.Output(out_name); + auto *out_tensor = ctx.Output(out_name); PADDLE_ENFORCE_NOT_NULL( in_tensor, platform::errors::InvalidArgument("Input(%s) cannot be NULL.", in_name)); @@ -1145,8 +1145,7 @@ static std::string GetMinMaxStr(const T *x, } struct VisitDTypeFunctor { - VisitDTypeFunctor(const framework::Tensor *x, std::string *s) - : x_(x), s_(s) {} + VisitDTypeFunctor(const phi::DenseTensor *x, std::string *s) : x_(x), s_(s) {} template void apply() const { @@ -1154,11 +1153,11 @@ struct VisitDTypeFunctor { } private: - const framework::Tensor *x_; + const phi::DenseTensor *x_; std::string *s_; }; -static std::string GetMinMaxStr(const framework::Tensor *x) { +static std::string GetMinMaxStr(const phi::DenseTensor *x) { if (x == nullptr) return "null"; if (!x->IsInitialized()) return "not_inited"; if (!platform::is_gpu_place(x->place())) return "CPUTensor"; @@ -1173,7 +1172,7 @@ 
static void PrintAllMinMaxRange(const framework::ExecutionContext &ctx, if (!VLOG_IS_ON(1)) return; for (const auto &pair : ctx.GetOp().Inputs()) { const auto &key = pair.first; - const auto tensors = ctx.MultiInput(key); + const auto tensors = ctx.MultiInput(key); size_t n = tensors.size(); for (size_t i = 0; i < n; ++i) { VLOG(1) << "Input(" << key + ")[" << i << "] = " << pair.second[i] @@ -1184,7 +1183,7 @@ static void PrintAllMinMaxRange(const framework::ExecutionContext &ctx, if (only_inputs) return; for (const auto &pair : ctx.GetOp().Outputs()) { const auto &key = pair.first; - const auto tensors = ctx.MultiOutput(key); + const auto tensors = ctx.MultiOutput(key); size_t n = tensors.size(); for (size_t i = 0; i < n; ++i) { VLOG(1) << "Output(" << key + ")[" << i << "] = " << pair.second[i] @@ -1340,7 +1339,7 @@ class DistributedFusedLambOpKernel auto stream = dev_ctx.stream(); auto place = dev_ctx.GetPlace(); - auto *found_inf_t = ctx.Output("FoundInf"); + auto *found_inf_t = ctx.Output("FoundInf"); found_inf_t->Resize({1}); // Step 1: Get fp16 param and grad tensors @@ -1397,7 +1396,7 @@ class DistributedFusedLambOpKernel platform::errors::InvalidArgument( "The gradient accumulation steps should be not less than 1.")); if (acc_steps > 1) { - auto *step_t = ctx.Output("AccStep"); + auto *step_t = ctx.Output("AccStep"); PADDLE_ENFORCE_NOT_NULL( step_t, platform::errors::InvalidArgument( @@ -1417,7 +1416,7 @@ class DistributedFusedLambOpKernel float *fp32_acc_grad = nullptr; if (has_fp32_param) { auto *fp32_acc_grad_t = - ctx.Output("FP32AccFusedGrad"); + ctx.Output("FP32AccFusedGrad"); PADDLE_ENFORCE_NOT_NULL( fp32_acc_grad_t, platform::errors::InvalidArgument( @@ -1437,7 +1436,7 @@ class DistributedFusedLambOpKernel if (has_fp16_param) { use_master_acc_grad = ctx.Attr("use_master_acc_grad"); auto *fp16_acc_grad_t = - ctx.Output("FP16AccFusedGrad"); + ctx.Output("FP16AccFusedGrad"); PADDLE_ENFORCE_NOT_NULL( fp16_acc_grad_t, platform::errors::InvalidArgument( @@ -1527,7 +1526,7 @@ class DistributedFusedLambOpKernel } } - auto *stop_update_t = ctx.Output("StopUpdate"); + auto *stop_update_t = ctx.Output("StopUpdate"); stop_update_t->Resize({1}); auto *stop_update = stop_update_t->mutable_data(platform::CPUPlace()); @@ -2061,18 +2060,18 @@ class DistributedFusedLambOpKernel VLOG(10) << "ReduceScatter done"; // Step 7: update the moment1, moment2. Calcuate the trust_ratio_div - auto *fused_offsets_t = ctx.Input("FusedParamOffsets"); + auto *fused_offsets_t = ctx.Input("FusedParamOffsets"); auto *fused_offsets = fused_offsets_t->data(); auto *fp32_partial_fused_offsets_t = - ctx.Input("FP32ShardFusedParamOffsets"); + ctx.Input("FP32ShardFusedParamOffsets"); const auto *fp32_partial_fused_offsets = fp32_partial_fused_offsets_t->data(); auto *fp16_partial_fused_offsets_t = - ctx.Input("FP16ShardFusedParamOffsets"); + ctx.Input("FP16ShardFusedParamOffsets"); const auto *fp16_partial_fused_offsets = fp16_partial_fused_offsets_t->data(); - auto *step = ctx.Output("Step")->data(); + auto *step = ctx.Output("Step")->data(); VLOG(1) << "FusedParamOffsets: " << FlattenToString(fused_offsets, diff --git a/paddle/fluid/operators/optimizers/dpsgd_op.cc b/paddle/fluid/operators/optimizers/dpsgd_op.cc index d058b890cbd9d..9d522031acf6c 100644 --- a/paddle/fluid/operators/optimizers/dpsgd_op.cc +++ b/paddle/fluid/operators/optimizers/dpsgd_op.cc @@ -17,7 +17,7 @@ limitations under the License. 
 */
 namespace paddle {
 namespace operators {
-using Tensor = framework::Tensor;
+using Tensor = phi::DenseTensor;
 class DpsgdOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
diff --git a/paddle/fluid/operators/optimizers/dpsgd_op.h b/paddle/fluid/operators/optimizers/dpsgd_op.h
index 7d6a99ad2c55c..abc5a619ca830 100644
--- a/paddle/fluid/operators/optimizers/dpsgd_op.h
+++ b/paddle/fluid/operators/optimizers/dpsgd_op.h
@@ -46,12 +46,12 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
           ctx.InputNames("Grad").front(),
           framework::ToTypeName(grad_var->Type())));
 
-    const auto *learning_rate = ctx.Input<framework::Tensor>("LearningRate");
+    const auto *learning_rate = ctx.Input<phi::DenseTensor>("LearningRate");
 
-    const auto *param = ctx.Input<framework::Tensor>("Param");
-    const auto *grad = ctx.Input<framework::Tensor>("Grad");
+    const auto *param = ctx.Input<phi::DenseTensor>("Param");
+    const auto *grad = ctx.Input<phi::DenseTensor>("Grad");
 
-    auto *param_out = ctx.Output<framework::Tensor>("ParamOut");
+    auto *param_out = ctx.Output<phi::DenseTensor>("ParamOut");
 
     auto sz = param_out->numel();
     PADDLE_ENFORCE_EQ(param->numel(),
diff --git a/paddle/fluid/operators/optimizers/ftrl_op.cc b/paddle/fluid/operators/optimizers/ftrl_op.cc
index 50060b1636943..b81a6c5ab6bb7 100644
--- a/paddle/fluid/operators/optimizers/ftrl_op.cc
+++ b/paddle/fluid/operators/optimizers/ftrl_op.cc
@@ -17,7 +17,7 @@ limitations under the License. */
 
 namespace paddle {
 namespace operators {
-using Tensor = framework::Tensor;
+using Tensor = phi::DenseTensor;
 class FTRLOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
diff --git a/paddle/fluid/operators/optimizers/ftrl_op.h b/paddle/fluid/operators/optimizers/ftrl_op.h
index e15233c718a9a..abd0e15e471b1 100644
--- a/paddle/fluid/operators/optimizers/ftrl_op.h
+++ b/paddle/fluid/operators/optimizers/ftrl_op.h
@@ -21,7 +21,7 @@ limitations under the License.
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template @@ -120,15 +120,15 @@ class FTRLOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { const auto* grad_var = ctx.InputVar("Grad"); - auto* lr_in = ctx.Input("LearningRate"); + auto* lr_in = ctx.Input("LearningRate"); - auto* param_in = ctx.Input("Param"); - auto* sq_accum_in = ctx.Input("SquaredAccumulator"); - auto* lin_accum_in = ctx.Input("LinearAccumulator"); + auto* param_in = ctx.Input("Param"); + auto* sq_accum_in = ctx.Input("SquaredAccumulator"); + auto* lin_accum_in = ctx.Input("LinearAccumulator"); - auto* param_out = ctx.Output("ParamOut"); - auto* sq_accum_out = ctx.Output("SquaredAccumOut"); - auto* lin_accum_out = ctx.Output("LinearAccumOut"); + auto* param_out = ctx.Output("ParamOut"); + auto* sq_accum_out = ctx.Output("SquaredAccumOut"); + auto* lin_accum_out = ctx.Output("LinearAccumOut"); param_out->mutable_data(ctx.GetPlace()); sq_accum_out->mutable_data(ctx.GetPlace()); @@ -139,7 +139,7 @@ class FTRLOpKernel : public framework::OpKernel { auto lr_power = static_cast(ctx.Attr("lr_power")); if (grad_var->IsType()) { - auto grad = ctx.Input("Grad"); + auto grad = ctx.Input("Grad"); auto g = EigenVector::Flatten(*grad); auto p = EigenVector::Flatten(*param_in); diff --git a/paddle/fluid/operators/optimizers/lamb_op.cc b/paddle/fluid/operators/optimizers/lamb_op.cc index e9d6ab77f4357..3e2ee495b0586 100644 --- a/paddle/fluid/operators/optimizers/lamb_op.cc +++ b/paddle/fluid/operators/optimizers/lamb_op.cc @@ -37,7 +37,7 @@ class LambOp : public framework::OperatorWithKernel { } framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const { if (var_name == "Beta1Pow" || var_name == "Beta2Pow") { return expected_kernel_type; diff --git a/paddle/fluid/operators/optimizers/lars_momentum_op.cu b/paddle/fluid/operators/optimizers/lars_momentum_op.cu index 5337e56b28d5b..066bf66c4549a 100644 --- a/paddle/fluid/operators/optimizers/lars_momentum_op.cu +++ b/paddle/fluid/operators/optimizers/lars_momentum_op.cu @@ -484,7 +484,7 @@ class LarsMomentumOpCUDAKernel : public framework::OpKernel { bool multi_precision = ctx.Attr("multi_precision"); auto& cuda_ctx = ctx.template device_context(); int sm_num = cuda_ctx.GetSMCount(); - framework::Tensor tmp_buffer_t = ctx.AllocateTmpTensor( + phi::DenseTensor tmp_buffer_t = ctx.AllocateTmpTensor( {LARS_BLOCK_SIZE << 1}, cuda_ctx); auto* p_buffer = tmp_buffer_t.mutable_data(ctx.GetPlace()); auto* g_buffer = p_buffer + LARS_BLOCK_SIZE; diff --git a/paddle/fluid/operators/optimizers/lars_momentum_op.h b/paddle/fluid/operators/optimizers/lars_momentum_op.h index 459900b14f61d..4aaf37af73faf 100644 --- a/paddle/fluid/operators/optimizers/lars_momentum_op.h +++ b/paddle/fluid/operators/optimizers/lars_momentum_op.h @@ -49,7 +49,7 @@ class LarsMomentumOpKernel : public framework::OpKernel { auto g = framework::EigenVector::Flatten(*(grad[i])); auto rescale_g = rescale_grad * g; - framework::Tensor p_norm_t, g_norm_t; + phi::DenseTensor p_norm_t, g_norm_t; p_norm_t.Resize({1}); g_norm_t.Resize({1}); p_norm_t.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/optimizers/merged_adam_op.cc b/paddle/fluid/operators/optimizers/merged_adam_op.cc index f49fc72d01030..8e4ff40372a12 100644 --- 
a/paddle/fluid/operators/optimizers/merged_adam_op.cc +++ b/paddle/fluid/operators/optimizers/merged_adam_op.cc @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class MergedAdamOp : public framework::OperatorWithKernel { public: @@ -34,7 +34,7 @@ class MergedAdamOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "Beta1Pow" || var_name == "Beta2Pow" || var_name == "SkipUpdate") { diff --git a/paddle/fluid/operators/optimizers/merged_momentum_op_mlu.cc b/paddle/fluid/operators/optimizers/merged_momentum_op_mlu.cc index 90faf8f389a89..c390a12863bc4 100644 --- a/paddle/fluid/operators/optimizers/merged_momentum_op_mlu.cc +++ b/paddle/fluid/operators/optimizers/merged_momentum_op_mlu.cc @@ -28,8 +28,8 @@ template class MLUMergedMomentumOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto params = ctx.MultiInput("Param"); - auto params_out = ctx.MultiOutput("ParamOut"); + auto params = ctx.MultiInput("Param"); + auto params_out = ctx.MultiOutput("ParamOut"); size_t n = params.size(); PADDLE_ENFORCE_EQ(n, params_out.size(), @@ -47,7 +47,7 @@ class MLUMergedMomentumOpKernel : public framework::OpKernel { "must be the same Tensors.")); } - auto grads = ctx.MultiInput("Grad"); + auto grads = ctx.MultiInput("Grad"); PADDLE_ENFORCE_EQ( n, grads.size(), @@ -57,7 +57,7 @@ class MLUMergedMomentumOpKernel : public framework::OpKernel { grads.size(), n)); - auto velocitys = ctx.MultiInput("Velocity"); + auto velocitys = ctx.MultiInput("Velocity"); PADDLE_ENFORCE_EQ(n, velocitys.size(), platform::errors::InvalidArgument( @@ -67,7 +67,7 @@ class MLUMergedMomentumOpKernel : public framework::OpKernel { velocitys.size(), n)); - auto velocitys_out = ctx.MultiOutput("VelocityOut"); + auto velocitys_out = ctx.MultiOutput("VelocityOut"); PADDLE_ENFORCE_EQ( n, velocitys_out.size(), @@ -86,7 +86,7 @@ class MLUMergedMomentumOpKernel : public framework::OpKernel { } auto mu = static_cast(ctx.Attr("mu")); - auto lrs = ctx.MultiInput("LearningRate"); + auto lrs = ctx.MultiInput("LearningRate"); if (lrs.size() != 1) { PADDLE_ENFORCE_EQ( n, diff --git a/paddle/fluid/operators/optimizers/merged_momentum_op_npu.cc b/paddle/fluid/operators/optimizers/merged_momentum_op_npu.cc index 38479d6dba22e..6dd1cdbc03e9a 100644 --- a/paddle/fluid/operators/optimizers/merged_momentum_op_npu.cc +++ b/paddle/fluid/operators/optimizers/merged_momentum_op_npu.cc @@ -28,8 +28,8 @@ template class NPUMergedMomentumOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto params = ctx.MultiInput("Param"); - auto params_out = ctx.MultiOutput("ParamOut"); + auto params = ctx.MultiInput("Param"); + auto params_out = ctx.MultiOutput("ParamOut"); size_t n = params.size(); PADDLE_ENFORCE_EQ(n, params_out.size(), @@ -47,7 +47,7 @@ class NPUMergedMomentumOpKernel : public framework::OpKernel { "must be the same Tensors.")); } - auto grads = ctx.MultiInput("Grad"); + auto grads = ctx.MultiInput("Grad"); PADDLE_ENFORCE_EQ( n, grads.size(), @@ -57,7 +57,7 @@ class NPUMergedMomentumOpKernel : public framework::OpKernel { grads.size(), n)); - auto velocitys = ctx.MultiInput("Velocity"); + 
auto velocitys = ctx.MultiInput("Velocity"); PADDLE_ENFORCE_EQ(n, velocitys.size(), platform::errors::InvalidArgument( @@ -67,7 +67,7 @@ class NPUMergedMomentumOpKernel : public framework::OpKernel { velocitys.size(), n)); - auto velocitys_out = ctx.MultiOutput("VelocityOut"); + auto velocitys_out = ctx.MultiOutput("VelocityOut"); PADDLE_ENFORCE_EQ( n, velocitys_out.size(), @@ -86,7 +86,7 @@ class NPUMergedMomentumOpKernel : public framework::OpKernel { } T mu = static_cast(ctx.Attr("mu")); - auto lrs = ctx.MultiInput("LearningRate"); + auto lrs = ctx.MultiInput("LearningRate"); if (lrs.size() != 1) { PADDLE_ENFORCE_EQ( n, diff --git a/paddle/fluid/operators/optimizers/mkldnn/sgd_mkldnn_op.cc b/paddle/fluid/operators/optimizers/mkldnn/sgd_mkldnn_op.cc index e332972f7576a..ea5f3f9a2e806 100644 --- a/paddle/fluid/operators/optimizers/mkldnn/sgd_mkldnn_op.cc +++ b/paddle/fluid/operators/optimizers/mkldnn/sgd_mkldnn_op.cc @@ -28,10 +28,10 @@ class SGDOneDNNKernel : public SGDOpKernel { void dense_param_and_grad_kernel( const framework::ExecutionContext &ctx) const override { VLOG(4) << "[ONEDNN]: sgd_dense_param_kernel"; - const auto *learning_rate = ctx.Input("LearningRate"); - const auto *param = ctx.Input("Param"); - auto *param_out = ctx.Output("ParamOut"); - const auto *grad = ctx.Input("Grad"); + const auto *learning_rate = ctx.Input("LearningRate"); + const auto *param = ctx.Input("Param"); + auto *param_out = ctx.Output("ParamOut"); + const auto *grad = ctx.Input("Grad"); auto *out_data = param_out->mutable_data(ctx.GetPlace()); const T *param_data = param->data(); @@ -46,8 +46,8 @@ class SGDOneDNNKernel : public SGDOpKernel { void dense_param_sparse_grad_kernel( const framework::ExecutionContext &ctx) const override { VLOG(4) << "[ONEDNN]: sgd_dense_param_kernel"; - const auto *learning_rate = ctx.Input("LearningRate"); - auto *param_out = ctx.Output("ParamOut"); + const auto *learning_rate = ctx.Input("LearningRate"); + auto *param_out = ctx.Output("ParamOut"); const auto *grad = ctx.Input("Grad"); const auto &grad_value = grad->value(); diff --git a/paddle/fluid/operators/optimizers/momentum_op.cc b/paddle/fluid/operators/optimizers/momentum_op.cc index 7a738a8994768..4171f0c11955a 100644 --- a/paddle/fluid/operators/optimizers/momentum_op.cc +++ b/paddle/fluid/operators/optimizers/momentum_op.cc @@ -19,7 +19,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class MomentumOpInferVarType : public framework::VarTypeInference { public: diff --git a/paddle/fluid/operators/optimizers/momentum_op_mlu.cc b/paddle/fluid/operators/optimizers/momentum_op_mlu.cc index eeeddfc793f31..9aa16c7fe642b 100644 --- a/paddle/fluid/operators/optimizers/momentum_op_mlu.cc +++ b/paddle/fluid/operators/optimizers/momentum_op_mlu.cc @@ -37,19 +37,19 @@ class MLUMomentumOpKernel : public framework::OpKernel { T mu = static_cast(ctx.Attr("mu")); bool use_nesterov = ctx.Attr("use_nesterov"); - auto learning_rate = ctx.Input("LearningRate"); - auto param = ctx.Input("Param"); - auto velocity = ctx.Input("Velocity"); + auto learning_rate = ctx.Input("LearningRate"); + auto param = ctx.Input("Param"); + auto velocity = ctx.Input("Velocity"); - auto param_out = ctx.Output("ParamOut"); - auto velocity_out = ctx.Output("VelocityOut"); + auto param_out = ctx.Output("ParamOut"); + auto velocity_out = ctx.Output("VelocityOut"); param_out->mutable_data(ctx.GetPlace()); velocity_out->mutable_data(ctx.GetPlace()); auto* grad_var = ctx.InputVar("Grad"); if (grad_var->IsType()) { - auto grad = ctx.Input("Grad"); + auto grad = ctx.Input("Grad"); Tensor mu_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); MLUCnnlTensorDesc mu_tensor_desc(mu_tensor); diff --git a/paddle/fluid/operators/optimizers/momentum_op_npu.cc b/paddle/fluid/operators/optimizers/momentum_op_npu.cc index 234f86fe38bca..40136919fe17a 100644 --- a/paddle/fluid/operators/optimizers/momentum_op_npu.cc +++ b/paddle/fluid/operators/optimizers/momentum_op_npu.cc @@ -37,19 +37,19 @@ class NPUMomentumOpKernel : public framework::OpKernel { T mu = static_cast(ctx.Attr("mu")); bool use_nesterov = ctx.Attr("use_nesterov"); - auto learning_rate = ctx.Input("LearningRate"); - auto param = ctx.Input("Param"); - auto velocity = ctx.Input("Velocity"); + auto learning_rate = ctx.Input("LearningRate"); + auto param = ctx.Input("Param"); + auto velocity = ctx.Input("Velocity"); - auto param_out = ctx.Output("ParamOut"); - auto velocity_out = ctx.Output("VelocityOut"); + auto param_out = ctx.Output("ParamOut"); + auto velocity_out = ctx.Output("VelocityOut"); param_out->mutable_data(ctx.GetPlace()); velocity_out->mutable_data(ctx.GetPlace()); auto* grad_var = ctx.InputVar("Grad"); if (grad_var->IsType()) { - auto grad = ctx.Input("Grad"); + auto grad = ctx.Input("Grad"); Tensor mu_tensor; mu_tensor.mutable_data(phi::make_ddim({1}), ctx.GetPlace()); FillNpuTensorWithConstant(&mu_tensor, mu); diff --git a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.h b/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.h index d3d2e48fdcd6c..8f3be79cd4c8d 100644 --- a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.h +++ b/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.h @@ -71,10 +71,10 @@ template class Pow2DecayWithLinearWarmupOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const { - const auto *lr = ctx.Input("LearningRate"); - const auto *step = ctx.Input("Step"); - auto *lr_out = ctx.Output("LearningRateOut"); - auto *step_out = ctx.Output("StepOut"); + const auto *lr = ctx.Input("LearningRate"); + const auto *step = ctx.Input("Step"); + auto *lr_out = ctx.Output("LearningRateOut"); + auto *step_out = ctx.Output("StepOut"); PADDLE_ENFORCE_EQ( lr, lr_out, diff --git 
a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op_xpu.cc b/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op_xpu.cc index 4a13e226df8ce..543a4634c6d71 100644 --- a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op_xpu.cc +++ b/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op_xpu.cc @@ -27,10 +27,10 @@ template class Pow2DecayWithLinearWarmupXPUOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const { - const auto *lr = ctx.Input("LearningRate"); - const auto *step = ctx.Input("Step"); - auto *lr_out = ctx.Output("LearningRateOut"); - auto *step_out = ctx.Output("StepOut"); + const auto *lr = ctx.Input("LearningRate"); + const auto *step = ctx.Input("Step"); + auto *lr_out = ctx.Output("LearningRateOut"); + auto *step_out = ctx.Output("StepOut"); PADDLE_ENFORCE_EQ( lr, lr_out, diff --git a/paddle/fluid/operators/optimizers/proximal_adagrad_op.cc b/paddle/fluid/operators/optimizers/proximal_adagrad_op.cc index de280a6788779..2da5bed7642c1 100644 --- a/paddle/fluid/operators/optimizers/proximal_adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/proximal_adagrad_op.cc @@ -17,7 +17,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class ProximalAdagradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/proximal_adagrad_op.h b/paddle/fluid/operators/optimizers/proximal_adagrad_op.h index 3faf8ea765944..136e416307ab0 100644 --- a/paddle/fluid/operators/optimizers/proximal_adagrad_op.h +++ b/paddle/fluid/operators/optimizers/proximal_adagrad_op.h @@ -19,14 +19,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ProximalAdagradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* param_out = ctx.Output("ParamOut"); - auto* moment_out = ctx.Output("MomentOut"); + auto* param_out = ctx.Output("ParamOut"); + auto* moment_out = ctx.Output("MomentOut"); param_out->mutable_data(ctx.GetPlace()); moment_out->mutable_data(ctx.GetPlace()); @@ -34,12 +34,14 @@ class ProximalAdagradOpKernel : public framework::OpKernel { auto l1 = static_cast(ctx.Attr("l1")); auto l2 = static_cast(ctx.Attr("l2")); - auto grad = ctx.Input("Grad"); - auto p = framework::EigenVector::Flatten(*ctx.Input("Param")); - auto m = framework::EigenVector::Flatten(*ctx.Input("Moment")); + auto grad = ctx.Input("Grad"); + auto p = framework::EigenVector::Flatten( + *ctx.Input("Param")); + auto m = framework::EigenVector::Flatten( + *ctx.Input("Moment")); auto g = framework::EigenVector::Flatten(*grad); - auto lr = - framework::EigenVector::Flatten(*ctx.Input("LearningRate")); + auto lr = framework::EigenVector::Flatten( + *ctx.Input("LearningRate")); auto p_out = framework::EigenVector::Flatten(*param_out); auto m_out = framework::EigenVector::Flatten(*moment_out); diff --git a/paddle/fluid/operators/optimizers/proximal_gd_op.cc b/paddle/fluid/operators/optimizers/proximal_gd_op.cc index 2460b30fa26b0..061e495c4bacd 100644 --- a/paddle/fluid/operators/optimizers/proximal_gd_op.cc +++ b/paddle/fluid/operators/optimizers/proximal_gd_op.cc @@ -17,7 +17,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class ProximalGDOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/optimizers/proximal_gd_op.h b/paddle/fluid/operators/optimizers/proximal_gd_op.h index 7caa8421f041c..024062045ae43 100644 --- a/paddle/fluid/operators/optimizers/proximal_gd_op.h +++ b/paddle/fluid/operators/optimizers/proximal_gd_op.h @@ -19,25 +19,26 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ProximalGDOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* param_out = ctx.Output("ParamOut"); + auto* param_out = ctx.Output("ParamOut"); param_out->mutable_data(ctx.GetPlace()); - auto grad = ctx.Input("Grad"); + auto grad = ctx.Input("Grad"); auto l1 = static_cast(ctx.Attr("l1")); auto l2 = static_cast(ctx.Attr("l2")); - auto p = framework::EigenVector::Flatten(*ctx.Input("Param")); + auto p = framework::EigenVector::Flatten( + *ctx.Input("Param")); auto g = framework::EigenVector::Flatten(*grad); - auto lr = - framework::EigenVector::Flatten(*ctx.Input("LearningRate")); + auto lr = framework::EigenVector::Flatten( + *ctx.Input("LearningRate")); auto p_out = framework::EigenVector::Flatten(*param_out); auto& place = *ctx.template device_context().eigen_device(); diff --git a/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc b/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc index 8f6a35a8b6747..a70c129bad038 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc +++ b/paddle/fluid/operators/optimizers/rmsprop_op_npu.cc @@ -15,7 +15,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template @@ -62,8 +62,8 @@ class RMSPROPNPUKernel : public framework::OpKernel { epsilon_tmp.mutable_data({1}, ctx.GetPlace()); FillNpuTensorWithConstant(&epsilon_tmp, epsilon); epsilon_tensor = &epsilon_tmp; - auto *mg_tensor = ctx.Input("MeanGrad"); - auto *mean_grad_out = ctx.Output("MeanGradOut"); + auto *mg_tensor = ctx.Input("MeanGrad"); + auto *mean_grad_out = ctx.Output("MeanGradOut"); mean_grad_out->mutable_data(ctx.GetPlace()); const auto &runner_applycenterrmsprop = NpuOpRunner( std::string("ApplyCenteredRMSPropD"), diff --git a/paddle/fluid/operators/optimizers/sgd_op.cc b/paddle/fluid/operators/optimizers/sgd_op.cc index 803bc9f980a51..0bc3cb11f4d06 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.cc +++ b/paddle/fluid/operators/optimizers/sgd_op.cc @@ -60,7 +60,7 @@ class SGDOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const { if (var_name == "LearningRate") { return framework::OpKernelType( diff --git a/paddle/fluid/operators/optimizers/sgd_op.cu b/paddle/fluid/operators/optimizers/sgd_op.cu index 28ca7c6d8d3b7..686529758260a 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.cu +++ b/paddle/fluid/operators/optimizers/sgd_op.cu @@ -76,18 +76,18 @@ class SGDOpKernel : public framework::OpKernel { "but the received is %s", ctx.InputNames("Param").front(), paddle::framework::ToTypeName(param_var->Type()))); - using paddle::framework::Tensor; + using MPDType = typename details::MPTypeTrait::Type; - auto* param = ctx.Input("Param"); - auto* param_out = ctx.Output("ParamOut"); - auto* learning_rate = ctx.Input("LearningRate"); + auto* param = ctx.Input("Param"); + auto* param_out = ctx.Output("ParamOut"); + auto* learning_rate = ctx.Input("LearningRate"); auto* grad_var = ctx.InputVar("Grad"); const bool multi_precision = ctx.Attr("multi_precision"); - const Tensor* master_param = nullptr; - Tensor* master_param_out = nullptr; + const phi::DenseTensor* master_param = nullptr; + phi::DenseTensor* master_param_out = nullptr; if (multi_precision) { bool has_master = ctx.HasInput("MasterParam") && ctx.HasOutput("MasterParamOut"); @@ -97,8 +97,8 @@ class SGDOpKernel : public framework::OpKernel { "The Input(MasterParam) and Output(MasterParamOut) " "should not be null when " "the attr `multi_precision` is true")); - master_param = ctx.Input("MasterParam"); - master_param_out = ctx.Output("MasterParamOut"); + master_param = ctx.Input("MasterParam"); + master_param_out = ctx.Output("MasterParamOut"); } const MPDType* master_in_data = multi_precision ? master_param->data() : nullptr; @@ -109,7 +109,7 @@ class SGDOpKernel : public framework::OpKernel { // Actually, all tensors are LoDTensor except SelectedRows. 
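Every hunk in these optimizer kernels is the same mechanical edit: the explicit framework::Tensor spelling is replaced by phi::DenseTensor in the Input/Output template arguments and in local pointer declarations, with no intended change to allocation or data access (consistent with the alias redefinitions to `using Tensor = phi::DenseTensor;` elsewhere in the patch). A minimal before/after sketch of the pattern, assuming the usual ExecutionContext API; the snippet is illustrative and not compilable on its own:

    // Before: the fluid alias was named explicitly.
    //   const auto* param   = ctx.Input<framework::Tensor>("Param");
    //   auto* param_out     = ctx.Output<framework::Tensor>("ParamOut");
    //   const framework::Tensor* master_param = nullptr;

    // After: the underlying phi type is named directly; behavior is identical.
    const auto* param   = ctx.Input<phi::DenseTensor>("Param");
    auto* param_out     = ctx.Output<phi::DenseTensor>("ParamOut");
    const phi::DenseTensor* master_param = nullptr;
    if (multi_precision) {  // master weights only exist in multi-precision mode
      master_param = ctx.Input<phi::DenseTensor>("MasterParam");
    }
    param_out->mutable_data<T>(ctx.GetPlace());  // allocation API unchanged

The GetKernelTypeForVar overrides touched in this patch change in the same way: only the tensor parameter type is respelled from framework::Tensor to phi::DenseTensor; the override's logic is untouched.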
if (grad_var->IsType()) { - auto* grad = ctx.Input("Grad"); + auto* grad = ctx.Input("Grad"); int block = 512; int grid = (param->numel() + block - 1) / block; diff --git a/paddle/fluid/operators/optimizers/sgd_op.h b/paddle/fluid/operators/optimizers/sgd_op.h index 02d8bcbd279dc..16f3e76662dae 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.h +++ b/paddle/fluid/operators/optimizers/sgd_op.h @@ -38,10 +38,10 @@ struct sgd_dense_param_kernel< framework::VarTypeTrait::kId> { void operator()(const framework::ExecutionContext &ctx) const { VLOG(4) << "[CPU]: sgd_dense_param_kernel"; - const auto *learning_rate = ctx.Input("LearningRate"); - const auto *param = ctx.Input("Param"); - auto *param_out = ctx.Output("ParamOut"); - const auto *grad = ctx.Input("Grad"); + const auto *learning_rate = ctx.Input("LearningRate"); + const auto *param = ctx.Input("Param"); + auto *param_out = ctx.Output("ParamOut"); + const auto *grad = ctx.Input("Grad"); const auto sz = param_out->numel(); jit::sgd_attr_t attr(1, sz, 1, sz, 1); @@ -64,9 +64,9 @@ struct sgd_dense_param_kernel::kId> { void operator()(const framework::ExecutionContext &ctx) const { VLOG(4) << "[CPU]: sgd_dense_param_kernel"; - const auto *learning_rate = ctx.Input("LearningRate"); - const auto *param = ctx.Input("Param"); - auto *param_out = ctx.Output("ParamOut"); + const auto *learning_rate = ctx.Input("LearningRate"); + const auto *param = ctx.Input("Param"); + auto *param_out = ctx.Output("ParamOut"); const auto *grad = ctx.Input("Grad"); const auto &grad_value = grad->value(); @@ -98,10 +98,10 @@ struct sgd_dense_param_kernel< framework::VarTypeTrait::kId> { void operator()(const framework::ExecutionContext &ctx) const { VLOG(4) << "[CPU]: sgd_dense_param_kernel"; - const auto *learning_rate = ctx.Input("LearningRate"); - const auto *param = ctx.Input("Param"); - auto *param_out = ctx.Output("ParamOut"); - const auto *grad = ctx.Input("Grad"); + const auto *learning_rate = ctx.Input("LearningRate"); + const auto *param = ctx.Input("Param"); + auto *param_out = ctx.Output("ParamOut"); + const auto *grad = ctx.Input("Grad"); param_out->mutable_data(ctx.GetPlace()); auto p = framework::EigenVector::Flatten(*param); @@ -119,8 +119,8 @@ struct sgd_dense_param_kernel::kId> { void operator()(const framework::ExecutionContext &ctx) const { VLOG(4) << "[CPU]: sgd_dense_param_kernel"; - const auto *learning_rate = ctx.Input("LearningRate"); - auto *param_out = ctx.Output("ParamOut"); + const auto *learning_rate = ctx.Input("LearningRate"); + auto *param_out = ctx.Output("ParamOut"); const auto *grad = ctx.Input("Grad"); const auto &grad_value = grad->value(); @@ -181,12 +181,12 @@ class SGDOpKernel : public framework::OpKernel { protected: void invoke_dense_param_kernel(const framework::ExecutionContext &ctx) const { - const auto *param = ctx.Input("Param"); - auto *param_out = ctx.Output("ParamOut"); + const auto *param = ctx.Input("Param"); + auto *param_out = ctx.Output("ParamOut"); const auto *grad_var = ctx.InputVar("Grad"); if (grad_var->IsType()) { - const auto *grad = ctx.Input("Grad"); + const auto *grad = ctx.Input("Grad"); const auto sz = param_out->numel(); PADDLE_ENFORCE_EQ(param->numel(), sz, @@ -269,7 +269,7 @@ class SGDOpKernel : public framework::OpKernel { void sparse_param_and_grad_kernel( const framework::ExecutionContext &ctx) const { - const auto *learning_rate = ctx.Input("LearningRate"); + const auto *learning_rate = ctx.Input("LearningRate"); const auto *param_var = ctx.InputVar("Param"); const auto *grad_var = 
ctx.InputVar("Grad"); diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.cc b/paddle/fluid/operators/optimizers/sparse_momentum_op.cc index a92bbbc838a8a..3e072a5e17a64 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.cc +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.cc @@ -19,7 +19,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class SparseMomentumOpInferVarType : public framework::VarTypeInference { public: diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.h b/paddle/fluid/operators/optimizers/sparse_momentum_op.h index e3d59a3744c0b..9eea5c11cb074 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.h +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.h @@ -36,8 +36,6 @@ namespace cub = hipcub; namespace paddle { namespace operators { -using framework::Tensor; - template using MultiPrecisionType = typename details::MPTypeTrait::Type; @@ -305,7 +303,7 @@ class SparseMomentumOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { const bool multi_precision = ctx.Attr("multi_precision"); bool use_nesterov = ctx.Attr("use_nesterov"); - auto index = ctx.Input("Index"); + auto index = ctx.Input("Index"); const auto& index_type = framework::TransToProtoVarType(index->dtype()); if (multi_precision) { if (use_nesterov) { @@ -371,8 +369,8 @@ class SparseMomentumOpKernel : public framework::OpKernel { int axis = ctx.Attr("axis"); // get axis from tensor if (ctx.HasInput("Axis")) { - Tensor cpu_axis; - const Tensor* axis_tensor = ctx.Input("Axis"); + phi::DenseTensor cpu_axis; + const phi::DenseTensor* axis_tensor = ctx.Input("Axis"); framework::TensorCopy(*axis_tensor, platform::CPUPlace(), &cpu_axis); const auto& axis_type = framework::TransToProtoVarType(axis_tensor->dtype()); @@ -388,12 +386,12 @@ class SparseMomentumOpKernel : public framework::OpKernel { platform::errors::InvalidArgument("The axis of sparse_momentum_op only " "support axis=0 or axis=1 now.")); - auto learning_rate = ctx.Input("LearningRate"); - auto param = ctx.Input("Param"); - auto param_out = ctx.Output("ParamOut"); - auto velocity = ctx.Input("Velocity"); - auto velocity_out = ctx.Output("VelocityOut"); - auto index = ctx.Input("Index"); + auto learning_rate = ctx.Input("LearningRate"); + auto param = ctx.Input("Param"); + auto param_out = ctx.Output("ParamOut"); + auto velocity = ctx.Input("Velocity"); + auto velocity_out = ctx.Output("VelocityOut"); + auto index = ctx.Input("Index"); int64_t num_index = index->numel(); // check index of shape 1-D @@ -412,8 +410,8 @@ class SparseMomentumOpKernel : public framework::OpKernel { " the second dimension should be 1.")); } - const framework::Tensor* master_param = nullptr; - framework::Tensor* master_param_out = nullptr; + const phi::DenseTensor* master_param = nullptr; + phi::DenseTensor* master_param_out = nullptr; if (multi_precision) { bool has_master = ctx.HasInput("MasterParam") && ctx.HasOutput("MasterParamOut"); @@ -423,8 +421,8 @@ class SparseMomentumOpKernel : public framework::OpKernel { "The Input(MasterParam) and Output(MasterParamOut) " "should not be null when " "the attr `multi_precision` is true")); - master_param = ctx.Input("MasterParam"); - master_param_out = ctx.Output("MasterParamOut"); + master_param = ctx.Input("MasterParam"); + master_param_out = ctx.Output("MasterParamOut"); } param_out->mutable_data(ctx.GetPlace()); @@ -435,7 +433,7 @@ class 
SparseMomentumOpKernel : public framework::OpKernel { multi_precision ? master_param_out->mutable_data(ctx.GetPlace()) : nullptr; - auto grad = ctx.Input("Grad"); + auto grad = ctx.Input("Grad"); platform::ForRange for_range( static_cast(ctx.device_context()), @@ -455,7 +453,7 @@ class SparseMomentumOpKernel : public framework::OpKernel { "The Grad's rank of sparse_momentum_op" " must be 2 now.")); - Tensor sorted_index, grad_index, sort_value; + phi::DenseTensor sorted_index, grad_index, sort_value; auto sorted_index_ptr = sorted_index.mutable_data({num_index}, ctx.GetPlace()); auto grad_index_ptr = diff --git a/paddle/fluid/operators/p_norm_op_npu.cc b/paddle/fluid/operators/p_norm_op_npu.cc index fb7ae8756d446..9d312dd572a45 100644 --- a/paddle/fluid/operators/p_norm_op_npu.cc +++ b/paddle/fluid/operators/p_norm_op_npu.cc @@ -22,8 +22,8 @@ template class PnormNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in_x = ctx.Input("X"); - auto* out_norm = ctx.Output("Out"); + auto* in_x = ctx.Input("X"); + auto* out_norm = ctx.Output("Out"); out_norm->mutable_data(ctx.GetPlace()); float porder = ctx.Attr("porder"); @@ -93,11 +93,11 @@ template class PnormGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - using Tensor = framework::Tensor; - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Out"); - auto* dy = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + using Tensor = phi::DenseTensor; + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Out"); + auto* dy = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); dx->mutable_data(place); diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index 66aef5fe4eaa2..b812338e2cb79 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -23,8 +23,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::Tensor; - template void Pad2DConstNCHW(const T* in_data, const int num, @@ -391,7 +389,7 @@ void Pad2DGradEdgeNHWC(T* d_in_data, static inline void GetPaddings(int* paddings, const framework::ExecutionContext& context) { - auto* paddings_t = context.Input("Paddings"); + auto* paddings_t = context.Input("Paddings"); if (paddings_t) { auto paddings_data = paddings_t->data(); paddings[0] = paddings_data[0]; @@ -414,11 +412,11 @@ class Pad2dCPUKernel : public framework::OpKernel { auto data_format = context.Attr("data_format"); T value = static_cast(context.Attr("pad_value")); - auto* x = context.Input("X"); + auto* x = context.Input("X"); auto in_dims = x->dims(); const T* in_data = x->data(); - auto* out = context.Output("Out"); + auto* out = context.Output("Out"); if (data_format == "NCHW") { out->Resize({in_dims[0], in_dims[1], @@ -530,8 +528,9 @@ class Pad2dGradCPUKernel : public framework::OpKernel { GetPaddings(pads, context); auto mode = context.Attr("mode"); auto data_format = context.Attr("data_format"); - auto* d_out = context.Input(framework::GradVarName("Out")); - auto* d_in = context.Output(framework::GradVarName("X")); + auto* d_out = + context.Input(framework::GradVarName("Out")); + auto* d_in = context.Output(framework::GradVarName("X")); auto d_in_dims = d_in->dims(); auto d_out_dims = d_out->dims(); const T* d_out_data = d_out->data(); @@ -704,7 +703,7 @@ class Pad2dOp : public framework::OperatorWithKernel { // only constant mode and non-blocked layouts are supported for oneDNN if (this->CanMKLDNNBeUsed(ctx, input_data_type) && ctx.Attr("mode") == "constant" && - ctx.Input("X") + ctx.Input("X") ->mem_desc() .data.format_desc.blocking.inner_nblks == 0) { return framework::OpKernelType(input_data_type, @@ -718,7 +717,7 @@ class Pad2dOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const { #ifdef PADDLE_WITH_MKLDNN if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && diff --git a/paddle/fluid/operators/pad2d_op.cu b/paddle/fluid/operators/pad2d_op.cu index 5ed217b2e60ef..c76a6b61e780e 100644 --- a/paddle/fluid/operators/pad2d_op.cu +++ b/paddle/fluid/operators/pad2d_op.cu @@ -24,8 +24,6 @@ namespace operators { using platform::PADDLE_CUDA_NUM_THREADS; -using framework::Tensor; - template __global__ void Pad2DConstNCHW(const int nthreads, const T* in_data, @@ -350,9 +348,9 @@ __global__ void Pad2DGradEdgeNHWC(const int out_size, static inline void GetPaddings(int* paddings, const framework::ExecutionContext& context) { - auto* paddings_t = context.Input("Paddings"); + auto* paddings_t = context.Input("Paddings"); if (paddings_t) { - Tensor pads; + phi::DenseTensor pads; framework::TensorCopySync(*paddings_t, platform::CPUPlace(), &pads); auto pads_data = pads.data(); paddings[0] = pads_data[0]; @@ -375,10 +373,10 @@ class Pad2dCUDAKernel : public framework::OpKernel { auto data_format = context.Attr("data_format"); T value = static_cast(context.Attr("pad_value")); - auto* x = context.Input("X"); + auto* x = context.Input("X"); auto in_dims = x->dims(); const T* in_data = x->data(); - auto* out = context.Output("Out"); + auto* out = context.Output("Out"); auto out_dims = out->dims(); if (data_format == "NCHW") { out_dims[0] = in_dims[0]; @@ -501,8 +499,9 @@ class Pad2dGradCUDAKernel : public framework::OpKernel { 
GetPaddings(pads, context); auto mode = context.Attr("mode"); auto data_format = context.Attr("data_format"); - auto* d_out = context.Input(framework::GradVarName("Out")); - auto* d_in = context.Output(framework::GradVarName("X")); + auto* d_out = + context.Input(framework::GradVarName("Out")); + auto* d_in = context.Output(framework::GradVarName("X")); auto d_in_dims = d_in->dims(); auto d_out_dims = d_out->dims(); const T* d_out_data = d_out->data(); diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index 65475f63ec0f9..8fb86ac37aa22 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -25,8 +25,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class Pad3dOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -39,7 +37,7 @@ class Pad3dOp : public framework::OperatorWithKernel { // only constant mode and non-blocked layouts are supported for oneDNN if (this->CanMKLDNNBeUsed(ctx, input_data_type) && ctx.Attr("mode") == "constant" && - ctx.Input("X") + ctx.Input("X") ->mem_desc() .data.format_desc.blocking.inner_nblks == 0) { return framework::OpKernelType(input_data_type, @@ -53,7 +51,7 @@ class Pad3dOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { #ifdef PADDLE_WITH_MKLDNN if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && diff --git a/paddle/fluid/operators/pad3d_op_npu.cc b/paddle/fluid/operators/pad3d_op_npu.cc index 5f1ec06018277..7694e0edbf9f9 100644 --- a/paddle/fluid/operators/pad3d_op_npu.cc +++ b/paddle/fluid/operators/pad3d_op_npu.cc @@ -19,12 +19,12 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; static inline std::vector GetPaddings( const framework::ExecutionContext& context) { std::vector paddings(6); - auto* paddings_t = context.Input("Paddings"); + auto* paddings_t = context.Input("Paddings"); if (paddings_t) { paddle::framework::TensorToVector( *paddings_t, context.device_context(), &paddings); @@ -39,7 +39,7 @@ template class Pad3dNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); + auto* x = context.Input("X"); auto in_dims = x->dims(); std::vector pads = GetPaddings(context); @@ -47,7 +47,7 @@ class Pad3dNPUKernel : public framework::OpKernel { float value = context.Attr("value"); auto data_format = context.Attr("data_format"); - auto* out = context.Output("Out"); + auto* out = context.Output("Out"); PADDLE_ENFORCE_LT(abs(value), 1e-5, @@ -106,8 +106,9 @@ class Pad3dGradNPUKernel : public framework::OpKernel { auto mode = context.Attr("mode"); auto data_format = context.Attr("data_format"); - auto* d_out = context.Input(framework::GradVarName("Out")); - auto* d_in = context.Output(framework::GradVarName("X")); + auto* d_out = + context.Input(framework::GradVarName("Out")); + auto* d_in = context.Output(framework::GradVarName("X")); auto d_in_dims = d_in->dims(); d_in->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/pad_constant_like_op.cc b/paddle/fluid/operators/pad_constant_like_op.cc index 254e8ebe5c570..28d264ba8e41f 100644 --- a/paddle/fluid/operators/pad_constant_like_op.cc +++ b/paddle/fluid/operators/pad_constant_like_op.cc @@ -19,8 +19,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class PadConstantLikeOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/pad_constant_like_op.h b/paddle/fluid/operators/pad_constant_like_op.h index 1207eb1d5cde2..ba87bd3ef1818 100644 --- a/paddle/fluid/operators/pad_constant_like_op.h +++ b/paddle/fluid/operators/pad_constant_like_op.h @@ -30,9 +30,9 @@ template class PadConstantLikeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto in_x = context.Input("X"); - auto in_y = context.Input("Y"); - auto* out = context.Output("Out"); + auto in_x = context.Input("X"); + auto in_y = context.Input("Y"); + auto* out = context.Output("Out"); if (in_x->dims() == in_y->dims()) { framework::TensorCopy(*in_y, context.GetPlace(), out); @@ -42,7 +42,7 @@ class PadConstantLikeKernel : public framework::OpKernel { T pad_value = static_cast(context.Attr("pad_value")); out->mutable_data(context.GetPlace()); - int rank = context.Input("X")->dims().size(); + int rank = context.Input("X")->dims().size(); std::vector pads(rank * 2, 0); @@ -65,10 +65,10 @@ template class PadConstantLikeGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto in_y = context.Input("Y"); + auto in_y = context.Input("Y"); auto in_dout = - context.Input(framework::GradVarName("Out")); - auto* d_y = context.Output(framework::GradVarName("Y")); + context.Input(framework::GradVarName("Out")); + auto* d_y = context.Output(framework::GradVarName("Y")); if (d_y == nullptr) { return; diff --git a/paddle/fluid/operators/pad_op.cc 
b/paddle/fluid/operators/pad_op.cc index fb4a90ebd8ca9..4e6a10a912a88 100644 --- a/paddle/fluid/operators/pad_op.cc +++ b/paddle/fluid/operators/pad_op.cc @@ -22,8 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class PadOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/pad_op_npu.cc b/paddle/fluid/operators/pad_op_npu.cc index 061da7d76e5df..425defc9792c7 100644 --- a/paddle/fluid/operators/pad_op_npu.cc +++ b/paddle/fluid/operators/pad_op_npu.cc @@ -19,14 +19,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class PadNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* out = context.Output("Out"); + auto* x = context.Input("X"); + auto* out = context.Output("Out"); auto paddings = context.Attr>("paddings"); float pad_value = context.Attr("pad_value"); @@ -56,8 +56,9 @@ template class PadGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* d_out = context.Input(framework::GradVarName("Out")); - auto* d_x = context.Output(framework::GradVarName("X")); + auto* d_out = + context.Input(framework::GradVarName("Out")); + auto* d_x = context.Output(framework::GradVarName("X")); auto paddings = context.Attr>("paddings"); d_x->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/partial_concat_op.cc b/paddle/fluid/operators/partial_concat_op.cc index e9b54632ddc01..396c4f2d038b7 100644 --- a/paddle/fluid/operators/partial_concat_op.cc +++ b/paddle/fluid/operators/partial_concat_op.cc @@ -17,7 +17,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class PartialConcatOp : public framework::OperatorWithKernel { public: @@ -92,7 +92,7 @@ class PartialConcatOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - auto inputs = ctx.MultiInput("X"); + auto inputs = ctx.MultiInput("X"); auto input_data_type = framework::proto::VarType::Type(0); bool flag = 0; for (auto *input : inputs) { diff --git a/paddle/fluid/operators/partial_concat_op.cu b/paddle/fluid/operators/partial_concat_op.cu index a6b2700a1a4da..ae36b85d8520f 100644 --- a/paddle/fluid/operators/partial_concat_op.cu +++ b/paddle/fluid/operators/partial_concat_op.cu @@ -24,7 +24,7 @@ namespace operators { #define CEIL_DIV(x, y) (((x) + (y)-1) / (y)) using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template __global__ void ConcatPartialCUDAKernel(T **in, @@ -72,8 +72,8 @@ template class PartialConcatOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto in_vars = ctx.MultiInput("X"); - Tensor *out = ctx.Output("Out"); + auto in_vars = ctx.MultiInput("X"); + Tensor *out = ctx.Output("Out"); PADDLE_ENFORCE_EQ(in_vars[0] != nullptr, true, platform::errors::InvalidArgument( @@ -153,7 +153,7 @@ template class PartialConcatGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *out_grad = ctx.Input(framework::GradVarName("Out")); + auto *out_grad = ctx.Input(framework::GradVarName("Out")); auto ins = ctx.MultiInput("X"); auto outs = ctx.MultiOutput(framework::GradVarName("X")); diff --git a/paddle/fluid/operators/partial_concat_op.h b/paddle/fluid/operators/partial_concat_op.h index 927ffbede6e6c..d81924298588b 100644 --- a/paddle/fluid/operators/partial_concat_op.h +++ b/paddle/fluid/operators/partial_concat_op.h @@ -23,7 +23,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; static inline int64_t ComputeStartIndex(int64_t start_index, int64_t size) { PADDLE_ENFORCE_EQ( @@ -44,8 +44,8 @@ template class PartialConcatKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto ins = ctx.MultiInput("X"); - framework::Tensor* out = ctx.Output("Out"); + auto ins = ctx.MultiInput("X"); + phi::DenseTensor* out = ctx.Output("Out"); PADDLE_ENFORCE_EQ(ins[0] != nullptr, true, platform::errors::InvalidArgument( @@ -89,7 +89,7 @@ template class PartialConcatGradientOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out_grad = ctx.Input(framework::GradVarName("Out")); + auto* out_grad = ctx.Input(framework::GradVarName("Out")); auto ins = ctx.MultiInput("X"); auto outs = ctx.MultiOutput(framework::GradVarName("X")); diff --git a/paddle/fluid/operators/partial_sum_op.cc b/paddle/fluid/operators/partial_sum_op.cc index eb8271edccf95..aa2f30aaafc2c 100644 --- a/paddle/fluid/operators/partial_sum_op.cc +++ b/paddle/fluid/operators/partial_sum_op.cc @@ -17,7 +17,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class PartialSumOp : public framework::OperatorWithKernel { public: @@ -94,7 +94,7 @@ class PartialSumOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - auto inputs = ctx.MultiInput("X"); + auto inputs = ctx.MultiInput("X"); auto input_data_type = framework::proto::VarType::Type(0); bool flag = 0; for (auto *input : inputs) { diff --git a/paddle/fluid/operators/partial_sum_op.cu b/paddle/fluid/operators/partial_sum_op.cu index e0703532c1268..be1a34651e2d9 100644 --- a/paddle/fluid/operators/partial_sum_op.cu +++ b/paddle/fluid/operators/partial_sum_op.cu @@ -24,7 +24,7 @@ namespace operators { #define CEIL_DIV(x, y) (((x) + (y)-1) / (y)) using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template __global__ void SumArrayPartialCUDAKernel(T **in, @@ -77,8 +77,8 @@ template class PartialSumOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto in_vars = ctx.MultiInput("X"); - Tensor *out = ctx.Output("Out"); + auto in_vars = ctx.MultiInput("X"); + Tensor *out = ctx.Output("Out"); PADDLE_ENFORCE_EQ( in_vars[0] != nullptr, @@ -151,7 +151,8 @@ template class PartialSumGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - const Tensor *out_grad = ctx.Input(framework::GradVarName("Out")); + const Tensor *out_grad = + ctx.Input(framework::GradVarName("Out")); auto ins = ctx.MultiInput("X"); auto outs = ctx.MultiOutput(framework::GradVarName("X")); diff --git a/paddle/fluid/operators/partial_sum_op.h b/paddle/fluid/operators/partial_sum_op.h index a595630319220..35f104ef55a0a 100644 --- a/paddle/fluid/operators/partial_sum_op.h +++ b/paddle/fluid/operators/partial_sum_op.h @@ -21,14 +21,14 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class PartialSumKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto ins = ctx.MultiInput("X"); - Tensor* out = ctx.Output("Out"); + auto ins = ctx.MultiInput("X"); + phi::DenseTensor* out = ctx.Output("Out"); PADDLE_ENFORCE_EQ( ins[0] != nullptr, true, @@ -63,7 +63,7 @@ template class PartialSumGradientOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out_grad = ctx.Input(framework::GradVarName("Out")); + auto* out_grad = ctx.Input(framework::GradVarName("Out")); auto ins = ctx.MultiInput("X"); auto outs = ctx.MultiOutput(framework::GradVarName("X")); diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index e8b35b89157a3..dce9b1360a015 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -32,7 +32,7 @@ namespace operators { bool CanMKLDNNSupportPool(const framework::ExecutionContext& ctx) { if (ctx.Attr("adaptive") == false) return true; // (jczaja): oneDNN is supporting only unchangable in size pool window - auto src_tz = phi::vectorize(ctx.Input("X")->dims()); + auto src_tz = phi::vectorize(ctx.Input("X")->dims()); std::vector ksize = ctx.Attr>("ksize"); // Fast but not exhustive check return ((src_tz[src_tz.size() - 1] % ksize[1] == 0) && @@ -64,7 +64,7 @@ framework::OpKernelType PoolOp::GetExpectedKernelType( framework::OpKernelType PoolOp::GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const { #ifdef PADDLE_WITH_MKLDNN if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && @@ -112,7 +112,7 @@ framework::OpKernelType PoolOpGrad::GetExpectedKernelType( framework::OpKernelType PoolOpGrad::GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const { #ifdef PADDLE_WITH_MKLDNN if ((expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN) && diff --git a/paddle/fluid/operators/pool_op.h b/paddle/fluid/operators/pool_op.h index 06b42e504f099..c08b589cbe12e 100644 --- a/paddle/fluid/operators/pool_op.h +++ b/paddle/fluid/operators/pool_op.h @@ -22,7 +22,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class PoolOp : public framework::OperatorWithKernel { public: @@ -34,7 +34,7 @@ class PoolOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override; }; @@ -48,7 +48,7 @@ class PoolOpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override; }; diff --git a/paddle/fluid/operators/pool_op_mlu.cc b/paddle/fluid/operators/pool_op_mlu.cc index 988eb182a16f0..e2af30faf36f4 100644 --- a/paddle/fluid/operators/pool_op_mlu.cc +++ b/paddle/fluid/operators/pool_op_mlu.cc @@ -46,8 +46,8 @@ class MLUPoolOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto &dev_ctx = ctx.template device_context(); - const Tensor *in_x = ctx.Input("X"); - Tensor *out = ctx.Output("Out"); + const Tensor *in_x = ctx.Input("X"); + Tensor *out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); std::string pooling_type = ctx.Attr("pooling_type"); @@ -102,8 +102,8 @@ class MLUPoolOpKernel : public framework::OpKernel { // transpose NCHW to NHWC since cnnl pool2d has worse performance in that // layout. - framework::Tensor trans_in_x; - framework::Tensor trans_out; + phi::DenseTensor trans_in_x; + phi::DenseTensor trans_out; if (channel_last) { trans_in_x = *in_x; trans_out = *out; @@ -141,7 +141,7 @@ class MLUPoolOpKernel : public framework::OpKernel { handle, pool_mode, out_w, out_h, &extra_input_size); if (extra_input_size > 0) { - framework::Tensor extra_host_tensor; + phi::DenseTensor extra_host_tensor; extra_host_tensor.mutable_data( {static_cast(extra_input_size)}, platform::CPUPlace()); cnnlInitPoolingExtraInput(handle, @@ -149,7 +149,7 @@ class MLUPoolOpKernel : public framework::OpKernel { trans_in_x_desc.get(), trans_out_desc.get(), GetBasePtr(&extra_host_tensor)); - framework::Tensor extra_device_tensor = + phi::DenseTensor extra_device_tensor = ctx.AllocateTmpTensor( {static_cast(extra_input_size)}, dev_ctx); framework::TensorCopy( @@ -212,10 +212,12 @@ class MLUPoolGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto &dev_ctx = ctx.template device_context(); - const Tensor *in_x = ctx.Input("X"); - const Tensor *out = ctx.Input("Out"); - const Tensor *out_grad = ctx.Input(framework::GradVarName("Out")); - Tensor *in_x_grad = ctx.Output(framework::GradVarName("X")); + const Tensor *in_x = ctx.Input("X"); + const Tensor *out = ctx.Input("Out"); + const Tensor *out_grad = + ctx.Input(framework::GradVarName("Out")); + Tensor *in_x_grad = + ctx.Output(framework::GradVarName("X")); in_x_grad->mutable_data(ctx.GetPlace()); std::string pooling_type = ctx.Attr("pooling_type"); @@ -249,10 +251,10 @@ class MLUPoolGradOpKernel : public framework::OpKernel { } // inputs need with NHWC layout - framework::Tensor trans_in_x; - framework::Tensor trans_out; - framework::Tensor trans_out_grad; - framework::Tensor trans_in_x_grad; + phi::DenseTensor trans_in_x; + phi::DenseTensor trans_out; + phi::DenseTensor trans_out_grad; + phi::DenseTensor trans_in_x_grad; if (channel_last) { trans_in_x = *in_x; trans_out 
= *out; @@ -300,7 +302,7 @@ class MLUPoolGradOpKernel : public framework::OpKernel { ceil_mode); if (pooling_type == "max") { - framework::Tensor index_tensor = + phi::DenseTensor index_tensor = ctx.AllocateTmpTensor(trans_out_grad.dims(), dev_ctx); MLUCnnlTensorDesc index_tensor_desc( diff --git a/paddle/fluid/operators/pool_op_npu.cc b/paddle/fluid/operators/pool_op_npu.cc index 7e9b0b65113cc..3fc83a8343c9d 100644 --- a/paddle/fluid/operators/pool_op_npu.cc +++ b/paddle/fluid/operators/pool_op_npu.cc @@ -24,8 +24,8 @@ class NPUPoolOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto &dev_ctx = ctx.template device_context(); - const Tensor *in_x = ctx.Input("X"); - Tensor *out = ctx.Output("Out"); + const Tensor *in_x = ctx.Input("X"); + Tensor *out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); std::string pooling_type = ctx.Attr("pooling_type"); @@ -171,10 +171,12 @@ class NPUPoolGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto &dev_ctx = ctx.template device_context(); - const Tensor *in_x = ctx.Input("X"); - const Tensor *out = ctx.Input("Out"); - const Tensor *out_grad = ctx.Input(framework::GradVarName("Out")); - Tensor *in_x_grad = ctx.Output(framework::GradVarName("X")); + const Tensor *in_x = ctx.Input("X"); + const Tensor *out = ctx.Input("Out"); + const Tensor *out_grad = + ctx.Input(framework::GradVarName("Out")); + Tensor *in_x_grad = + ctx.Output(framework::GradVarName("X")); in_x_grad->mutable_data(ctx.GetPlace()); std::string pooling_type = ctx.Attr("pooling_type"); diff --git a/paddle/fluid/operators/positive_negative_pair_op.h b/paddle/fluid/operators/positive_negative_pair_op.h index d9e55cac59fd8..e3cbeea2c6f15 100644 --- a/paddle/fluid/operators/positive_negative_pair_op.h +++ b/paddle/fluid/operators/positive_negative_pair_op.h @@ -19,7 +19,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template @@ -34,16 +34,19 @@ class PositiveNegativePairKernel : public framework::OpKernel { }; void Compute(const framework::ExecutionContext& context) const override { - auto score_t = context.Input("Score"); - auto label_t = context.Input("Label"); - auto query_t = context.Input("QueryID"); - auto acc_positive_t = context.Input("AccumulatePositivePair"); - auto acc_negative_t = context.Input("AccumulateNegativePair"); - auto acc_neutral_t = context.Input("AccumulateNeutralPair"); - auto positive_t = context.Output("PositivePair"); - auto negative_t = context.Output("NegativePair"); - auto neutral_t = context.Output("NeutralPair"); - auto weight_t = context.Input("Weight"); + auto score_t = context.Input("Score"); + auto label_t = context.Input("Label"); + auto query_t = context.Input("QueryID"); + auto acc_positive_t = + context.Input("AccumulatePositivePair"); + auto acc_negative_t = + context.Input("AccumulateNegativePair"); + auto acc_neutral_t = + context.Input("AccumulateNeutralPair"); + auto positive_t = context.Output("PositivePair"); + auto negative_t = context.Output("NegativePair"); + auto neutral_t = context.Output("NeutralPair"); + auto weight_t = context.Input("Weight"); auto score = score_t->data(); auto label = label_t->data(); diff --git a/paddle/fluid/operators/prelu_op.cc b/paddle/fluid/operators/prelu_op.cc index f7abaf648ebcf..af61cc3c3f399 100644 --- a/paddle/fluid/operators/prelu_op.cc +++ b/paddle/fluid/operators/prelu_op.cc @@ -21,7 +21,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; framework::OpKernelType innerGetKernelTypeForVar( const Tensor &tensor, const framework::OpKernelType &expected_kernel_type) { diff --git a/paddle/fluid/operators/prroi_pool_op.cc b/paddle/fluid/operators/prroi_pool_op.cc index cf8f17d5f747c..c4ebcde91b661 100644 --- a/paddle/fluid/operators/prroi_pool_op.cc +++ b/paddle/fluid/operators/prroi_pool_op.cc @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; class PRROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/paddle/fluid/operators/prroi_pool_op.cu b/paddle/fluid/operators/prroi_pool_op.cu index e95201c472af8..5d5d32ddbb0e8 100644 --- a/paddle/fluid/operators/prroi_pool_op.cu +++ b/paddle/fluid/operators/prroi_pool_op.cu @@ -17,7 +17,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; static constexpr int kNumCUDAThreads = 512; @@ -218,9 +218,9 @@ template class GPUPRROIPoolOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); + auto* in = ctx.Input("X"); auto* rois = ctx.Input("ROIs"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto pooled_height = ctx.Attr("pooled_height"); auto pooled_width = ctx.Attr("pooled_width"); @@ -237,14 +237,14 @@ class GPUPRROIPoolOpKernel : public framework::OpKernel { if (rois_num == 0) return; // set rois batch id - framework::Tensor rois_batch_id_list; + phi::DenseTensor rois_batch_id_list; rois_batch_id_list.Resize({rois_num}); int* rois_batch_id_data = rois_batch_id_list.mutable_data(platform::CPUPlace()); if (ctx.HasInput("BatchRoINums") || rois->lod().empty()) { - auto* batchroinum = ctx.Input("BatchRoINums"); - framework::Tensor batch_index_cpu; + auto* batchroinum = ctx.Input("BatchRoINums"); + phi::DenseTensor batch_index_cpu; framework::TensorCopySync( *batchroinum, platform::CPUPlace(), &batch_index_cpu); @@ -321,12 +321,14 @@ template class GPUPRROIPoolGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); + auto* in = ctx.Input("X"); auto* rois = ctx.Input("ROIs"); - auto* out = ctx.Input("Out"); + auto* out = ctx.Input("Out"); - auto* output_grad = ctx.Input(framework::GradVarName("Out")); - auto* input_grad = ctx.Output(framework::GradVarName("X")); + auto* output_grad = + ctx.Input(framework::GradVarName("Out")); + auto* input_grad = + ctx.Output(framework::GradVarName("X")); auto* input_roi_grad = ctx.Output(framework::GradVarName("ROIs")); @@ -342,14 +344,14 @@ class GPUPRROIPoolGradOpKernel : public framework::OpKernel { if (input_grad || input_roi_grad) { // set roi batch id - framework::Tensor rois_batch_id_list; + phi::DenseTensor rois_batch_id_list; rois_batch_id_list.Resize({rois_num}); int* rois_batch_id_data = rois_batch_id_list.mutable_data(platform::CPUPlace()); if (ctx.HasInput("BatchRoINums") || rois->lod().empty()) { - auto* batchroinum = ctx.Input("BatchRoINums"); - framework::Tensor batch_index_cpu; + auto* batchroinum = ctx.Input("BatchRoINums"); + phi::DenseTensor batch_index_cpu; framework::TensorCopySync( *batchroinum, platform::CPUPlace(), &batch_index_cpu); diff --git a/paddle/fluid/operators/prroi_pool_op.h b/paddle/fluid/operators/prroi_pool_op.h index c071ce370e747..89782d500afcc 100644 --- a/paddle/fluid/operators/prroi_pool_op.h +++ b/paddle/fluid/operators/prroi_pool_op.h @@ -331,9 +331,9 @@ template class CPUPRROIPoolOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); + auto* in = ctx.Input("X"); auto* rois = ctx.Input("ROIs"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto pooled_height = ctx.Attr("pooled_height"); auto pooled_width = ctx.Attr("pooled_width"); @@ -352,12 +352,12 @@ class CPUPRROIPoolOpKernel : public framework::OpKernel { const T* input_data = in->data(); - framework::Tensor rois_batch_id_list; + phi::DenseTensor rois_batch_id_list; rois_batch_id_list.Resize({rois_num}); int* rois_batch_id_data = rois_batch_id_list.mutable_data(ctx.GetPlace()); if (ctx.HasInput("BatchRoINums") || rois->lod().empty()) 
{ - auto* batchroinum = ctx.Input("BatchRoINums"); + auto* batchroinum = ctx.Input("BatchRoINums"); auto* batch_index = batchroinum->data(); int rois_batch_size = batchroinum->dims()[0]; size_t c = 0; @@ -485,15 +485,15 @@ template class CPUPRROIPoolGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - auto* out = ctx.Input("Out"); + auto* in = ctx.Input("X"); + auto* out = ctx.Input("Out"); auto* rois = ctx.Input("ROIs"); auto* output_grad = - ctx.Input(framework::GradVarName("Out")); + ctx.Input(framework::GradVarName("Out")); auto* input_grad = - ctx.Output(framework::GradVarName("X")); + ctx.Output(framework::GradVarName("X")); auto* input_roi_grad = - ctx.Output(framework::GradVarName("ROIs")); + ctx.Output(framework::GradVarName("ROIs")); auto pooled_height = ctx.Attr("pooled_height"); auto pooled_width = ctx.Attr("pooled_width"); @@ -511,12 +511,12 @@ class CPUPRROIPoolGradOpKernel : public framework::OpKernel { int rois_num = rois->dims()[0]; // set roi batch id - framework::Tensor rois_batch_id_list; + phi::DenseTensor rois_batch_id_list; rois_batch_id_list.Resize({rois_num}); int* rois_batch_id_data = rois_batch_id_list.mutable_data(ctx.GetPlace()); if (ctx.HasInput("BatchRoINums") || rois->lod().empty()) { - auto* batchroinum = ctx.Input("BatchRoINums"); + auto* batchroinum = ctx.Input("BatchRoINums"); auto* batch_index = batchroinum->data(); int rois_batch_size = batchroinum->dims()[0]; size_t c = 0; diff --git a/paddle/fluid/operators/pscore/fake_init_op.cc b/paddle/fluid/operators/pscore/fake_init_op.cc index 967714bf446bd..f48166be3129f 100644 --- a/paddle/fluid/operators/pscore/fake_init_op.cc +++ b/paddle/fluid/operators/pscore/fake_init_op.cc @@ -32,7 +32,7 @@ class FakeInitOp : public framework::OperatorBase { private: void RunImpl(const framework::Scope &scope, const platform::Place &dev_place) const override { - framework::Tensor *tensor = nullptr; + phi::DenseTensor *tensor = nullptr; auto &out_var = *scope.FindVar(Output("Out")); diff --git a/paddle/fluid/operators/pull_box_extended_sparse_op.h b/paddle/fluid/operators/pull_box_extended_sparse_op.h index 2e71f0b23a2c9..8191f1dc882ae 100644 --- a/paddle/fluid/operators/pull_box_extended_sparse_op.h +++ b/paddle/fluid/operators/pull_box_extended_sparse_op.h @@ -26,9 +26,9 @@ namespace operators { template static void PullBoxExtendedSparseFunctor( const framework::ExecutionContext &ctx) { - auto inputs = ctx.MultiInput("Ids"); - auto outputs = ctx.MultiOutput("Out"); - auto outputs_extend = ctx.MultiOutput("OutExtend"); + auto inputs = ctx.MultiInput("Ids"); + auto outputs = ctx.MultiOutput("Out"); + auto outputs_extend = ctx.MultiOutput("OutExtend"); const auto slot_size = inputs.size(); std::vector all_keys(slot_size); // BoxPS only supports float now @@ -63,9 +63,9 @@ static void PushBoxExtendedSparseFunctor( const framework::ExecutionContext &ctx) { auto inputs = ctx.MultiInput("Ids"); auto d_output = - ctx.MultiInput(framework::GradVarName("Out")); + ctx.MultiInput(framework::GradVarName("Out")); auto d_output_extend = - ctx.MultiInput(framework::GradVarName("OutExtend")); + ctx.MultiInput(framework::GradVarName("OutExtend")); const auto slot_size = inputs.size(); std::vector all_keys(slot_size); std::vector all_grad_values(slot_size * 2); diff --git a/paddle/fluid/operators/pull_box_sparse_op.h b/paddle/fluid/operators/pull_box_sparse_op.h index 25d8580f38fd8..44c41dd7aa7e6 100644 --- 
a/paddle/fluid/operators/pull_box_sparse_op.h +++ b/paddle/fluid/operators/pull_box_sparse_op.h @@ -29,8 +29,8 @@ namespace operators { template static void PullBoxSparseFunctor(const framework::ExecutionContext &ctx) { - auto inputs = ctx.MultiInput("Ids"); - auto outputs = ctx.MultiOutput("Out"); + auto inputs = ctx.MultiInput("Ids"); + auto outputs = ctx.MultiOutput("Out"); const auto slot_size = inputs.size(); std::vector all_keys(slot_size); // BoxPS only supports float now @@ -63,7 +63,7 @@ template static void PushBoxSparseFunctor(const framework::ExecutionContext &ctx) { auto inputs = ctx.MultiInput("Ids"); auto d_output = - ctx.MultiInput(framework::GradVarName("Out")); + ctx.MultiInput(framework::GradVarName("Out")); const auto slot_size = inputs.size(); std::vector all_keys(slot_size); std::vector all_grad_values(slot_size); diff --git a/paddle/fluid/operators/pull_gpups_sparse_op.h b/paddle/fluid/operators/pull_gpups_sparse_op.h index 0852a903645a6..c9da5a75c248f 100644 --- a/paddle/fluid/operators/pull_gpups_sparse_op.h +++ b/paddle/fluid/operators/pull_gpups_sparse_op.h @@ -25,8 +25,8 @@ namespace operators { template static void PullGpuPSSparseFunctor(const framework::ExecutionContext &ctx) { - auto inputs = ctx.MultiInput("Ids"); - auto outputs = ctx.MultiOutput("Out"); + auto inputs = ctx.MultiInput("Ids"); + auto outputs = ctx.MultiOutput("Out"); auto embedding_size_vec = ctx.Attr>("size"); const auto slot_size = inputs.size(); std::vector all_keys(slot_size); @@ -59,7 +59,7 @@ template static void PushGpuPSSparseFunctor(const framework::ExecutionContext &ctx) { auto inputs = ctx.MultiInput("Ids"); auto d_output = - ctx.MultiInput(framework::GradVarName("Out")); + ctx.MultiInput(framework::GradVarName("Out")); const auto slot_size = inputs.size(); std::vector all_keys(slot_size); std::vector all_grad_values(slot_size); diff --git a/paddle/fluid/operators/put_along_axis_op.cc b/paddle/fluid/operators/put_along_axis_op.cc index 6c31a178eaeb1..65fdb4700964a 100644 --- a/paddle/fluid/operators/put_along_axis_op.cc +++ b/paddle/fluid/operators/put_along_axis_op.cc @@ -39,7 +39,7 @@ class PutAlongAxisOp : public framework::OperatorWithKernel { } framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { return framework::OpKernelType( expected_kernel_type.data_type_, tensor.place(), tensor.layout()); @@ -80,7 +80,7 @@ class PutAlongAxisGradOp : public framework::OperatorWithKernel { } framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { return framework::OpKernelType( expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/pyramid_hash_op.cc b/paddle/fluid/operators/pyramid_hash_op.cc index 0dd74f9324fa3..627f57c0e659c 100644 --- a/paddle/fluid/operators/pyramid_hash_op.cc +++ b/paddle/fluid/operators/pyramid_hash_op.cc @@ -28,7 +28,7 @@ extern "C" { namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using LoD = framework::LoD; @@ -276,9 +276,9 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* bottom = ctx.Input("X"); - auto* _blobs_0 = 
ctx.Input("W"); - auto* _blobs_1 = ctx.Input("WhiteList"); - auto* _blobs_2 = ctx.Input("BlackList"); + auto* _blobs_0 = ctx.Input("W"); + auto* _blobs_1 = ctx.Input("WhiteList"); + auto* _blobs_2 = ctx.Input("BlackList"); auto* top = ctx.Output("Out"); auto* drop_pos = ctx.Output("DropPos"); @@ -513,7 +513,7 @@ class CPUPyramidHashOPGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* bottom = ctx.Input("X"); - auto* _blobs = ctx.Input("W"); + auto* _blobs = ctx.Input("W"); auto* drop_pos = ctx.Input("DropPos"); auto* top = ctx.Input(framework::GradVarName("Out")); diff --git a/paddle/fluid/operators/quantize_linear_op.cc b/paddle/fluid/operators/quantize_linear_op.cc index 7012da3aeda94..c03f158cac4e7 100644 --- a/paddle/fluid/operators/quantize_linear_op.cc +++ b/paddle/fluid/operators/quantize_linear_op.cc @@ -27,11 +27,11 @@ namespace operators { template struct ChannelDequantizeFunctorV2 { void operator()(const phi::CPUContext &dev_ctx, - const framework::Tensor *in, - const framework::Tensor *scale, + const phi::DenseTensor *in, + const phi::DenseTensor *scale, T max_range, const int quant_axis, - framework::Tensor *out) { + phi::DenseTensor *out) { // Dequant op is before quantized op // Dequantize the weight of quantized op auto in_dims = in->dims(); @@ -40,8 +40,8 @@ struct ChannelDequantizeFunctorV2 { if (quant_axis == 0) { for (int64_t i = 0; i < channel; i++) { T s = scale_factor[i]; - framework::Tensor one_channel_in = in->Slice(i, i + 1); - framework::Tensor one_channel_out = out->Slice(i, i + 1); + phi::DenseTensor one_channel_in = in->Slice(i, i + 1); + phi::DenseTensor one_channel_out = out->Slice(i, i + 1); auto in_e = framework::EigenVector::Flatten(one_channel_in); auto out_e = framework::EigenVector::Flatten(one_channel_out); auto &dev = *dev_ctx.eigen_device(); diff --git a/paddle/fluid/operators/quantize_linear_op.cu b/paddle/fluid/operators/quantize_linear_op.cu index 37ca11db3e3e2..c5d8b1928fd78 100644 --- a/paddle/fluid/operators/quantize_linear_op.cu +++ b/paddle/fluid/operators/quantize_linear_op.cu @@ -26,11 +26,11 @@ namespace operators { template struct ChannelDequantizeFunctorV2 { void operator()(const phi::GPUContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor* scale, + const phi::DenseTensor* in, + const phi::DenseTensor* scale, T max_range, const int quant_axis, - framework::Tensor* out) { + phi::DenseTensor* out) { auto in_dims = in->dims(); const T* in_data = in->data(); T* out_data = out->mutable_data(dev_ctx.GetPlace()); diff --git a/paddle/fluid/operators/quantize_linear_op.h b/paddle/fluid/operators/quantize_linear_op.h index fd0579023b378..8434996926aba 100644 --- a/paddle/fluid/operators/quantize_linear_op.h +++ b/paddle/fluid/operators/quantize_linear_op.h @@ -31,22 +31,22 @@ namespace operators { template struct ChannelDequantizeFunctorV2 { void operator()(const DeviceContext& dev_ctx, - const framework::Tensor* in, - const framework::Tensor** scales, + const phi::DenseTensor* in, + const phi::DenseTensor** scales, const int scale_num, T max_range, const int quant_axis, - framework::Tensor* out); + phi::DenseTensor* out); }; template class QuantizeLinearKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* in_scale = context.Input("Scale"); + auto* in = context.Input("X"); + auto* in_scale = context.Input("Scale"); - auto* out = context.Output("Y"); + 
auto* out = context.Output("Y"); out->mutable_data(context.GetPlace()); int bit_length = context.Attr("bit_length"); int round_type = context.Attr("round_type"); @@ -58,8 +58,8 @@ class QuantizeLinearKernel : public framework::OpKernel { if (quant_axis < 0) { if (!is_test) { // training - auto* in_accum = context.Input("InAccum"); - auto* in_state = context.Input("InState"); + auto* in_accum = context.Input("InAccum"); + auto* in_state = context.Input("InState"); phi::DenseTensor tmp_scale; tmp_scale.Resize(phi::make_dim(1)); T* cur_scale_data = dev_ctx.template Alloc(&tmp_scale); @@ -67,9 +67,9 @@ class QuantizeLinearKernel : public framework::OpKernel { FindAbsMaxFunctor()( dev_ctx, in->data(), in->numel(), cur_scale_data); - auto* out_state = context.Output("OutState"); - auto* out_accum = context.Output("OutAccum"); - auto* out_scale = context.Output("OutScale"); + auto* out_state = context.Output("OutState"); + auto* out_accum = context.Output("OutAccum"); + auto* out_scale = context.Output("OutScale"); out_state->mutable_data(context.GetPlace()); out_accum->mutable_data(context.GetPlace()); out_scale->mutable_data(context.GetPlace()); @@ -91,7 +91,7 @@ class QuantizeLinearKernel : public framework::OpKernel { } } else { if (!is_test) { - auto* out_scale = context.Output("OutScale"); + auto* out_scale = context.Output("OutScale"); T* out_scale_data = out_scale->mutable_data(context.GetPlace()); FindChannelAbsMaxFunctor()( dev_ctx, *in, quant_axis, out_scale_data); @@ -110,7 +110,7 @@ class DeQuantizeLinearKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto& dev_ctx = context.template device_context(); - auto* in = context.Input("X"); + auto* in = context.Input("X"); auto in_tmp = phi::Cast( static_cast { *in, experimental::CppTypeToDataType::Type()); - auto* scale = context.Input("Scale"); - auto* out = context.Output("Y"); + auto* scale = context.Input("Scale"); + auto* out = context.Output("Y"); int bit_length = context.Attr("bit_length"); auto quant_axis = context.Attr("quant_axis"); out->mutable_data(dev_ctx.GetPlace()); diff --git a/paddle/fluid/operators/quantize_op.h b/paddle/fluid/operators/quantize_op.h index dd1b3c42fb5f9..46a0469c806e1 100644 --- a/paddle/fluid/operators/quantize_op.h +++ b/paddle/fluid/operators/quantize_op.h @@ -23,7 +23,6 @@ namespace paddle { namespace operators { using framework::OpKernelType; -using framework::Tensor; class QuantOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/random_routing_op.cu b/paddle/fluid/operators/random_routing_op.cu index 0b8aaf2d97078..e59b0263c0dd5 100644 --- a/paddle/fluid/operators/random_routing_op.cu +++ b/paddle/fluid/operators/random_routing_op.cu @@ -30,7 +30,7 @@ static inline int GET_BLOCKS(const int N) { } using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template __global__ void random_routing_kernel(int64_t* data, diff --git a/paddle/fluid/operators/randperm_op.h b/paddle/fluid/operators/randperm_op.h index 3f4b02065a0fd..5512471fc2cdf 100644 --- a/paddle/fluid/operators/randperm_op.h +++ b/paddle/fluid/operators/randperm_op.h @@ -45,7 +45,7 @@ class RandpermKernel : public framework::OpKernel { int n = ctx.Attr("n"); unsigned int seed = static_cast(ctx.Attr("seed")); framework::Variable* out_var = ctx.OutputVar("Out"); - framework::Tensor* out_tensor = + phi::DenseTensor* out_tensor = 
framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(out_var); if (platform::is_cpu_place(ctx.GetPlace())) { @@ -53,7 +53,7 @@ class RandpermKernel : public framework::OpKernel { random_permate(out_data, n, seed); } else { - framework::Tensor tmp_tensor; + phi::DenseTensor tmp_tensor; tmp_tensor.Resize(phi::make_ddim({n})); T* tmp_data = tmp_tensor.mutable_data(platform::CPUPlace()); random_permate(tmp_data, n, seed); diff --git a/paddle/fluid/operators/randperm_op_mlu.cc b/paddle/fluid/operators/randperm_op_mlu.cc index a3ebf8f5c00fc..2dcb0ff27e1ca 100644 --- a/paddle/fluid/operators/randperm_op_mlu.cc +++ b/paddle/fluid/operators/randperm_op_mlu.cc @@ -25,10 +25,10 @@ class RandpermMLUKernel : public framework::OpKernel { int n = ctx.Attr("n"); unsigned int seed = static_cast(ctx.Attr("seed")); framework::Variable* out_var = ctx.OutputVar("Out"); - framework::Tensor* out_tensor = + phi::DenseTensor* out_tensor = framework::GetMutableLoDTensorOrSelectedRowsValueFromVar(out_var); - framework::Tensor tmp_tensor; + phi::DenseTensor tmp_tensor; tmp_tensor.Resize(phi::make_ddim({n})); T* tmp_data = tmp_tensor.mutable_data(platform::CPUPlace()); random_permate(tmp_data, n, seed); diff --git a/paddle/fluid/operators/range_op.cc b/paddle/fluid/operators/range_op.cc index ab9580d5ba95c..8a965034ac45a 100644 --- a/paddle/fluid/operators/range_op.cc +++ b/paddle/fluid/operators/range_op.cc @@ -31,7 +31,7 @@ class RangeOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (platform::is_xpu_place(tensor.place())) { return framework::OpKernelType( diff --git a/paddle/fluid/operators/range_op.h b/paddle/fluid/operators/range_op.h index 1dd1c694bb91c..e59d4f3cfcadd 100644 --- a/paddle/fluid/operators/range_op.h +++ b/paddle/fluid/operators/range_op.h @@ -52,10 +52,10 @@ template class CPURangeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - T start = context.Input("Start")->data()[0]; - T end = context.Input("End")->data()[0]; - T step = context.Input("Step")->data()[0]; - auto* out = context.Output("Out"); + T start = context.Input("Start")->data()[0]; + T end = context.Input("End")->data()[0]; + T step = context.Input("Step")->data()[0]; + auto* out = context.Output("Out"); int64_t size = 0; GetSize(start, end, step, &size); out->Resize(phi::make_ddim({size})); diff --git a/paddle/fluid/operators/range_op_mlu.cc b/paddle/fluid/operators/range_op_mlu.cc index 3e15e0ced0a8f..13d067f8421ad 100644 --- a/paddle/fluid/operators/range_op_mlu.cc +++ b/paddle/fluid/operators/range_op_mlu.cc @@ -21,12 +21,12 @@ template class RangeMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* start_t = context.Input("Start"); - auto* end_t = context.Input("End"); - auto* step_t = context.Input("Step"); - auto* out = context.Output("Out"); + auto* start_t = context.Input("Start"); + auto* end_t = context.Input("End"); + auto* step_t = context.Input("Step"); + auto* out = context.Output("Out"); - framework::Tensor n; + phi::DenseTensor n; framework::TensorCopy( *start_t, platform::CPUPlace(), diff --git a/paddle/fluid/operators/range_op_npu.cc b/paddle/fluid/operators/range_op_npu.cc index 9c063259f82e4..c9985187f5fc1 100644 --- 
a/paddle/fluid/operators/range_op_npu.cc +++ b/paddle/fluid/operators/range_op_npu.cc @@ -22,12 +22,12 @@ template class RangeNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* start_t = context.Input("Start"); - auto* end_t = context.Input("End"); - auto* step_t = context.Input("Step"); - auto* out = context.Output("Out"); + auto* start_t = context.Input("Start"); + auto* end_t = context.Input("End"); + auto* step_t = context.Input("Step"); + auto* out = context.Output("Out"); - framework::Tensor n; + phi::DenseTensor n; framework::TensorCopy( *start_t, platform::CPUPlace(), diff --git a/paddle/fluid/operators/rank_attention_op.cc b/paddle/fluid/operators/rank_attention_op.cc index f68e1668aa9a7..4c740c5985ade 100644 --- a/paddle/fluid/operators/rank_attention_op.cc +++ b/paddle/fluid/operators/rank_attention_op.cc @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class RankAttentionOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/rank_attention_op.cu b/paddle/fluid/operators/rank_attention_op.cu index 83f6f23f98506..36117e605031e 100644 --- a/paddle/fluid/operators/rank_attention_op.cu +++ b/paddle/fluid/operators/rank_attention_op.cu @@ -24,20 +24,18 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - template class RankAttentionCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *X = ctx.Input("X"); - auto *rank_offset = ctx.Input("RankOffset"); - auto *param = ctx.Input("RankParam"); - auto *input_help = ctx.Output("InputHelp"); - auto *ins_rank = ctx.Output("InsRank"); + auto *X = ctx.Input("X"); + auto *rank_offset = ctx.Input("RankOffset"); + auto *param = ctx.Input("RankParam"); + auto *input_help = ctx.Output("InputHelp"); + auto *ins_rank = ctx.Output("InsRank"); int max_rank = ctx.Attr("MaxRank"); int64_t max_size = ctx.Attr("MaxSize"); - auto *Out = ctx.Output("Out"); + auto *Out = ctx.Output("Out"); // check dims auto x_dims = X->dims(); @@ -66,7 +64,7 @@ class RankAttentionCUDAKernel : public framework::OpKernel { int max_ins = std::max(ins_num, max_size); - Tensor param_help; + phi::DenseTensor param_help; param_help = ctx.AllocateTmpTensor( {max_ins * block_matrix_row, para_col}, dev_ctx); param_help.mutable_data(ctx.GetPlace()); @@ -156,15 +154,17 @@ template class RankAttentionGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *X = ctx.Input("X"); // not use data - auto *rank_offset = ctx.Input("RankOffset"); // not use data - auto *param = ctx.Input("RankParam"); // not use data - auto *input_help = ctx.Input("InputHelp"); - auto *ins_rank = ctx.Input("InsRank"); - auto *dout = ctx.Input(framework::GradVarName("Out")); + auto *X = ctx.Input("X"); // not use data + auto *rank_offset = + ctx.Input("RankOffset"); // not use data + auto *param = ctx.Input("RankParam"); // not use data + auto *input_help = ctx.Input("InputHelp"); + auto *ins_rank = ctx.Input("InsRank"); + auto *dout = ctx.Input(framework::GradVarName("Out")); int64_t max_size = ctx.Attr("MaxSize"); - auto *drank_para = ctx.Output(framework::GradVarName("RankParam")); + auto *drank_para = + ctx.Output(framework::GradVarName("RankParam")); // get dim auto x_dims = X->dims(); 
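For context on the mechanical pattern these hunks apply, the sketch below is an illustration only, not part of the patch: the operator name, the "X"/"Out" variable names, and the includes are assumptions, and it is meant to compile inside the Paddle source tree rather than standalone. It shows the shape of a fluid operator kernel after the framework::Tensor to phi::DenseTensor migration, which is the same substitution every hunk in this section performs.

// Illustrative sketch only; not taken from this patch.
// Before the migration a kernel typically relied on the file-local alias
//   using Tensor = framework::Tensor;
// Afterwards the alias (where it is kept) points at phi::DenseTensor and the
// ExecutionContext accessors name phi::DenseTensor explicitly.
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/dense_tensor.h"

namespace paddle {
namespace operators {

using Tensor = phi::DenseTensor;  // retargeted alias, as in the hunks above

template <typename T>
class ExampleKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // Inputs and outputs are requested as phi::DenseTensor directly.
    const phi::DenseTensor* x = ctx.Input<phi::DenseTensor>("X");
    phi::DenseTensor* out = ctx.Output<phi::DenseTensor>("Out");
    out->mutable_data<T>(ctx.GetPlace());
    // ... actual computation elided ...
  }
};

}  // namespace operators
}  // namespace paddle

The MultiInput/MultiOutput and GradVarName variants seen throughout these hunks follow the same pattern, with only the accessor and variable name changing.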
@@ -188,7 +188,7 @@ class RankAttentionGradOpCUDAKernel : public framework::OpKernel { drank_para_eigen.constant(static_cast(0)); // copy data - Tensor param_grad; + phi::DenseTensor param_grad; param_grad = ctx.AllocateTmpTensor( {max_ins * block_matrix_row, para_col}, dev_ctx); param_grad.mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/rank_loss_op.h b/paddle/fluid/operators/rank_loss_op.h index 3e02cfb3fc1e0..4c81129c0efb0 100644 --- a/paddle/fluid/operators/rank_loss_op.h +++ b/paddle/fluid/operators/rank_loss_op.h @@ -25,10 +25,10 @@ template class RankLossKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { - auto* out_t = ctx.Output("Out"); - auto* label_t = ctx.Input("Label"); - auto* left_t = ctx.Input("Left"); - auto* right_t = ctx.Input("Right"); + auto* out_t = ctx.Output("Out"); + auto* label_t = ctx.Input("Label"); + auto* left_t = ctx.Input("Left"); + auto* right_t = ctx.Input("Right"); out_t->mutable_data(ctx.GetPlace()); auto out = framework::EigenVector::Flatten(*out_t); @@ -47,14 +47,14 @@ class RankLossGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { auto* d_left_t = - ctx.Output(framework::GradVarName("Left")); + ctx.Output(framework::GradVarName("Left")); auto* d_right_t = - ctx.Output(framework::GradVarName("Right")); + ctx.Output(framework::GradVarName("Right")); - auto* d_out_t = ctx.Input(framework::GradVarName("Out")); - auto* label_t = ctx.Input("Label"); - auto* left_t = ctx.Input("Left"); - auto* right_t = ctx.Input("Right"); + auto* d_out_t = ctx.Input(framework::GradVarName("Out")); + auto* label_t = ctx.Input("Label"); + auto* left_t = ctx.Input("Left"); + auto* right_t = ctx.Input("Right"); auto& dev = *ctx.template device_context().eigen_device(); auto d_out = framework::EigenVector::Flatten(*d_out_t); diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index 03112fcd9ee58..ab15d0589d7c3 100644 --- a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -192,7 +192,7 @@ void RecurrentBase::LinkTensor(const framework::Scope &src_scope, src_vars, dst_scope, dst_vars, - [&](const framework::Tensor &src, framework::Tensor *dst) { + [&](const phi::DenseTensor &src, phi::DenseTensor *dst) { dst->ShareDataWith(src); }); } @@ -247,8 +247,8 @@ void RecurrentOp::RunImpl(const framework::Scope &scope, Inputs(kInputs), &cur_scope, Inputs(kInputs), - [&seq_offset](const framework::Tensor &outside, - framework::Tensor *inside) { + [&seq_offset](const phi::DenseTensor &outside, + phi::DenseTensor *inside) { inside->ShareDataWith(outside.Slice(seq_offset, seq_offset + 1)); auto dims = phi::vectorize(inside->dims()); dims.erase(dims.begin()); @@ -374,7 +374,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, Inputs(kOutputGrads), &cur_scope, Inputs(kOutputGrads), - [&](const framework::Tensor &outside, framework::Tensor *inside) { + [&](const phi::DenseTensor &outside, phi::DenseTensor *inside) { inside->ShareDataWith(outside.Slice(seq_offset, seq_offset + 1)); auto dims = phi::vectorize(inside->dims()); dims.erase(dims.begin()); @@ -439,7 +439,7 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope, 0) { // Inside Gradient is not created. 
return; } - framework::Tensor src_slice = + phi::DenseTensor src_slice = src_tensor.Slice(seq_offset, seq_offset + 1); dst_tensor->ShareDataWith(src_slice); }, diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc index 434b32329cfaa..68615a44e97c8 100644 --- a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc @@ -25,8 +25,8 @@ template class XPULogsumexpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* input = context.Input("X"); - auto* output = context.Output("Out"); + auto* input = context.Input("X"); + auto* output = context.Output("Out"); auto axis = context.Attr>("axis"); auto reduce_all = context.Attr("reduce_all"); diff --git a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu.cc index 34d45e0ae5f32..ce06d1b1089a5 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu.cc @@ -21,14 +21,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ReduceAnyNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - const Tensor* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + const phi::DenseTensor* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); bool keep_dim = ctx.Attr("keep_dim"); auto dims = ctx.Attr>("dim"); diff --git a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc index 4f76e47069b5e..d652f8b805222 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_any_op_npu_test.cc @@ -33,7 +33,7 @@ limitations under the License. */ namespace f = paddle::framework; namespace p = paddle::platform; -using Tensor = paddle::framework::Tensor; +using Tensor = phi::DenseTensor; USE_OP_ITSELF(reduce_any); USE_OP_DEVICE_KERNEL(reduce_any, NPU); diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op_mlu.cc b/paddle/fluid/operators/reduce_ops/reduce_max_op_mlu.cc index 310c1db205da6..1ece3bdf72616 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_max_op_mlu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_max_op_mlu.cc @@ -22,8 +22,8 @@ template class ReduceMaxMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* input = context.Input("X"); - auto* output = context.Output("Out"); + auto* input = context.Input("X"); + auto* output = context.Output("Out"); int out_dtype = context.Attr("out_dtype"); bool reduce_all = context.Attr("reduce_all"); auto dims = context.Attr>("dim"); @@ -45,7 +45,7 @@ class ReduceMaxMLUKernel : public framework::OpKernel { } auto place = context.GetPlace(); - framework::Tensor cast_out(input->type()); + phi::DenseTensor cast_out(input->type()); cast_out.Resize(output->dims()); cast_out.mutable_data(place); diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc index 13be33bae3db7..172786963e4c9 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_max_op_npu.cc @@ -18,13 +18,13 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ReduceMaxNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto dims = ctx.Attr>("dim"); bool keep_dim = ctx.Attr("keep_dim"); bool reduce_all = ctx.Attr("reduce_all"); @@ -32,7 +32,7 @@ class ReduceMaxNPUKernel : public framework::OpKernel { auto place = ctx.GetPlace(); - framework::Tensor cast_out(x->type()); + phi::DenseTensor cast_out(x->type()); cast_out.Resize(out->dims()); cast_out.mutable_data(place); @@ -115,9 +115,10 @@ template class ReduceMaxGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* out = context.Input("Out"); - auto* out_grad = context.Input(framework::GradVarName("Out")); + auto* x = context.Input("X"); + auto* out = context.Input("Out"); + auto* out_grad = + context.Input(framework::GradVarName("Out")); auto reduce_dims = context.Attr>("dim"); bool reduce_all = context.Attr("reduce_all"); int in_dtype = context.Attr("in_dtype"); @@ -128,7 +129,8 @@ class ReduceMaxGradNPUKernel : public framework::OpKernel { platform::errors::InvalidArgument( "NPU only support in_dtype == -1 in reduce_max_grad op.")); - auto* x_grad = context.Output(framework::GradVarName("X")); + auto* x_grad = + context.Output(framework::GradVarName("X")); x_grad->mutable_data(context.GetPlace()); auto& dev_ctx = diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc index 1faffd57c9ab3..b73bde6275347 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op_mlu.cc @@ -29,9 +29,11 @@ template class ReduceMeanGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* input = context.Input("X"); - auto* output_grad = context.Input(framework::GradVarName("Out")); - auto* input_grad = context.Output(framework::GradVarName("X")); + auto* input = context.Input("X"); + auto* output_grad = + context.Input(framework::GradVarName("Out")); + auto* input_grad = + context.Output(framework::GradVarName("X")); input_grad->mutable_data(context.GetPlace()); bool reduce_all = context.Attr("reduce_all"); diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc index 4e277d2c62231..feca58ce19861 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc @@ -22,8 +22,8 @@ template class NPUReduceMeanOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); output->mutable_data(ctx.GetPlace()); bool reduce_all = ctx.Attr("reduce_all"); @@ -56,9 +56,11 @@ template class NPUReduceMeanGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output_grad = ctx.Input(framework::GradVarName("Out")); - auto* input_grad = 
ctx.Output(framework::GradVarName("X")); + auto* input = ctx.Input("X"); + auto* output_grad = + ctx.Input(framework::GradVarName("Out")); + auto* input_grad = + ctx.Output(framework::GradVarName("X")); input_grad->mutable_data(ctx.GetPlace()); bool reduce_all = ctx.Attr("reduce_all"); diff --git a/paddle/fluid/operators/reduce_ops/reduce_min_op_mlu.cc b/paddle/fluid/operators/reduce_ops/reduce_min_op_mlu.cc index 43879af06ea59..631b32e59c822 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_min_op_mlu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_min_op_mlu.cc @@ -22,8 +22,8 @@ template class ReduceMinMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* input = context.Input("X"); - auto* output = context.Output("Out"); + auto* input = context.Input("X"); + auto* output = context.Output("Out"); int out_dtype = context.Attr("out_dtype"); bool reduce_all = context.Attr("reduce_all"); auto dims = context.Attr>("dim"); @@ -45,7 +45,7 @@ class ReduceMinMLUKernel : public framework::OpKernel { } auto place = context.GetPlace(); - framework::Tensor cast_out(input->type()); + phi::DenseTensor cast_out(input->type()); cast_out.Resize(output->dims()); cast_out.mutable_data(place); diff --git a/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc index 70d995284a288..19efb2e6bfb4c 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_min_op_npu.cc @@ -18,13 +18,13 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ReduceMinNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto dims = ctx.Attr>("dim"); bool keep_dim = ctx.Attr("keep_dim"); bool reduce_all = ctx.Attr("reduce_all"); @@ -32,7 +32,7 @@ class ReduceMinNPUKernel : public framework::OpKernel { auto place = ctx.GetPlace(); - framework::Tensor cast_out(x->type()); + phi::DenseTensor cast_out(x->type()); cast_out.Resize(out->dims()); cast_out.mutable_data(place); diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.cu.h b/paddle/fluid/operators/reduce_ops/reduce_op.cu.h index d7f153700cfa2..a62bac88ca399 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.cu.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.cu.h @@ -32,8 +32,8 @@ template void TensorReduceImpl(const phi::GPUContext& dev_ctx, - const framework::Tensor& x, - framework::Tensor* y, + const phi::DenseTensor& x, + phi::DenseTensor* y, const TransformOp& transform, const std::vector& origin_reduce_dims, gpuStream_t stream, diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h index d305a65e0d133..991fdfeed176c 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -48,7 +48,7 @@ namespace operators { keep_dim); \ } -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DDim = framework::DDim; inline void GetShuffledDim(const DDim& src_dims, @@ -101,8 +101,8 @@ static inline std::vector GetReduceDim(const std::vector& dims, } template void GetShuffledInput(const framework::ExecutionContext& context, - const Tensor* input, - Tensor* 
shuffled_input, + const phi::DenseTensor* input, + phi::DenseTensor* shuffled_input, const std::vector& dims) { DDim shuffled_dims(input->dims()); std::vector perm_axis(input->dims().size()); @@ -132,8 +132,8 @@ inline void GetOriginDimFromShuffled(const DDim& src_dim, template void HandleLargeDim(const framework::ExecutionContext& context, - const Tensor* input, - Tensor* output, + const phi::DenseTensor* input, + phi::DenseTensor* output, const std::vector& dims, bool keep_dim) { // shuffle the reduced dim to the end @@ -157,10 +157,10 @@ void HandleLargeDim(const framework::ExecutionContext& context, template void HandleLargeDimGrad(const framework::ExecutionContext& context, - const framework::Tensor* x, - const framework::Tensor* out, - const framework::Tensor* dout, - framework::Tensor* dx, + const phi::DenseTensor* x, + const phi::DenseTensor* out, + const phi::DenseTensor* dout, + phi::DenseTensor* dx, Functor functor, const std::vector& dims) { const int64_t unreduced = out->numel(); @@ -198,14 +198,14 @@ void HandleLargeDimGrad(const framework::ExecutionContext& context, template struct ReduceKernelFunctor { - const Tensor* input; - Tensor* output; + const phi::DenseTensor* input; + phi::DenseTensor* output; std::vector dims; bool keep_dim; bool reduce_all; const framework::ExecutionContext& context; - ReduceKernelFunctor(const Tensor* input, - Tensor* output, + ReduceKernelFunctor(const phi::DenseTensor* input, + phi::DenseTensor* output, const std::vector& dims, bool keep_dim, bool reduce_all, @@ -261,12 +261,12 @@ class ReduceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { bool reduce_all = context.Attr("reduce_all"); - auto* output = context.Output("Out"); + auto* output = context.Output("Out"); auto dims = context.Attr>("dim"); bool keep_dim = context.Attr("keep_dim"); int out_dtype = context.Attr("out_dtype"); framework::proto::VarType::Type cast_out_dtype; - auto* input = context.Input("X"); + auto* input = context.Input("X"); if (out_dtype < 0) { cast_out_dtype = static_cast( @@ -299,10 +299,10 @@ class ReduceKernel : public framework::OpKernel { template void LaunchReduceGradKernel(const framework::ExecutionContext& context, - const framework::Tensor* input0, - const framework::Tensor* input1, - const framework::Tensor* input2, - paddle::framework::Tensor* output, + const phi::DenseTensor* input0, + const phi::DenseTensor* input1, + const phi::DenseTensor* input2, + phi::DenseTensor* output, Functor functor, const std::vector& dims, bool reduce_all = false) { @@ -400,18 +400,20 @@ template class ReduceGradKernel : public framework::OpKernel { public: - void ComputeFromInput(const Tensor* input2, + void ComputeFromInput(const phi::DenseTensor* input2, const framework::ExecutionContext& context) const { bool reduce_all = context.Attr("reduce_all"); auto dims = context.Attr>("dim"); - auto* input0 = context.Input("X"); - auto* input1 = context.Input("Out"); + auto* input0 = context.Input("X"); + auto* input1 = context.Input("Out"); - auto* output = context.Output(framework::GradVarName("X")); + auto* output = + context.Output(framework::GradVarName("X")); output->mutable_data(context.GetPlace()); // The dims has full dim, set the reduce_all is True - const auto& input_dim_size = context.Input("X")->dims().size(); + const auto& input_dim_size = + context.Input("X")->dims().size(); std::set dims_set(dims.begin(), dims.end()); bool full_dim = true; for (auto i = 0; i < input_dim_size; i++) { @@ -452,7 
+454,8 @@ class ReduceGradKernel : public framework::OpKernel { int in_dtype = context.Attr("in_dtype"); if (in_dtype >= 0) { Tensor tmp_tensor; - auto* pre_input = context.Input(framework::GradVarName("Out")); + auto* pre_input = + context.Input(framework::GradVarName("Out")); auto in_kernel_type = framework::OpKernelType( framework::TransToProtoVarType(pre_input->dtype()), context.GetPlace()); @@ -464,7 +467,8 @@ class ReduceGradKernel : public framework::OpKernel { ComputeFromInput(&tmp_tensor, context); } else { - auto* input2 = context.Input(framework::GradVarName("Out")); + auto* input2 = + context.Input(framework::GradVarName("Out")); ComputeFromInput(input2, context); } } @@ -666,7 +670,7 @@ class ReduceGradOp : public framework::OperatorWithKernel { ctx, framework::GradVarName("Out")); #ifdef PADDLE_WITH_MKLDNN auto CanMKLDNNReduceGradBeUsed = [&]() { - auto dx_dims = ctx.Input("X")->dims(); + auto dx_dims = ctx.Input("X")->dims(); if (dx_dims.size() > 5) return false; // max 5D tensor is supported @@ -745,8 +749,8 @@ class ReduceCudaKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { bool reduce_all = context.Attr("reduce_all"); - const Tensor* input = context.Input("X"); - Tensor* output = context.Output("Out"); + const phi::DenseTensor* input = context.Input("X"); + phi::DenseTensor* output = context.Output("Out"); auto out_dtype = context.Attr("out_dtype"); auto pt_out_dtype = paddle::framework::TransToPhiDataType( static_cast(out_dtype)); @@ -777,11 +781,11 @@ class ReduceCudaGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { bool reduce_all = context.Attr("reduce_all"); std::vector dims = context.Attr>("dim"); - auto* in_x = context.Input("X"); + auto* in_x = context.Input("X"); auto* d_out = - context.Input(framework::GradVarName("Out")); - auto* d_x = context.Output(framework::GradVarName("X")); + context.Input(framework::GradVarName("Out")); + auto* d_x = context.Output(framework::GradVarName("X")); auto out_dtype = context.Attr("in_dtype"); auto pt_out_dtype = framework::TransToPhiDataType( static_cast(out_dtype)); @@ -795,7 +799,7 @@ class ReduceCudaGradKernel : public framework::OpKernel { update_dims[i] = 1; } // make new tensor - framework::Tensor new_d_out(d_out->type()); + phi::DenseTensor new_d_out(d_out->type()); new_d_out.ShareDataWith(*d_out); new_d_out.Resize(phi::make_ddim(update_dims)); auto& dev_ctx = context.cuda_device_context(); diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_function.h b/paddle/fluid/operators/reduce_ops/reduce_op_function.h index 5f02a475d7a91..39a0dc044f272 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_function.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_function.h @@ -21,7 +21,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DDim = framework::DDim; template void ReduceFunctor(const DeviceContext& context, - const framework::Tensor& input, - framework::Tensor* output, + const phi::DenseTensor& input, + phi::DenseTensor* output, const std::vector& dims, bool keep_dim) { auto x = EigenTensor::From(input); @@ -81,10 +81,10 @@ void ReduceFunctor(const DeviceContext& context, template void ReduceGradFunctor(const DeviceContext& context, - const framework::Tensor& input0, - const framework::Tensor& input1, - const framework::Tensor& input2, - framework::Tensor* output, + const phi::DenseTensor& input0, + const 
phi::DenseTensor& input1, + const phi::DenseTensor& input2, + phi::DenseTensor* output, Functor functor, const std::vector& dims) { auto x = EigenTensor::From(input0); diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_mlu.h b/paddle/fluid/operators/reduce_ops/reduce_op_mlu.h index 27c5f144bef04..6af7967b81150 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_mlu.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_mlu.h @@ -31,8 +31,8 @@ void MLUReduceOp(const framework::ExecutionContext& context, platform::is_mlu_place(context.GetPlace()), true, platform::errors::Unavailable("This kernel only runs on MLU.")); - auto* input = context.Input("X"); - auto* output = context.Output("Out"); + auto* input = context.Input("X"); + auto* output = context.Output("Out"); output->mutable_data(context.GetPlace()); bool reduce_all = context.Attr("reduce_all"); diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h index 57df3c1a887f5..35cc8fea6d0ba 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h @@ -40,8 +40,8 @@ void XPUReduce(const framework::ExecutionContext& context, platform::errors::Unavailable("This kernel only runs on XPU.")); bool reduce_all = context.Attr("reduce_all"); auto dims = context.Attr>("dim"); - auto* x = context.Input("X"); - auto* y = context.Output("Out"); + auto* x = context.Input("X"); + auto* y = context.Output("Out"); y->mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); diff --git a/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc index 5c94bfc4bd0a8..85b589ebf916e 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_prod_op_npu.cc @@ -18,13 +18,13 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ReduceProdNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto dims = ctx.Attr>("dim"); bool keep_dim = ctx.Attr("keep_dim"); bool reduce_all = ctx.Attr("reduce_all"); @@ -32,7 +32,7 @@ class ReduceProdNPUKernel : public framework::OpKernel { auto place = ctx.GetPlace(); - framework::Tensor cast_out(x->type()); + phi::DenseTensor cast_out(x->type()); cast_out.Resize(out->dims()); cast_out.mutable_data(place); diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op.h b/paddle/fluid/operators/reduce_ops/reduce_sum_op.h index e0c11feb036f2..69c8935dafd6b 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op.h @@ -28,12 +28,13 @@ template class ReduceSumGradKernel : public framework::OpKernel { public: - void ComputeFromInput(const Tensor* input2, + void ComputeFromInput(const phi::DenseTensor* input2, const framework::ExecutionContext& context) const { auto dims = context.Attr>("dim"); - auto* input0 = context.Input("X"); + auto* input0 = context.Input("X"); - auto* output = context.Output(framework::GradVarName("X")); + auto* output = + context.Output(framework::GradVarName("X")); output->mutable_data(context.GetPlace()); const auto* input2_d = input2->data(); auto* output_d = output->data(); @@ -80,7 +81,8 @@ class ReduceSumGradKernel : public framework::OpKernel { if (in_dtype >= 0) { Tensor tmp_tensor; - auto* pre_input = context.Input(framework::GradVarName("Out")); + auto* pre_input = + context.Input(framework::GradVarName("Out")); auto in_kernel_type = framework::OpKernelType( framework::TransToProtoVarType(pre_input->dtype()), context.GetPlace()); @@ -91,7 +93,8 @@ class ReduceSumGradKernel : public framework::OpKernel { in_kernel_type, out_kernel_type, *pre_input, &tmp_tensor); ComputeFromInput(&tmp_tensor, context); } else { - auto* input2 = context.Input(framework::GradVarName("Out")); + auto* input2 = + context.Input(framework::GradVarName("Out")); ComputeFromInput(input2, context); } return; diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_mlu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_mlu.cc index e8b66a2bf2f7c..4ecf6e907b4cb 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op_mlu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op_mlu.cc @@ -29,9 +29,11 @@ template class ReduceSumGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in = context.Input("X"); - auto* out_grad = context.Input(framework::GradVarName("Out")); - auto* in_grad = context.Output(framework::GradVarName("X")); + auto* in = context.Input("X"); + auto* out_grad = + context.Input(framework::GradVarName("Out")); + auto* in_grad = + context.Output(framework::GradVarName("X")); in_grad->mutable_data(context.GetPlace()); bool reduce_all = context.Attr("reduce_all"); diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc index e3b5755d1a6b9..6ba8a9c1373a1 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc @@ -26,8 +26,8 @@ template class ReduceSumNPUKernel : public 
framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); bool reduce_all = ctx.Attr("reduce_all"); bool keep_dims = ctx.Attr("keep_dim"); auto dims = ctx.Attr>("dim"); @@ -43,8 +43,8 @@ class ReduceSumNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - framework::Tensor cast_x; - framework::Tensor cast_out; + phi::DenseTensor cast_x; + phi::DenseTensor cast_out; // NOTE: ReduceSumD only supports fp32 and fp16 if (framework::TransToProtoVarType(x->dtype()) != framework::proto::VarType::FP32 && @@ -106,10 +106,9 @@ template class ReduceSumGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out_grad = - ctx.Input(framework::GradVarName("Out")); - auto* x_grad = ctx.Output(framework::GradVarName("X")); + auto* x = ctx.Input("X"); + auto* out_grad = ctx.Input(framework::GradVarName("Out")); + auto* x_grad = ctx.Output(framework::GradVarName("X")); bool reduce_all = ctx.Attr("reduce_all"); bool keep_dims = ctx.Attr("keep_dim"); auto dims = ctx.Attr>("dim"); diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc index 1d36bdb284121..29f24de021b49 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc @@ -36,9 +36,10 @@ class ReduceSumGradXPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto dims = context.Attr>("dim"); bool reduce_all = context.Attr("reduce_all"); - auto* x = context.Input("X"); - auto* out = context.Input(framework::GradVarName("Out")); - auto* x_grad = context.Output(framework::GradVarName("X")); + auto* x = context.Input("X"); + auto* out = context.Input(framework::GradVarName("Out")); + auto* x_grad = + context.Output(framework::GradVarName("X")); int in_dtype = context.Attr("in_dtype"); PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/repeat_interleave_op.cc b/paddle/fluid/operators/repeat_interleave_op.cc index a3f04dd202a3c..aaef332bd0007 100644 --- a/paddle/fluid/operators/repeat_interleave_op.cc +++ b/paddle/fluid/operators/repeat_interleave_op.cc @@ -21,8 +21,6 @@ namespace paddle { namespace operators { -using framework::Tensor; - class RepeatInterleaveOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/requantize_op.h b/paddle/fluid/operators/requantize_op.h index 8166aa98f076f..5b2f0148f1529 100644 --- a/paddle/fluid/operators/requantize_op.h +++ b/paddle/fluid/operators/requantize_op.h @@ -23,7 +23,6 @@ namespace paddle { namespace operators { using framework::OpKernelType; -using framework::Tensor; class ReQuantOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index 6a25e2c790287..f54f4880747a5 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -41,7 +41,7 @@ class OpBase; namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class ReshapeOp : public framework::OperatorWithKernel { public: @@ -383,7 +383,7 @@ class ReshapeKernel { auto *in = ctx.Input("X"); auto 
list_new_shape_tensor = - ctx.MultiInput("ShapeTensor"); + ctx.MultiInput("ShapeTensor"); auto *shape_tensor = ctx.HasInput("Shape") ? ctx.Input("Shape") : nullptr; @@ -394,7 +394,7 @@ class ReshapeKernel { for (auto &tensor : list_new_shape_tensor) { if (platform::is_gpu_place(tensor->place()) || platform::is_xpu_place(tensor->place())) { - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync( *tensor, platform::CPUPlace(), &temp); pt_vec_shape.push_back(std::move(temp)); @@ -407,7 +407,7 @@ class ReshapeKernel { phi::DenseTensor pt_shape; if (platform::is_gpu_place(shape_tensor->place()) || platform::is_xpu_place(shape_tensor->place())) { - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync( *shape_tensor, platform::CPUPlace(), &temp); pt_shape = std::move(temp); @@ -450,8 +450,8 @@ class ReshapeKernel { class ReshapeGradKernel { public: void operator()(const framework::ExecutionContext &ctx) const { - auto *d_out = ctx.Input(framework::GradVarName("Out")); - auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_out = ctx.Input(framework::GradVarName("Out")); + auto *d_x = ctx.Output(framework::GradVarName("X")); d_x->mutable_data(ctx.GetPlace(), d_out->type()); if (platform::is_cpu_place(ctx.GetPlace())) { @@ -479,9 +479,9 @@ class ReshapeGradKernel { class ReshapeDoubleGradKernel { public: void operator()(const framework::ExecutionContext &ctx) const { - auto *dd_x = ctx.Input("DDX"); - auto *d_out = ctx.Input("DOut"); - auto *dd_out = ctx.Output("DDOut"); + auto *dd_x = ctx.Input("DDX"); + auto *d_out = ctx.Input("DOut"); + auto *dd_out = ctx.Output("DDOut"); dd_out->mutable_data(ctx.GetPlace(), dd_x->type()); if (platform::is_cpu_place(ctx.GetPlace())) { diff --git a/paddle/fluid/operators/reshape_op_mlu.cc b/paddle/fluid/operators/reshape_op_mlu.cc index 46ab9534b6801..fa04ea6a3e50f 100644 --- a/paddle/fluid/operators/reshape_op_mlu.cc +++ b/paddle/fluid/operators/reshape_op_mlu.cc @@ -22,11 +22,11 @@ template class Reshape2MLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); std::vector target_shape_vector; - auto shape_tensor_vector = ctx.MultiInput("ShapeTensor"); + auto shape_tensor_vector = ctx.MultiInput("ShapeTensor"); if (shape_tensor_vector.size() > 0) { for (auto* shape_tensor : shape_tensor_vector) { PADDLE_ENFORCE_EQ( @@ -117,8 +117,8 @@ template class Reshape2GradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* d_x = ctx.Output(framework::GradVarName("X")); - auto* d_out = ctx.Input(framework::GradVarName("Out")); + auto* d_x = ctx.Output(framework::GradVarName("X")); + auto* d_out = ctx.Input(framework::GradVarName("Out")); auto in_dims = d_x->dims(); d_x->mutable_data(ctx.GetPlace(), d_out->type()); diff --git a/paddle/fluid/operators/reshape_op_npu.cc b/paddle/fluid/operators/reshape_op_npu.cc index a5ffeb5080799..e87f433586874 100644 --- a/paddle/fluid/operators/reshape_op_npu.cc +++ b/paddle/fluid/operators/reshape_op_npu.cc @@ -30,11 +30,11 @@ class Reshape2NPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); auto place = ctx.GetPlace(); - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); std::vector target_shape_vector; - auto 
shape_tensor_vector = ctx.MultiInput("ShapeTensor"); + auto shape_tensor_vector = ctx.MultiInput("ShapeTensor"); if (shape_tensor_vector.size() > 0) { for (auto* shape_tensor : shape_tensor_vector) { PADDLE_ENFORCE_EQ( @@ -127,8 +127,8 @@ template class Reshape2GradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* d_x = ctx.Output(framework::GradVarName("X")); - auto* d_out = ctx.Input(framework::GradVarName("Out")); + auto* d_x = ctx.Output(framework::GradVarName("X")); + auto* d_out = ctx.Input(framework::GradVarName("Out")); auto in_dims = d_x->dims(); d_x->mutable_data(ctx.GetPlace(), d_out->type()); diff --git a/paddle/fluid/operators/rnn_op_mlu.cc b/paddle/fluid/operators/rnn_op_mlu.cc index fe567333b6d40..cf4e255668232 100644 --- a/paddle/fluid/operators/rnn_op_mlu.cc +++ b/paddle/fluid/operators/rnn_op_mlu.cc @@ -20,9 +20,9 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DDim = framework::DDim; -using TensorList = std::vector; +using TensorList = std::vector; template void reset_parameter_vector( const std::vector& raw_params_vec, @@ -60,23 +60,23 @@ class RNNMLUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { // Input auto& dev_ctx = GetDevCtxFromCTX(ctx); - auto* input = ctx.Input("Input"); - auto pre_state = ctx.MultiInput("PreState"); - auto weight_list = ctx.MultiInput("WeightList"); + auto* input = ctx.Input("Input"); + auto pre_state = ctx.MultiInput("PreState"); + auto weight_list = ctx.MultiInput("WeightList"); bool has_seq_length = ctx.HasInput("SequenceLength"); // Output - auto state = ctx.MultiOutput("State"); - auto* output = ctx.Output("Out"); - auto* reserve_data = ctx.Output("Reserve"); + auto state = ctx.MultiOutput("State"); + auto* output = ctx.Output("Out"); + auto* reserve_data = ctx.Output("Reserve"); // Attributes const int& num_layers = ctx.Attr("num_layers"); const bool& is_bidirec = ctx.Attr("is_bidirec"); const int& hidden_size = ctx.Attr("hidden_size"); const std::string& mode = ctx.Attr("mode"); - const Tensor* sequence_length = nullptr; + const phi::DenseTensor* sequence_length = nullptr; if (has_seq_length) { - sequence_length = ctx.Input("SequenceLength"); + sequence_length = ctx.Input("SequenceLength"); } auto init_h = pre_state[0]; // -> hx @@ -178,7 +178,7 @@ class RNNMLUKernel : public framework::OpKernel { // copy weight params size_t weightspace_size; - framework::Tensor weightspace; + phi::DenseTensor weightspace; PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetRNNWeightSpaceSize( GetHandleFromCTX(ctx), rnn_desc.get(), &weightspace_size)); @@ -306,10 +306,9 @@ class RNNMLUKernel : public framework::OpKernel { auto masked_mode = CNNL_MASKED_FILL; float off_value = 0.0f; - framework::Tensor on_value_tensor(input->dtype()); - framework::Tensor masked_tensor(framework::TransToPhiDataType(VT::INT8)); - framework::Tensor h_masked_tensor( - framework::TransToPhiDataType(VT::INT8)); + phi::DenseTensor on_value_tensor(input->dtype()); + phi::DenseTensor masked_tensor(framework::TransToPhiDataType(VT::INT8)); + phi::DenseTensor h_masked_tensor(framework::TransToPhiDataType(VT::INT8)); on_value_tensor.Resize({1}); masked_tensor.Resize({seq_len, batch_size, direction_num * hidden_size}); h_masked_tensor.Resize( @@ -362,20 +361,20 @@ class RNNMLUGradKernel : public framework::OpKernel { auto& dev_ctx = ctx.template device_context(); 
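A minimal sketch of the multi-tensor accessors used by the RNN MLU forward kernel in this hunk, with the stripped template arguments reconstructed (assumption):

    using TensorList = std::vector<phi::DenseTensor>;   // was std::vector<framework::Tensor>

    auto* input      = ctx.Input<phi::DenseTensor>("Input");
    auto pre_state   = ctx.MultiInput<phi::DenseTensor>("PreState");    // std::vector<const phi::DenseTensor*>
    auto weight_list = ctx.MultiInput<phi::DenseTensor>("WeightList");
    auto state       = ctx.MultiOutput<phi::DenseTensor>("State");      // std::vector<phi::DenseTensor*>
    auto* output     = ctx.Output<phi::DenseTensor>("Out");

    const phi::DenseTensor* sequence_length = nullptr;
    if (ctx.HasInput("SequenceLength")) {
      sequence_length = ctx.Input<phi::DenseTensor>("SequenceLength");
    }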
auto stream = ctx.template device_context().stream(); // get the tensor pointer for the input - auto* input = ctx.Input("Input"); - auto pre_state = ctx.MultiInput("PreState"); - auto weight_list = ctx.MultiInput("WeightList"); - auto* output = ctx.Input("Out"); - auto* reserve_data = ctx.Input("Reserve"); + auto* input = ctx.Input("Input"); + auto pre_state = ctx.MultiInput("PreState"); + auto weight_list = ctx.MultiInput("WeightList"); + auto* output = ctx.Input("Out"); + auto* reserve_data = ctx.Input("Reserve"); const int& num_layers = ctx.Attr("num_layers"); const bool& is_bidirec = ctx.Attr("is_bidirec"); const int& hidden_size = ctx.Attr("hidden_size"); const std::string& mode = ctx.Attr("mode"); bool has_seq_length = ctx.HasInput("SequenceLength"); - const Tensor* sequence_length = nullptr; + const phi::DenseTensor* sequence_length = nullptr; if (has_seq_length) { - sequence_length = ctx.Input("SequenceLength"); + sequence_length = ctx.Input("SequenceLength"); } PADDLE_ENFORCE_EQ( @@ -387,19 +386,22 @@ class RNNMLUGradKernel : public framework::OpKernel { auto init_h = pre_state[0]; // -> hx auto init_c = pre_state[1]; // -> cx - auto output_grad = ctx.Input(framework::GradVarName("Out")); - auto state_grad = ctx.MultiInput(framework::GradVarName("State")); + auto output_grad = + ctx.Input(framework::GradVarName("Out")); + auto state_grad = + ctx.MultiInput(framework::GradVarName("State")); auto last_h_grad = state_grad[0]; // -> dhy auto last_c_grad = state_grad[1]; // -> dcy // get the tensor pointer for the output - auto* input_grad = ctx.Output(framework::GradVarName("Input")); - auto weight_grad_list = ctx.MultiOutput( - framework::GradVarName("WeightList")); + auto* input_grad = + ctx.Output(framework::GradVarName("Input")); + auto weight_grad_list = + ctx.MultiOutput(framework::GradVarName("WeightList")); auto pre_state_grad = - ctx.MultiOutput(framework::GradVarName("PreState")); - Tensor* init_h_grad = nullptr; - Tensor* init_c_grad = nullptr; + ctx.MultiOutput(framework::GradVarName("PreState")); + phi::DenseTensor* init_h_grad = nullptr; + phi::DenseTensor* init_c_grad = nullptr; if (pre_state_grad.size() > 0) { // has gradient init_h_grad = pre_state_grad[0]; // -> dhx init_c_grad = pre_state_grad[1]; // -> dcx @@ -458,8 +460,8 @@ class RNNMLUGradKernel : public framework::OpKernel { FillMLUTensorWithHostValue(ctx, static_cast(0.0), input_grad); Tensor a, b; - Tensor* dynamic_grad_pre_h = &a; - Tensor* dynamic_grad_pre_c = &b; + phi::DenseTensor* dynamic_grad_pre_h = &a; + phi::DenseTensor* dynamic_grad_pre_c = &b; if (init_h_grad) { init_h_grad->mutable_data(last_h_grad->dims(), ctx.GetPlace()); FillMLUTensorWithHostValue(ctx, static_cast(0.0), init_h_grad); @@ -516,7 +518,7 @@ class RNNMLUGradKernel : public framework::OpKernel { // copy weight size_t weightspace_size; - framework::Tensor weightspace, dweightspace; + phi::DenseTensor weightspace, dweightspace; PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetRNNWeightSpaceSize( GetHandleFromCTX(ctx), rnn_desc.get(), &weightspace_size)); diff --git a/paddle/fluid/operators/roi_align_op.cc b/paddle/fluid/operators/roi_align_op.cc index 922d255bbe20e..75054916e90da 100644 --- a/paddle/fluid/operators/roi_align_op.cc +++ b/paddle/fluid/operators/roi_align_op.cc @@ -20,7 +20,7 @@ limitations under the License. 
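A minimal sketch of the gradient-side accessors in the RNNMLUGradKernel hunk above, again with the elided template arguments filled in as an assumption:

    auto output_grad = ctx.Input<phi::DenseTensor>(framework::GradVarName("Out"));
    auto state_grad  = ctx.MultiInput<phi::DenseTensor>(framework::GradVarName("State"));

    auto* input_grad = ctx.Output<phi::DenseTensor>(framework::GradVarName("Input"));
    auto weight_grad_list =
        ctx.MultiOutput<phi::DenseTensor>(framework::GradVarName("WeightList"));
    auto pre_state_grad =
        ctx.MultiOutput<phi::DenseTensor>(framework::GradVarName("PreState"));

    phi::DenseTensor* init_h_grad = nullptr;   // local grad pointers now use the phi type
    phi::DenseTensor* init_c_grad = nullptr;
    if (pre_state_grad.size() > 0) {
      init_h_grad = pre_state_grad[0];   // -> dhx
      init_c_grad = pre_state_grad[1];   // -> dcx
    }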
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; class ROIAlignOp : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/roi_align_op_mlu.cc b/paddle/fluid/operators/roi_align_op_mlu.cc index c6f17b56cd074..58791ef1bca2a 100644 --- a/paddle/fluid/operators/roi_align_op_mlu.cc +++ b/paddle/fluid/operators/roi_align_op_mlu.cc @@ -19,16 +19,16 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template class ROIAlignOpMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); + auto* in = ctx.Input("X"); auto* rois = ctx.Input("ROIs"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); out->set_layout(framework::DataLayout::kNHWC); @@ -46,7 +46,7 @@ class ROIAlignOpMLUKernel : public framework::OpKernel { std::vector roi_batch_id_list(rois_num); int rois_batch_size = 0; if (ctx.HasInput("RoisNum")) { - auto* rois_num_t = ctx.Input("RoisNum"); + auto* rois_num_t = ctx.Input("RoisNum"); rois_batch_size = rois_num_t->numel(); PADDLE_ENFORCE_EQ( rois_batch_size, @@ -176,8 +176,8 @@ class ROIAlignGradOpMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* rois = ctx.Input("ROIs"); - auto* out_grad = ctx.Input(framework::GradVarName("Out")); - auto* in_grad = ctx.Output(framework::GradVarName("X")); + auto* out_grad = ctx.Input(framework::GradVarName("Out")); + auto* in_grad = ctx.Output(framework::GradVarName("X")); auto spatial_scale = ctx.Attr("spatial_scale"); auto sampling_ratio = ctx.Attr("sampling_ratio"); @@ -193,7 +193,7 @@ class ROIAlignGradOpMLUKernel : public framework::OpKernel { auto cplace = platform::CPUPlace(); int rois_batch_size = 0; if (ctx.HasInput("RoisNum")) { - auto* rois_num_t = ctx.Input("RoisNum"); + auto* rois_num_t = ctx.Input("RoisNum"); rois_batch_size = rois_num_t->numel(); std::vector rois_num_list(rois_batch_size); memory::Copy(cplace, diff --git a/paddle/fluid/operators/roi_align_op_npu.cc b/paddle/fluid/operators/roi_align_op_npu.cc index f14e29f8ddc27..8fd2616a92cc4 100644 --- a/paddle/fluid/operators/roi_align_op_npu.cc +++ b/paddle/fluid/operators/roi_align_op_npu.cc @@ -15,16 +15,16 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ROIAlignNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* X = ctx.Input("X"); // (B,C,H,W) - auto* ROIs = ctx.Input("ROIs"); // (N,4) - auto* ROIsNum = ctx.Input("RoisNum"); // [0 1 1 2 2 2] - auto* Out = ctx.Output("Out"); + auto* X = ctx.Input("X"); // (B,C,H,W) + auto* ROIs = ctx.Input("ROIs"); // (N,4) + auto* ROIsNum = ctx.Input("RoisNum"); // [0 1 1 2 2 2] + auto* Out = ctx.Output("Out"); Out->mutable_data(ctx.GetPlace()); auto spatial_scale = ctx.Attr("spatial_scale"); @@ -63,7 +63,7 @@ class ROIAlignNPUKernel : public framework::OpKernel { runner_c.Run(stream); // concate to make (N, 5) - std::vector x_list; + std::vector x_list; x_list.push_back(ROIsNum_fp); x_list.push_back(*ROIs); auto axis = 1; @@ -95,11 +95,10 @@ template class ROIAlignNPUGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); + auto* in = ctx.Input("X"); auto* rois = ctx.Input("ROIs"); - auto* out_grad = - ctx.Input(framework::GradVarName("Out")); - auto* in_grad = ctx.Output(framework::GradVarName("X")); + auto* out_grad = ctx.Input(framework::GradVarName("Out")); + auto* in_grad = ctx.Output(framework::GradVarName("X")); auto pooled_height = ctx.Attr("pooled_height"); auto pooled_width = ctx.Attr("pooled_width"); @@ -137,7 +136,7 @@ class ROIAlignNPUGradKernel : public framework::OpKernel { "ROIAlignGradNPU only support ROIs type equaled to FP32.")); // Cast RoisNum to fp32 tensor - auto* RoisNum = ctx.Input("RoisNum"); + auto* RoisNum = ctx.Input("RoisNum"); Tensor ROIs_N5; ROIs_N5.mutable_data({rois_num, 5}, place); Tensor ROIsNum_fp; @@ -150,7 +149,7 @@ class ROIAlignNPUGradKernel : public framework::OpKernel { ROIsNum_fp.Resize({rois_num, 1}); // Combine *ROIsNum with ROIs to get new ROIs - std::vector x_list; + std::vector x_list; x_list.push_back(ROIsNum_fp); x_list.push_back(*rois); const auto& runner_concat = NpuOpRunner( diff --git a/paddle/fluid/operators/roi_pool_op.cc b/paddle/fluid/operators/roi_pool_op.cc index c95e235aff98b..74b9b0c06c3d0 100644 --- a/paddle/fluid/operators/roi_pool_op.cc +++ b/paddle/fluid/operators/roi_pool_op.cc @@ -23,7 +23,7 @@ limitations under the License. 
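A minimal sketch of the temporary-tensor pattern in the ROIAlign NPU kernels above; the element and data types are reconstructed and the surrounding variables (rois_num, place, ROIsNum_fp, rois) come from the kernel body, so treat this as an assumption:

    phi::DenseTensor ROIs_N5;                       // temporaries declared as phi::DenseTensor
    ROIs_N5.mutable_data<float>({rois_num, 5}, place);

    std::vector<phi::DenseTensor> x_list;           // was std::vector<framework::Tensor>
    x_list.push_back(ROIsNum_fp);
    x_list.push_back(*rois);                        // concatenated by the NPU runner to form (N, 5)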
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; class ROIPoolOp : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/roll_op.cc b/paddle/fluid/operators/roll_op.cc index 7ac1d4b8d4508..a504c7f8ddb87 100644 --- a/paddle/fluid/operators/roll_op.cc +++ b/paddle/fluid/operators/roll_op.cc @@ -26,8 +26,6 @@ namespace paddle { namespace operators { -using framework::Tensor; - class RollOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/row_conv_op.cc b/paddle/fluid/operators/row_conv_op.cc index 1bf471641d5a5..4dcda3fdae29c 100644 --- a/paddle/fluid/operators/row_conv_op.cc +++ b/paddle/fluid/operators/row_conv_op.cc @@ -24,7 +24,6 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using framework::Tensor; template : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { auto *x = context.Input("X"); - auto *filter = context.Input("Filter"); + auto *filter = context.Input("Filter"); auto *out = context.Output("Out"); out->mutable_data(context.GetPlace()); @@ -184,12 +183,12 @@ class RowConvKernel : public framework::OpKernel { current_timesteps = end - start; } // int current_timesteps = end - start; - Tensor cur_input_sequence = + phi::DenseTensor cur_input_sequence = x->Slice(start, end); // Current input sequence cur_input_sequence = cur_input_sequence.Resize({current_timesteps, input_dim}); - Tensor cur_output_sequence = + phi::DenseTensor cur_output_sequence = out->Slice(start, end); // Current output sequence cur_output_sequence = cur_output_sequence.Resize({current_timesteps, input_dim}); @@ -219,10 +218,11 @@ class RowConvGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { auto *x = context.Input("X"); - auto *filter = context.Input("Filter"); + auto *filter = context.Input("Filter"); auto *d_out = context.Input(framework::GradVarName("Out")); auto *dx = context.Output(framework::GradVarName("X")); - auto *d_filter = context.Output(framework::GradVarName("Filter")); + auto *d_filter = + context.Output(framework::GradVarName("Filter")); auto &x_lod = x->lod(); bool is_tensor = x_lod.empty(); @@ -264,9 +264,10 @@ class RowConvGradKernel : public framework::OpKernel { } else { current_timesteps = end - start; } - Tensor cur_input = x->Slice(start, end); // Current input sequence + phi::DenseTensor cur_input = + x->Slice(start, end); // Current input sequence cur_input = cur_input.Resize({current_timesteps, input_dim}); - Tensor cur_doutput = + phi::DenseTensor cur_doutput = d_out->Slice(start, end); // Current output grad sequence cur_doutput = cur_doutput.Resize({current_timesteps, input_dim}); auto cur_ip = EigenMatrix::From(cur_input); @@ -298,10 +299,10 @@ class RowConvGradKernel : public framework::OpKernel { current_timesteps = end - start; } - Tensor cur_doutput = + phi::DenseTensor cur_doutput = d_out->Slice(start, end); // Current output grad sequence cur_doutput = cur_doutput.Resize({current_timesteps, input_dim}); - Tensor cur_dinput = + phi::DenseTensor cur_dinput = dx->Slice(start, end); // Current input grad sequence cur_dinput = cur_dinput.Resize({current_timesteps, input_dim}); diff --git a/paddle/fluid/operators/row_conv_op.cu b/paddle/fluid/operators/row_conv_op.cu index 
f69889f7f8f25..6134c930ea01c 100644 --- a/paddle/fluid/operators/row_conv_op.cu +++ b/paddle/fluid/operators/row_conv_op.cu @@ -19,7 +19,6 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using framework::Tensor; namespace { @@ -327,7 +326,7 @@ class RowConvKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { auto *X = context.Input("X"); - auto *Filter = context.Input("Filter"); + auto *Filter = context.Input("Filter"); auto *Out = context.Output("Out"); const T *in = X->data(); @@ -381,14 +380,16 @@ class RowConvGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { auto *X = context.Input("X"); - auto *Filter = context.Input("Filter"); + auto *Filter = context.Input("Filter"); auto *dOut = context.Input(framework::GradVarName("Out")); const T *in = X->data(); const T *weights = Filter->data(); const T *dout = dOut->data(); - Tensor *dX = context.Output(framework::GradVarName("X")); - Tensor *dFilter = context.Output(framework::GradVarName("Filter")); + phi::DenseTensor *dX = + context.Output(framework::GradVarName("X")); + phi::DenseTensor *dFilter = + context.Output(framework::GradVarName("Filter")); int batch_size = 0; bool is_tensor = X->lod().empty(); if (is_tensor) { diff --git a/paddle/fluid/operators/rrelu_op.cc b/paddle/fluid/operators/rrelu_op.cc index ecbe7fe663fc1..823eb03aff6ce 100644 --- a/paddle/fluid/operators/rrelu_op.cc +++ b/paddle/fluid/operators/rrelu_op.cc @@ -22,8 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class RReluOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/run_program_op.cc b/paddle/fluid/operators/run_program_op.cc index 45fee045cbfd5..64afb3a2b91e9 100644 --- a/paddle/fluid/operators/run_program_op.cc +++ b/paddle/fluid/operators/run_program_op.cc @@ -55,7 +55,7 @@ class RunProgramOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { return expected_kernel_type; } @@ -173,7 +173,7 @@ class RunProgramGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { return expected_kernel_type; } diff --git a/paddle/fluid/operators/sample_logits_op.cu b/paddle/fluid/operators/sample_logits_op.cu index d0d8af95a3f72..8871627b85242 100644 --- a/paddle/fluid/operators/sample_logits_op.cu +++ b/paddle/fluid/operators/sample_logits_op.cu @@ -112,18 +112,21 @@ __global__ void gpu_compute_remove_accidental_hits(const int size, template class SampleLogitsCUDAKernel : public framework::OpKernel { public: - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; void Compute(const framework::ExecutionContext& context) const override { // get necessary inputs - const Tensor* logits = context.Input("Logits"); - const Tensor* labels = context.Input("Labels"); + const phi::DenseTensor* logits = context.Input("Logits"); + const phi::DenseTensor* labels = context.Input("Labels"); VLOG(3) << "Enter SampleLogitsCUDAKernel"; // get necessary 
outputs - Tensor* samples = context.Output("Samples"); - Tensor* probabilities = context.Output("Probabilities"); - Tensor* sampled_logits = context.Output("SampledLogits"); - Tensor* sampled_labels = context.Output("SampledLabels"); + phi::DenseTensor* samples = context.Output("Samples"); + phi::DenseTensor* probabilities = + context.Output("Probabilities"); + phi::DenseTensor* sampled_logits = + context.Output("SampledLogits"); + phi::DenseTensor* sampled_labels = + context.Output("SampledLabels"); // shapes const auto batch_size = logits->dims()[0]; @@ -158,10 +161,10 @@ class SampleLogitsCUDAKernel : public framework::OpKernel { size, num_true, sampled_labels_data); if (use_customized_samples) { - const Tensor* customized_samples = - context.Input("CustomizedSamples"); - const Tensor* customized_probabilities = - context.Input("CustomizedProbabilities"); + const phi::DenseTensor* customized_samples = + context.Input("CustomizedSamples"); + const phi::DenseTensor* customized_probabilities = + context.Input("CustomizedProbabilities"); PADDLE_ENFORCE_EQ(customized_samples, samples, platform::errors::InvalidArgument( @@ -235,12 +238,15 @@ class SampleLogitsCUDAKernel : public framework::OpKernel { template class SampleLogitsGradCUDAKernel : public framework::OpKernel { public: - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; void Compute(const framework::ExecutionContext& context) const override { - auto logits_grad = context.Output(framework::GradVarName("Logits")); - const Tensor* samples = context.Input("Samples"); - const Tensor* sampled_logits_grad = - context.Input(framework::GradVarName("SampledLogits")); + auto logits_grad = + context.Output(framework::GradVarName("Logits")); + const phi::DenseTensor* samples = + context.Input("Samples"); + const phi::DenseTensor* sampled_logits_grad = + context.Input( + framework::GradVarName("SampledLogits")); logits_grad->mutable_data(context.GetPlace()); auto& dev_ctx = context.cuda_device_context(); diff --git a/paddle/fluid/operators/sample_logits_op.h b/paddle/fluid/operators/sample_logits_op.h index d6affde0ce022..584d115d28ff3 100644 --- a/paddle/fluid/operators/sample_logits_op.h +++ b/paddle/fluid/operators/sample_logits_op.h @@ -27,7 +27,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template @@ -48,9 +48,9 @@ struct TolerableValue { // UNDERSTAND: something like take_along_axis in numpy. template static void CPUTakeAlongD1(const platform::DeviceContext& ctx, - const framework::Tensor& array, - const framework::Tensor& index, - framework::Tensor* value) { + const phi::DenseTensor& array, + const phi::DenseTensor& index, + phi::DenseTensor* value) { PADDLE_ENFORCE_EQ( platform::is_cpu_place(ctx.GetPlace()), true, @@ -119,9 +119,9 @@ static void CPUTakeAlongD1(const platform::DeviceContext& ctx, // indices, scatter is done in += way. 
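A minimal sketch of the helper-signature change in sample_logits_op.h above; the `template <typename T>` heads are stripped in this copy and reconstructed here (declarations only):

    template <typename T>
    static void CPUTakeAlongD1(const platform::DeviceContext& ctx,
                               const phi::DenseTensor& array,   // was framework::Tensor
                               const phi::DenseTensor& index,
                               phi::DenseTensor* value);

    template <typename T>
    static void CPUPutAlongD1(const platform::DeviceContext& ctx,
                              phi::DenseTensor* array,
                              const phi::DenseTensor& index,
                              const phi::DenseTensor& value);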
template static void CPUPutAlongD1(const platform::DeviceContext& ctx, - framework::Tensor* array, - const framework::Tensor& index, - const framework::Tensor& value) { + phi::DenseTensor* array, + const phi::DenseTensor& index, + const phi::DenseTensor& value) { PADDLE_ENFORCE_EQ( platform::is_cpu_place(ctx.GetPlace()), true, @@ -188,8 +188,8 @@ static void CPUPutAlongD1(const platform::DeviceContext& ctx, // logits by a float max, here 1e20 template static void compute_remove_accidental_hits(const platform::DeviceContext& ctx, - framework::Tensor* sampled_logits, - const framework::Tensor& samples, + phi::DenseTensor* sampled_logits, + const phi::DenseTensor& samples, const int num_true) { const auto batch_size = sampled_logits->dims()[0]; const auto num_sampled_classes = sampled_logits->dims()[1]; @@ -212,7 +212,7 @@ static void compute_remove_accidental_hits(const platform::DeviceContext& ctx, template class SampleLogitsKernel : public framework::OpKernel { public: - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; void Compute(const framework::ExecutionContext& context) const override { PADDLE_ENFORCE_EQ( platform::is_cpu_place(context.GetPlace()), @@ -220,14 +220,17 @@ class SampleLogitsKernel : public framework::OpKernel { platform::errors::InvalidArgument("this kernel only runs on cpu.")); VLOG(3) << "Enter SampleLogitsKernel"; // get necessary inputs - const Tensor* logits = context.Input("Logits"); - const Tensor* labels = context.Input("Labels"); + const phi::DenseTensor* logits = context.Input("Logits"); + const phi::DenseTensor* labels = context.Input("Labels"); // get necessary outputs - Tensor* samples = context.Output("Samples"); - Tensor* probabilities = context.Output("Probabilities"); - Tensor* sampled_logits = context.Output("SampledLogits"); - Tensor* sampled_labels = context.Output("SampledLabels"); + phi::DenseTensor* samples = context.Output("Samples"); + phi::DenseTensor* probabilities = + context.Output("Probabilities"); + phi::DenseTensor* sampled_logits = + context.Output("SampledLogits"); + phi::DenseTensor* sampled_labels = + context.Output("SampledLabels"); // shapes const auto batch_size = logits->dims()[0]; @@ -257,10 +260,10 @@ class SampleLogitsKernel : public framework::OpKernel { } if (use_customized_samples) { - const Tensor* customized_samples = - context.Input("CustomizedSamples"); - const Tensor* customized_probabilities = - context.Input("CustomizedProbabilities"); + const phi::DenseTensor* customized_samples = + context.Input("CustomizedSamples"); + const phi::DenseTensor* customized_probabilities = + context.Input("CustomizedProbabilities"); PADDLE_ENFORCE_EQ(customized_samples, samples, platform::errors::InvalidArgument( @@ -305,12 +308,15 @@ class SampleLogitsKernel : public framework::OpKernel { template class SampleLogitsGradKernel : public framework::OpKernel { public: - using Tensor = framework::Tensor; + using Tensor = phi::DenseTensor; void Compute(const framework::ExecutionContext& context) const override { - auto logits_grad = context.Output(framework::GradVarName("Logits")); - const Tensor* samples = context.Input("Samples"); - const Tensor* sampled_logits_grad = - context.Input(framework::GradVarName("SampledLogits")); + auto logits_grad = + context.Output(framework::GradVarName("Logits")); + const phi::DenseTensor* samples = + context.Input("Samples"); + const phi::DenseTensor* sampled_logits_grad = + context.Input( + framework::GradVarName("SampledLogits")); logits_grad->mutable_data(context.GetPlace()); auto& 
dev_ctx = context.template device_context(); diff --git a/paddle/fluid/operators/sampling_id_op.cc b/paddle/fluid/operators/sampling_id_op.cc index cd91e119faae0..6d2d3f4a60047 100644 --- a/paddle/fluid/operators/sampling_id_op.cc +++ b/paddle/fluid/operators/sampling_id_op.cc @@ -17,7 +17,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class SamplingIdOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/sampling_id_op.h b/paddle/fluid/operators/sampling_id_op.h index 713d1b0475c3c..43c0bdcf4043e 100644 --- a/paddle/fluid/operators/sampling_id_op.h +++ b/paddle/fluid/operators/sampling_id_op.h @@ -27,13 +27,13 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class SamplingIdKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - const Tensor* input = context.Input("X"); + const phi::DenseTensor* input = context.Input("X"); const int batch_size = static_cast(input->dims()[0]); const int width = static_cast(input->dims()[1]); @@ -75,7 +75,7 @@ class SamplingIdKernel : public framework::OpKernel { std::vector out_dim; out_dim.push_back(static_cast(batch_size)); - Tensor* output = context.Output("Out"); + phi::DenseTensor* output = context.Output("Out"); output->Resize(phi::make_ddim(out_dim)); output->mutable_data(context.GetPlace()); framework::TensorFromVector(ids, context.device_context(), output); diff --git a/paddle/fluid/operators/save_combine_op.cc b/paddle/fluid/operators/save_combine_op.cc index 6b5c2367bb9ad..a25241d368aff 100644 --- a/paddle/fluid/operators/save_combine_op.cc +++ b/paddle/fluid/operators/save_combine_op.cc @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class SaveCombineOp : public framework::OperatorWithKernel { public: @@ -37,7 +37,7 @@ class SaveCombineOp : public framework::OperatorWithKernel { // in operator impl, which is not elegant enough. framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place()); diff --git a/paddle/fluid/operators/scale_op_mlu.cc b/paddle/fluid/operators/scale_op_mlu.cc index 363c3e98a6dfc..c9aefcfc5b1fc 100644 --- a/paddle/fluid/operators/scale_op_mlu.cc +++ b/paddle/fluid/operators/scale_op_mlu.cc @@ -28,10 +28,10 @@ class ScaleMLUKernel : public framework::OpKernel { // cnnl require input, scale, bias with same type. And all in device side. 
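The save_combine_op.cc hunk above (like run_program_op.cc and several ops later in this patch) updates the GetKernelTypeForVar override so its tensor parameter spells the phi type. A minimal sketch of the resulting override, mirroring the hunk:

    framework::OpKernelType GetKernelTypeForVar(
        const std::string& var_name,
        const phi::DenseTensor& tensor,            // was const framework::Tensor&
        const framework::OpKernelType& expected_kernel_type) const override {
      return framework::OpKernelType(expected_kernel_type.data_type_,
                                     tensor.place());
    }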
auto scale = static_cast(ctx.Attr("scale")); - framework::Tensor scale_tensor; + phi::DenseTensor scale_tensor; if (ctx.HasInput("ScaleTensor")) { - framework::Tensor float_scale_tensor = - *ctx.Input("ScaleTensor"); + phi::DenseTensor float_scale_tensor = + *ctx.Input("ScaleTensor"); if (framework::TransToProtoVarType(float_scale_tensor.dtype()) != framework::TransToProtoVarType(in->dtype())) { scale_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); @@ -60,7 +60,7 @@ class ScaleMLUKernel : public framework::OpKernel { } auto bias = static_cast(ctx.Attr("bias")); - framework::Tensor bias_tensor = + phi::DenseTensor bias_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); MLUCnnlTensorDesc bias_desc(bias_tensor); MLUCnnl::Fill(ctx, @@ -98,7 +98,7 @@ class ScaleMLUKernel : public framework::OpKernel { output_desc.get(), GetBasePtr(out)); } else { - framework::Tensor new_bias_tensor = + phi::DenseTensor new_bias_tensor = ctx.AllocateTmpTensor({1}, dev_ctx); MLUCnnlTensorDesc new_bias_desc(new_bias_tensor); diff --git a/paddle/fluid/operators/scale_op_npu.cc b/paddle/fluid/operators/scale_op_npu.cc index 3663ded61daea..8d7e8d59004fa 100644 --- a/paddle/fluid/operators/scale_op_npu.cc +++ b/paddle/fluid/operators/scale_op_npu.cc @@ -19,9 +19,9 @@ namespace paddle { namespace operators { template -static inline T GetAttrFromTensor(const framework::Tensor* tensor) { +static inline T GetAttrFromTensor(const phi::DenseTensor* tensor) { const auto* tensor_data = tensor->data(); - framework::Tensor cpu_tensor; + phi::DenseTensor cpu_tensor; if (platform::is_gpu_place(tensor->place()) || platform::is_npu_place(tensor->place())) { paddle::framework::TensorCopySync( @@ -35,8 +35,8 @@ template class ScaleNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); auto scale = ctx.Attr("scale"); auto bias = ctx.Attr("bias"); auto bias_after_scale = ctx.Attr("bias_after_scale"); @@ -47,7 +47,7 @@ class ScaleNPUKernel : public framework::OpKernel { VLOG(4) << "scale:" << scale << ", bias:" << bias << " ,bias_after_scale:" << bias_after_scale; if (ctx.HasInput("ScaleTensor")) { - auto* scale_tensor = ctx.Input("ScaleTensor"); + auto* scale_tensor = ctx.Input("ScaleTensor"); scale = static_cast(GetAttrFromTensor(scale_tensor)); } if (isinf(scale)) { diff --git a/paddle/fluid/operators/scatter_nd_add_op.cc b/paddle/fluid/operators/scatter_nd_add_op.cc index e15a9b98e8c4a..4ed08a387f2a0 100644 --- a/paddle/fluid/operators/scatter_nd_add_op.cc +++ b/paddle/fluid/operators/scatter_nd_add_op.cc @@ -37,7 +37,7 @@ class ScatterNdAddOp : public framework::OperatorWithKernel { "Ref and Updates must have same type")); return framework::OpKernelType( framework::TransToProtoVarType( - ctx.Input("X")->type()), + ctx.Input("X")->type()), ctx.device_context()); } }; diff --git a/paddle/fluid/operators/scatter_op_mlu.cc b/paddle/fluid/operators/scatter_op_mlu.cc index 952da0edb8f34..a4cb5d7424936 100644 --- a/paddle/fluid/operators/scatter_op_mlu.cc +++ b/paddle/fluid/operators/scatter_op_mlu.cc @@ -19,11 +19,11 @@ template class ScatterMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* indices = ctx.Input("Ids"); - auto* updates = ctx.Input("Updates"); + auto* x = ctx.Input("X"); + auto* indices = ctx.Input("Ids"); + auto* updates = ctx.Input("Updates"); 
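A minimal sketch of the GetAttrFromTensor helper changed in scale_op_npu.cc above, which reads a scalar attribute stored in a possibly device-side tensor by first copying it to host; the template argument is reconstructed (assumption):

    template <typename T>
    static inline T GetAttrFromTensor(const phi::DenseTensor* tensor) {
      const auto* tensor_data = tensor->data<T>();
      phi::DenseTensor cpu_tensor;                 // was framework::Tensor
      if (platform::is_gpu_place(tensor->place()) ||
          platform::is_npu_place(tensor->place())) {
        paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &cpu_tensor);
        tensor_data = cpu_tensor.data<T>();
      }
      return tensor_data[0];
    }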
bool overwrite = ctx.Attr("overwrite"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); out->mutable_data(place); MLUCnnlTensorDesc x_desc(*x); diff --git a/paddle/fluid/operators/scatter_op_npu.cc b/paddle/fluid/operators/scatter_op_npu.cc index 40e0b983e25d9..6bffd24734055 100644 --- a/paddle/fluid/operators/scatter_op_npu.cc +++ b/paddle/fluid/operators/scatter_op_npu.cc @@ -22,23 +22,23 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ScatterNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* index = ctx.Input("Ids"); - auto* updates = ctx.Input("Updates"); + auto* x = ctx.Input("X"); + auto* index = ctx.Input("Ids"); + auto* updates = ctx.Input("Updates"); bool overwrite = ctx.Attr("overwrite"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); out->mutable_data(place); - framework::Tensor tmp_tensor(index->type()); + phi::DenseTensor tmp_tensor(index->type()); const auto index_dims = index->dims(); if (index_dims.size() == 1) { tmp_tensor.ShareDataWith(*index); diff --git a/paddle/fluid/operators/scatter_test.cc b/paddle/fluid/operators/scatter_test.cc index 1249e3e807ec7..7f774089fd9ca 100644 --- a/paddle/fluid/operators/scatter_test.cc +++ b/paddle/fluid/operators/scatter_test.cc @@ -20,9 +20,9 @@ limitations under the License. */ #include "paddle/fluid/platform/place.h" TEST(scatter, ScatterUpdate) { - paddle::framework::Tensor src; - paddle::framework::Tensor index; - paddle::framework::Tensor output; + phi::DenseTensor src; + phi::DenseTensor index; + phi::DenseTensor output; auto* p_src = src.mutable_data(phi::make_ddim({1, 4}), paddle::platform::CPUPlace()); diff --git a/paddle/fluid/operators/search_compute.h b/paddle/fluid/operators/search_compute.h index 07cd48604b8aa..eceef2b4e5470 100644 --- a/paddle/fluid/operators/search_compute.h +++ b/paddle/fluid/operators/search_compute.h @@ -28,7 +28,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using LoD = framework::LoD; diff --git a/paddle/fluid/operators/seed_op.cc b/paddle/fluid/operators/seed_op.cc index 7de155b01c20e..88a1884ae53e4 100644 --- a/paddle/fluid/operators/seed_op.cc +++ b/paddle/fluid/operators/seed_op.cc @@ -17,7 +17,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class SeedOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/seed_op.cu b/paddle/fluid/operators/seed_op.cu index be406db50569d..87ba439d79201 100644 --- a/paddle/fluid/operators/seed_op.cu +++ b/paddle/fluid/operators/seed_op.cu @@ -22,7 +22,7 @@ template class GPUSeedKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *out = context.Output("Out"); + auto *out = context.Output("Out"); int seed = get_seed(context); auto force_cpu = context.Attr("force_cpu"); diff --git a/paddle/fluid/operators/seed_op.h b/paddle/fluid/operators/seed_op.h index 202f25e0b4cd1..a1c3484b7a728 100644 --- a/paddle/fluid/operators/seed_op.h +++ b/paddle/fluid/operators/seed_op.h @@ -19,7 +19,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; static int get_seed(const framework::ExecutionContext& context) { int user_seed = context.Attr("seed"); @@ -49,7 +49,7 @@ template class CPUSeedKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* out = context.Output("Out"); + auto* out = context.Output("Out"); auto* out_data = out->mutable_data(context.GetPlace()); out_data[0] = get_seed(context); } diff --git a/paddle/fluid/operators/seed_op_npu.cc b/paddle/fluid/operators/seed_op_npu.cc index 39bd21e9ba6ab..cee905bdc1491 100644 --- a/paddle/fluid/operators/seed_op_npu.cc +++ b/paddle/fluid/operators/seed_op_npu.cc @@ -22,7 +22,7 @@ template class NPUSeedKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); int user_seed = ctx.Attr("seed"); std::random_device rnd; int seed; diff --git a/paddle/fluid/operators/sequence_ops/sequence_concat_op.h b/paddle/fluid/operators/sequence_ops/sequence_concat_op.h index 4943e0e2ea09b..fa10965462191 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_concat_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_concat_op.h @@ -26,7 +26,7 @@ namespace operators { namespace detail { template inline framework::LoD ConcatLoD(const Container &xs, - std::vector *xs_in_order) { + std::vector *xs_in_order) { std::vector result; result.resize(xs[0].get().lod()[0].size()); @@ -34,7 +34,7 @@ inline framework::LoD ConcatLoD(const Container &xs, size_t sum = 0; for (size_t j = 0; j < xs.size(); ++j) { auto &x_lod = xs[j].get().lod()[0]; - const framework::Tensor &tensor = xs[j].get(); + const phi::DenseTensor &tensor = xs[j].get(); if (x_lod[i - 1] < x_lod[i]) { xs_in_order->emplace_back(tensor.Slice(x_lod[i - 1], x_lod[i])); } @@ -98,7 +98,7 @@ class SeqConcatKernel : public framework::OpKernel { "received input lod size is %d", lod_size)); - std::vector x_in_order; + std::vector x_in_order; out.set_lod(detail::ConcatLoD(xs, &x_in_order)); 
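A minimal sketch of the slice collection updated in the sequence_concat_op.h ConcatLoD helper above; the vector element type is my reconstruction of the stripped template argument, and the index `i` comes from the helper's outer loop over LoD offsets (omitted here):

    std::vector<phi::DenseTensor> x_in_order;       // was std::vector<framework::Tensor>
    for (size_t j = 0; j < xs.size(); ++j) {
      auto& x_lod = xs[j].get().lod()[0];
      const phi::DenseTensor& tensor = xs[j].get();
      if (x_lod[i - 1] < x_lod[i]) {
        x_in_order.emplace_back(tensor.Slice(x_lod[i - 1], x_lod[i]));
      }
    }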
out.mutable_data(context.GetPlace()); math::ConcatFunctor functor; @@ -129,8 +129,8 @@ class SeqConcatGradKernel : public framework::OpKernel { } } - std::vector sliced_x; - std::vector> sliced_dx; + std::vector sliced_x; + std::vector> sliced_dx; for (size_t i = 1; i < xs[0]->lod()[0].size(); ++i) { for (size_t j = 0; j < xs.size(); ++j) { @@ -157,13 +157,13 @@ class SeqConcatGradKernel : public framework::OpKernel { } } - std::vector sliced_x_ptr; + std::vector sliced_x_ptr; sliced_x_ptr.reserve(sliced_x.size()); for (auto &x : sliced_x) { sliced_x_ptr.emplace_back(&x); } - std::vector sliced_dx_ptr; + std::vector sliced_dx_ptr; sliced_dx_ptr.reserve(sliced_dx.size()); for (auto &dx : sliced_dx) { if (dx) { @@ -174,7 +174,7 @@ class SeqConcatGradKernel : public framework::OpKernel { math::SplitFunctor functor; functor(context.template device_context(), GET_DATA_SAFELY( - context.Input(framework::GradVarName("Out")), + context.Input(framework::GradVarName("Out")), "Input", framework::GradVarName("Out"), "SeqConcatGrad"), diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h index ee7677aa2164d..80a9019906e8b 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h @@ -22,7 +22,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template @@ -31,7 +31,7 @@ class SequenceConvKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* in = context.Input("X"); auto* out = context.Output("Out"); - auto filter = *context.Input("Filter"); + auto filter = *context.Input("Filter"); out->mutable_data(context.GetPlace()); @@ -53,9 +53,9 @@ class SequenceConvKernel : public framework::OpKernel { "present. 
But received: lod level %u.", in->lod().size())); - const Tensor* padding_data = nullptr; + const phi::DenseTensor* padding_data = nullptr; if (padding_trainable) { - padding_data = context.Input("PaddingData"); + padding_data = context.Input("PaddingData"); } int up_pad = std::max(0, -context_start); @@ -94,11 +94,12 @@ class SequenceConvGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* in_g = context.Output(framework::GradVarName("X")); auto* out_g = context.Input(framework::GradVarName("Out")); - auto* filter_g = context.Output(framework::GradVarName("Filter")); + auto* filter_g = + context.Output(framework::GradVarName("Filter")); auto* padding_data_g = - context.Output(framework::GradVarName("PaddingData")); + context.Output(framework::GradVarName("PaddingData")); auto* in = context.Input("X"); - auto* filter = context.Input("Filter"); + auto* filter = context.Input("Filter"); int context_start = context.Attr("contextStart"); int context_length = context.Attr("contextLength"); @@ -180,9 +181,9 @@ class SequenceConvGradKernel : public framework::OpKernel { Tensor filter_grad = *filter_g; LoDTensor out_grad = *out_g; - const Tensor* padding_data = nullptr; + const phi::DenseTensor* padding_data = nullptr; if (padding_trainable) { - padding_data = context.Input("PaddingData"); + padding_data = context.Input("PaddingData"); } seq_project_functor(dev_ctx, diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc b/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc index bfd5ce38645e8..f0083ec4042e6 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc @@ -19,7 +19,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class SequenceConvXPUKernel : public framework::OpKernel { @@ -27,7 +27,7 @@ class SequenceConvXPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* in = context.Input("X"); auto* out = context.Output("Out"); - auto filter = *context.Input("Filter"); + auto filter = *context.Input("Filter"); out->mutable_data(context.GetPlace()); @@ -161,9 +161,10 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* in_g = context.Output(framework::GradVarName("X")); auto* out_g = context.Input(framework::GradVarName("Out")); - auto* filter_g = context.Output(framework::GradVarName("Filter")); + auto* filter_g = + context.Output(framework::GradVarName("Filter")); auto* in = context.Input("X"); - auto* filter = context.Input("Filter"); + auto* filter = context.Input("Filter"); int context_start = context.Attr("contextStart"); int context_length = context.Attr("contextLength"); diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc index 8ea756e455e23..6c14fa997fe5e 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc @@ -47,7 +47,7 @@ class SequenceMaskOp : public framework::OperatorWithKernel { } framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "depth_tensor") { return expected_kernel_type; diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op.h b/paddle/fluid/operators/sequence_ops/sequence_mask_op.h index a8105ef71a550..01cbed4509d85 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_mask_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op.h @@ -29,7 +29,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template struct SequenceMaskForRangeFunctor { @@ -75,17 +75,17 @@ class SequenceMaskKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - auto *y = ctx.Output("Y"); + auto *x = ctx.Input("X"); + auto *y = ctx.Output("Y"); int maxlen = ctx.Attr("maxlen"); if (ctx.HasInput("MaxLenTensor")) { - auto max_len_tensor = ctx.Input("MaxLenTensor"); + auto max_len_tensor = ctx.Input("MaxLenTensor"); PADDLE_ENFORCE_NOT_NULL(max_len_tensor, platform::errors::InvalidArgument( "Input(MaxLenTensor) should not be NULL." "But received Input(MaxLenTensor) is NULL")); if (platform::is_gpu_place(max_len_tensor->place())) { - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync( *max_len_tensor, platform::CPUPlace(), &temp); maxlen = *temp.data(); diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc b/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc index b39e4f3bdd612..1290e79bc076d 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op_npu.cc @@ -18,24 +18,24 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class SequenceMaskNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - auto* x = ctx.Input("X"); - auto* y = ctx.Output("Y"); + auto* x = ctx.Input("X"); + auto* y = ctx.Output("Y"); int maxlen = ctx.Attr("maxlen"); if (ctx.HasInput("MaxLenTensor")) { - auto max_len_tensor = ctx.Input("MaxLenTensor"); + auto max_len_tensor = ctx.Input("MaxLenTensor"); PADDLE_ENFORCE_NOT_NULL(max_len_tensor, platform::errors::InvalidArgument( "Input(MaxLenTensor) should not be NULL." "But received Input(MaxLenTensor) is NULL")); - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync( *max_len_tensor, platform::CPUPlace(), &temp); maxlen = *temp.data(); diff --git a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h index 90e84c5061e17..0811733a2d7ce 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h @@ -23,7 +23,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template @@ -71,7 +71,7 @@ class SequencePoolKernel : public framework::OpKernel { dims[0] = lod[lod_level - 1].size() - 1; out->Resize({dims}); out->mutable_data(context.GetPlace()); - Tensor* index = nullptr; + phi::DenseTensor* index = nullptr; bool is_test = context.HasAttr("is_test") ? context.Attr("is_test") : false; @@ -81,7 +81,7 @@ class SequencePoolKernel : public framework::OpKernel { if (pooltype == "MAX" && (is_test == false || platform::is_cpu_place(context.GetPlace()) == false)) { - index = context.Output("MaxIndex"); + index = context.Output("MaxIndex"); index->Resize({dims}); index->mutable_data(context.GetPlace()); } @@ -103,9 +103,9 @@ class SequencePoolGradKernel : public framework::OpKernel { auto* out_g = context.Input(framework::GradVarName("Out")); auto* in_g = context.Output(framework::GradVarName("X")); std::string pooltype = context.Attr("pooltype"); - const Tensor* index = nullptr; + const phi::DenseTensor* index = nullptr; if (pooltype == "MAX") { - index = context.Input("MaxIndex"); + index = context.Input("MaxIndex"); } in_g->mutable_data(context.GetPlace()); math::SequencePoolGradFunctor pool; diff --git a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc b/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc index 1d53c39713acf..2050dfb27ddc8 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc @@ -22,7 +22,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; class SequenceScatterOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.h b/paddle/fluid/operators/sequence_ops/sequence_scatter_op.h index 0be41c295e38d..68fe81c186da6 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_scatter_op.h @@ -20,17 +20,17 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template class SequenceScatterOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); + auto* x = ctx.Input("X"); auto* ids = ctx.Input("Ids"); auto* updates = ctx.Input("Updates"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto& ids_lod = ids->lod(); PADDLE_ENFORCE_EQ(ids_lod.empty(), @@ -96,10 +96,10 @@ class SequenceScatterGradientOpKernel : public framework::OpKernel { platform::errors::Unimplemented("Device dose not match. The " "SequenceScatterGradientOpKernel can " "only run on CPU device.")); - auto* dX = ctx.Output(framework::GradVarName("X")); + auto* dX = ctx.Output(framework::GradVarName("X")); auto* dUpdates = ctx.Output(framework::GradVarName("Updates")); auto* ids = ctx.Input("Ids"); - auto* dOut = ctx.Input(framework::GradVarName("Out")); + auto* dOut = ctx.Input(framework::GradVarName("Out")); auto& ids_lod = ids->lod(); diff --git a/paddle/fluid/operators/sequence_ops/sequence_slice_op.h b/paddle/fluid/operators/sequence_ops/sequence_slice_op.h index ad535341fd46f..e6310f7f9f54a 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_slice_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_slice_op.h @@ -20,7 +20,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using LoD = framework::LoD; @@ -45,8 +45,8 @@ class SequenceSliceOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); - auto* offset = ctx.Input("Offset"); - auto* length = ctx.Input("Length"); + auto* offset = ctx.Input("Offset"); + auto* length = ctx.Input("Length"); auto* out = ctx.Output("Out"); auto lod = in->lod(); @@ -85,8 +85,8 @@ class SequenceSliceOpKernel : public framework::OpKernel { const int64_t* offset_data = offset->data(); const int64_t* length_data = length->data(); - framework::Tensor offset_cpu; - framework::Tensor length_cpu; + phi::DenseTensor offset_cpu; + phi::DenseTensor length_cpu; if (platform::is_gpu_place(ctx.GetPlace())) { offset_cpu.mutable_data(offset->dims(), platform::CPUPlace()); @@ -156,8 +156,8 @@ class SequenceSliceGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); - auto* offset = ctx.Input("Offset"); - auto* length = ctx.Input("Length"); + auto* offset = ctx.Input("Offset"); + auto* length = ctx.Input("Length"); auto* out_grad = ctx.Input(framework::GradVarName("Out")); auto* x_grad = @@ -165,8 +165,8 @@ class SequenceSliceGradOpKernel : public framework::OpKernel { const int64_t* offset_data = offset->data(); const int64_t* length_data = length->data(); - framework::Tensor offset_cpu; - framework::Tensor length_cpu; + phi::DenseTensor offset_cpu; + phi::DenseTensor length_cpu; if (platform::is_gpu_place(ctx.GetPlace())) { offset_cpu.mutable_data(offset->dims(), platform::CPUPlace()); diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc index b060aa9f08b15..73548eee454e4 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc +++ 
b/paddle/fluid/operators/sequence_ops/sequence_softmax_cudnn_op.cu.cc @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h index 0d3d3b695af4b..d5489d296ba47 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template diff --git a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h index 1c1168e449eb7..149a9f0c2db18 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h @@ -27,7 +27,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; static constexpr int TopKPosPaddingId = -1; @@ -75,7 +75,7 @@ class SequenceTopkAvgPoolingKernel : public framework::OpKernel { auto* row = context.Input("ROW"); auto* col = context.Input("COLUMN"); auto* out = context.Output("Out"); - auto* pos = context.Output("pos"); + auto* pos = context.Output("pos"); PADDLE_ENFORCE_EQ( in->lod().empty(), @@ -184,7 +184,7 @@ class SequenceTopkAvgPoolingGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* d_out = context.Input(framework::GradVarName("Out")); auto* d_in = context.Output(framework::GradVarName("X")); - auto* pos_input = context.Input("pos"); + auto* pos_input = context.Input("pos"); auto* row_input = context.Input("ROW"); auto* col_input = context.Input("COLUMN"); auto* forward_input = context.Input("X"); diff --git a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h index 747549eed5182..43425c3e3a27b 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h @@ -36,7 +36,7 @@ class SequenceUnpadOpKernel : public framework::OpKernel { auto* out_t = ctx.Output("Out"); auto& dev_ctx = ctx.template device_context(); - framework::Tensor seq_len_cpu = + phi::DenseTensor seq_len_cpu = ctx.AllocateTmpTensor(len_t->dims(), dev_ctx); if (platform::is_gpu_place(ctx.GetPlace()) || platform::is_xpu_place(ctx.GetPlace())) { diff --git a/paddle/fluid/operators/set_value_op.cc b/paddle/fluid/operators/set_value_op.cc index 074642e1b0241..b1fe2dedcb293 100644 --- a/paddle/fluid/operators/set_value_op.cc +++ b/paddle/fluid/operators/set_value_op.cc @@ -36,7 +36,7 @@ class OpBase; namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class SetValue : public framework::OperatorWithKernel { public: @@ -210,7 +210,7 @@ class SetValueGrad : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext &ctx) const override { - auto in_tensor = ctx.Input(framework::GradVarName("Out")); + auto in_tensor = 
ctx.Input(framework::GradVarName("Out")); return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( ctx, framework::GradVarName("Out")), in_tensor->place()); diff --git a/paddle/fluid/operators/set_value_op.h b/paddle/fluid/operators/set_value_op.h index d754f609393cf..7ef766020251b 100644 --- a/paddle/fluid/operators/set_value_op.h +++ b/paddle/fluid/operators/set_value_op.h @@ -31,7 +31,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DDim = framework::DDim; inline std::string GetValueName(framework::proto::VarType::Type data_type) { diff --git a/paddle/fluid/operators/set_value_op_mlu.cc b/paddle/fluid/operators/set_value_op_mlu.cc index 9a6277dfa2312..1b950a6da6084 100644 --- a/paddle/fluid/operators/set_value_op_mlu.cc +++ b/paddle/fluid/operators/set_value_op_mlu.cc @@ -26,14 +26,16 @@ template class SetValueMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { - auto* in = ctx.Input("Input"); - auto* value_tensor = ctx.Input("ValueTensor"); - auto* out = ctx.Output("Out"); + auto* in = ctx.Input("Input"); + auto* value_tensor = ctx.Input("ValueTensor"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); - auto starts_tensor_list = ctx.MultiInput("StartsTensorList"); - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); - auto steps_tensor_list = ctx.MultiInput("StepsTensorList"); + auto starts_tensor_list = + ctx.MultiInput("StartsTensorList"); + auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); + auto steps_tensor_list = + ctx.MultiInput("StepsTensorList"); auto axes = ctx.Attr>("axes"); auto starts = ctx.Attr>("starts"); @@ -135,7 +137,7 @@ class SetValueMLUKernel : public framework::OpKernel { int64_t stride_step = phi::product(in_dims); std::vector index_indices(stride_step); std::iota(index_indices.begin(), index_indices.end(), 0); - framework::Tensor index_temp; + phi::DenseTensor index_temp; in_temp.ShareDataWith(*in); val_temp.ShareDataWith(value_temp); paddle::framework::TensorFromVector( diff --git a/paddle/fluid/operators/set_value_op_npu.cc b/paddle/fluid/operators/set_value_op_npu.cc index 419cbe6f9a77e..7526b13311b05 100644 --- a/paddle/fluid/operators/set_value_op_npu.cc +++ b/paddle/fluid/operators/set_value_op_npu.cc @@ -25,13 +25,15 @@ template class SetValueNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { - auto* in = ctx.Input("Input"); - auto* value_tensor = ctx.Input("ValueTensor"); - auto* out = ctx.Output("Out"); - - auto starts_tensor_list = ctx.MultiInput("StartsTensorList"); - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); - auto steps_tensor_list = ctx.MultiInput("StepsTensorList"); + auto* in = ctx.Input("Input"); + auto* value_tensor = ctx.Input("ValueTensor"); + auto* out = ctx.Output("Out"); + + auto starts_tensor_list = + ctx.MultiInput("StartsTensorList"); + auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); + auto steps_tensor_list = + ctx.MultiInput("StepsTensorList"); auto axes = ctx.Attr>("axes"); auto starts = ctx.Attr>("starts"); diff --git a/paddle/fluid/operators/shape_op.cc b/paddle/fluid/operators/shape_op.cc index 14f4b00b60d73..b191f7cfa0011 100644 --- a/paddle/fluid/operators/shape_op.cc +++ b/paddle/fluid/operators/shape_op.cc @@ -45,7 +45,7 @@ class ShapeOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - 
const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { return framework::OpKernelType(expected_kernel_type.data_type_, expected_kernel_type.place_, diff --git a/paddle/fluid/operators/shape_op_mlu.cc b/paddle/fluid/operators/shape_op_mlu.cc index a890b22e7a933..5fde42dc7880f 100644 --- a/paddle/fluid/operators/shape_op_mlu.cc +++ b/paddle/fluid/operators/shape_op_mlu.cc @@ -20,7 +20,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using SelectedRows = phi::SelectedRows; @@ -35,7 +35,7 @@ class ShapeMLUKernel : public framework::OpKernel { } else { in_dims = in_var->Get().dims(); } - auto* out_t = ctx.Output("Out"); + auto* out_t = ctx.Output("Out"); out_t->Resize({in_dims.size()}); out_t->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/shape_op_npu.cc b/paddle/fluid/operators/shape_op_npu.cc index 95f2857bf3fcc..60a0162818c9d 100644 --- a/paddle/fluid/operators/shape_op_npu.cc +++ b/paddle/fluid/operators/shape_op_npu.cc @@ -18,14 +18,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ShapeNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("Input"); - auto* out_t = ctx.Output("Out"); + auto* x = ctx.Input("Input"); + auto* out_t = ctx.Output("Out"); out_t->Resize({x->dims().size()}); out_t->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/shard_index_op_npu.cc b/paddle/fluid/operators/shard_index_op_npu.cc index b80a50454d756..a6ff8022d0b84 100644 --- a/paddle/fluid/operators/shard_index_op_npu.cc +++ b/paddle/fluid/operators/shard_index_op_npu.cc @@ -19,7 +19,7 @@ namespace paddle { namespace operators { using LoDTensor = framework::LoDTensor; -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class ShardIndexNPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/share_buffer_op.cc b/paddle/fluid/operators/share_buffer_op.cc index 89d6b231d7234..5ccd8d3189717 100644 --- a/paddle/fluid/operators/share_buffer_op.cc +++ b/paddle/fluid/operators/share_buffer_op.cc @@ -33,7 +33,7 @@ class ShareBufferOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { return expected_kernel_type; } diff --git a/paddle/fluid/operators/share_buffer_op.h b/paddle/fluid/operators/share_buffer_op.h index 908047345fe0b..70cb72db36d79 100644 --- a/paddle/fluid/operators/share_buffer_op.h +++ b/paddle/fluid/operators/share_buffer_op.h @@ -24,8 +24,8 @@ template class ShareBufferOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - const auto inputs = ctx.MultiInput("X"); - auto outputs = ctx.MultiOutput("Out"); + const auto inputs = ctx.MultiInput("X"); + auto outputs = ctx.MultiOutput("Out"); size_t n = inputs.size(); PADDLE_ENFORCE_EQ( n, diff --git a/paddle/fluid/operators/shuffle_batch_op.cc b/paddle/fluid/operators/shuffle_batch_op.cc index 16daffd1291d9..2fe8512b4b155 100644 --- a/paddle/fluid/operators/shuffle_batch_op.cc +++ 
b/paddle/fluid/operators/shuffle_batch_op.cc @@ -63,7 +63,7 @@ class ShuffleBatchOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "Seed") { return expected_kernel_type; diff --git a/paddle/fluid/operators/shuffle_batch_op.cu b/paddle/fluid/operators/shuffle_batch_op.cu index 6b70b8d37d79c..4ab4868bfb5b2 100644 --- a/paddle/fluid/operators/shuffle_batch_op.cu +++ b/paddle/fluid/operators/shuffle_batch_op.cu @@ -56,11 +56,11 @@ class ShuffleBatchCUDAKernel : public framework::OpKernel { PADDLE_THROW(platform::errors::Unimplemented( "GPU shuffle_batch is not supported on Windows yet")); #else - auto *x = ctx.Input("X"); - auto *seed = ctx.Input("Seed"); - auto *out = ctx.Output("Out"); - auto *shuffleidx = ctx.Output("ShuffleIdx"); - auto *seed_out = ctx.Output("SeedOut"); + auto *x = ctx.Input("X"); + auto *seed = ctx.Input("Seed"); + auto *out = ctx.Output("Out"); + auto *shuffleidx = ctx.Output("ShuffleIdx"); + auto *seed_out = ctx.Output("SeedOut"); int64_t x_embed_size = x->dims()[x->dims().size() - 1]; int64_t elem_size = 1; @@ -76,7 +76,7 @@ class ShuffleBatchCUDAKernel : public framework::OpKernel { // NOTE: We have overwritten GetKernelTypeForVar, so seed_place would // not be CUDAPlace in practice. This case would only happen in Python // op_test framework. - framework::Tensor tmp_tensor; + phi::DenseTensor tmp_tensor; framework::TensorCopySync(*seed, platform::CPUPlace(), &tmp_tensor); seed_int = *(tmp_tensor.data()); } else { @@ -126,9 +126,9 @@ class ShuffleBatchGradCUDAKernel : public framework::OpKernel { "GPU shuffle_batch_grad is not supported on Windows yet")); #else const auto *out_grad = - ctx.Input(framework::GradVarName("Out")); - const auto *shuffleidx = ctx.Input("ShuffleIdx"); - auto *x_grad = ctx.Output(framework::GradVarName("X")); + ctx.Input(framework::GradVarName("Out")); + const auto *shuffleidx = ctx.Input("ShuffleIdx"); + auto *x_grad = ctx.Output(framework::GradVarName("X")); const auto *out_grad_data = out_grad->data(); const auto *shuffleidx_data = shuffleidx->data(); diff --git a/paddle/fluid/operators/shuffle_batch_op.h b/paddle/fluid/operators/shuffle_batch_op.h index 009212afa81d0..cd24a8a2de159 100644 --- a/paddle/fluid/operators/shuffle_batch_op.h +++ b/paddle/fluid/operators/shuffle_batch_op.h @@ -32,7 +32,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; template diff --git a/paddle/fluid/operators/shuffle_channel_op.cu b/paddle/fluid/operators/shuffle_channel_op.cu index f51724d843107..26eee095377c0 100644 --- a/paddle/fluid/operators/shuffle_channel_op.cu +++ b/paddle/fluid/operators/shuffle_channel_op.cu @@ -16,7 +16,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; static constexpr int kNumCUDAThreads = 512; static constexpr int kNumMaximumNumBlocks = 4096; @@ -48,8 +48,8 @@ template class ShuffleChannelOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); int group = ctx.Attr("group"); auto input_dims = input->dims(); @@ -88,9 +88,9 @@ class ShuffleChannelGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* output_grad = - ctx.Input(framework::GradVarName("Out")); + ctx.Input(framework::GradVarName("Out")); auto* input_grad = - ctx.Output(framework::GradVarName("X")); + ctx.Output(framework::GradVarName("X")); int group = ctx.Attr("group"); diff --git a/paddle/fluid/operators/shuffle_channel_op.h b/paddle/fluid/operators/shuffle_channel_op.h index 06abd0628ea39..51dfd894c0cfc 100644 --- a/paddle/fluid/operators/shuffle_channel_op.h +++ b/paddle/fluid/operators/shuffle_channel_op.h @@ -23,8 +23,8 @@ template class ShuffleChannelOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); int group = ctx.Attr("group"); const auto& input_dims = input->dims(); @@ -59,9 +59,9 @@ class ShuffleChannelGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto* output_grad = - ctx.Input(framework::GradVarName("Out")); + ctx.Input(framework::GradVarName("Out")); auto* input_grad = - ctx.Output(framework::GradVarName("X")); + ctx.Output(framework::GradVarName("X")); int group = ctx.Attr("group"); diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc index c9705fa9a9924..a05fae4b45a38 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc @@ -24,7 +24,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; const int kIgnoreIndex = -100; class SigmoidCrossEntropyWithLogitsOp : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_mlu.cc b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_mlu.cc index b679432e51a5b..d77724281327c 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_mlu.cc +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_mlu.cc @@ -18,7 +18,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; const int kIgnoreIndex = -100; void CheckAttrs(const framework::ExecutionContext& ctx) { @@ -43,10 +43,10 @@ class SigmoidCrossEntropyWithLogitsMLUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { CheckAttrs(ctx); - auto* x = ctx.Input("X"); - auto* label = ctx.Input("Label"); + auto* x = ctx.Input("X"); + auto* label = ctx.Input("Label"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); @@ -77,11 +77,11 @@ class SigmoidCrossEntropyWithLogitsMLUGradKernel void Compute(const framework::ExecutionContext& ctx) const override { CheckAttrs(ctx); - auto* x = ctx.Input("X"); - auto* label = ctx.Input("Label"); - auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* x = ctx.Input("X"); + auto* label = ctx.Input("Label"); + auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_npu.cc b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_npu.cc index 59eb23aceda02..ea3f119a05a91 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_npu.cc +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op_npu.cc @@ -18,7 +18,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; const int kIgnoreIndex = -100; void CheckAttrs(const framework::ExecutionContext& ctx) { @@ -45,10 +45,10 @@ class SigmoidCrossEntropyWithLogitsNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { CheckAttrs(ctx); - auto* x = ctx.Input("X"); - auto* label = ctx.Input("Label"); + auto* x = ctx.Input("X"); + auto* label = ctx.Input("Label"); - auto* out = ctx.Output("Out"); + auto* out = ctx.Output("Out"); auto place = ctx.GetPlace(); @@ -71,11 +71,11 @@ class SigmoidCrossEntropyWithLogitsNPUGradKernel void Compute(const framework::ExecutionContext& ctx) const override { CheckAttrs(ctx); - auto* x = ctx.Input("X"); - auto* label = ctx.Input("Label"); - auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* x = ctx.Input("X"); + auto* label = ctx.Input("Label"); + auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dx = ctx.Output(framework::GradVarName("X")); auto place = ctx.GetPlace(); diff --git a/paddle/fluid/operators/similarity_focus_op.h b/paddle/fluid/operators/similarity_focus_op.h index b6de90e8e0367..8c055c2323c84 100644 --- a/paddle/fluid/operators/similarity_focus_op.h +++ b/paddle/fluid/operators/similarity_focus_op.h @@ -24,14 +24,14 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class SimilarityFocusKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - Tensor* out = context.Output("Out"); - const Tensor* x = context.Input("X"); + phi::DenseTensor* out = context.Output("Out"); + const phi::DenseTensor* x = context.Input("X"); T* out_data = out->mutable_data(context.GetPlace()); const T* x_data = x->data(); diff --git a/paddle/fluid/operators/size_op.cc b/paddle/fluid/operators/size_op.cc index 79b4be9de2dbb..6d04f7a1c7cab 100644 --- a/paddle/fluid/operators/size_op.cc +++ b/paddle/fluid/operators/size_op.cc @@ -33,7 +33,7 @@ class SizeOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { return expected_kernel_type; } diff --git a/paddle/fluid/operators/size_op_mlu.cc b/paddle/fluid/operators/size_op_mlu.cc index 32338b05d6e30..5553f538a575d 100644 --- a/paddle/fluid/operators/size_op_mlu.cc +++ b/paddle/fluid/operators/size_op_mlu.cc @@ -22,8 +22,8 @@ template class SizeMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("Input"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("Input"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); int64_t size = x->numel(); diff --git a/paddle/fluid/operators/size_op_npu.cc b/paddle/fluid/operators/size_op_npu.cc index ed95a85be9815..92aeba9280568 100644 --- a/paddle/fluid/operators/size_op_npu.cc +++ b/paddle/fluid/operators/size_op_npu.cc @@ -22,8 +22,8 @@ template class SizeNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("Input"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("Input"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); Tensor cpu_tensor; diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc index 9d9e5816db702..44a9bd24032ab 100644 --- a/paddle/fluid/operators/slice_op.cc +++ b/paddle/fluid/operators/slice_op.cc @@ -24,7 +24,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class SliceOp : public framework::OperatorWithKernel { public: @@ -162,9 +162,9 @@ class SliceOp : public framework::OperatorWithKernel { // 16(depending on which blocking format is used) submemory cannot be // created, so in that scenario a fallback is needed auto tmp_md = dnnl::memory::desc( - phi::vectorize(ctx.Input("Input")->dims()), + phi::vectorize(ctx.Input("Input")->dims()), dnnl::memory::data_type::f32, - ctx.Input("Input")->format()); + ctx.Input("Input")->format()); if (tmp_md.data.format_desc.blocking.inner_nblks == 0) return framework::OpKernelType(input_data_type, ctx.GetPlace(), @@ -338,9 +338,10 @@ class SliceOpGrad : public framework::OperatorWithKernel { // created, so in that scenario a fallback is needed auto tmp_md = dnnl::memory::desc( phi::vectorize( - ctx.Input(framework::GradVarName("Out"))->dims()), + ctx.Input(framework::GradVarName("Out")) + ->dims()), dnnl::memory::data_type::f32, - ctx.Input(framework::GradVarName("Out"))->format()); + ctx.Input(framework::GradVarName("Out"))->format()); if (tmp_md.data.format_desc.blocking.inner_nblks == 0) return framework::OpKernelType(input_data_type, ctx.GetPlace(), diff --git a/paddle/fluid/operators/slice_op.h b/paddle/fluid/operators/slice_op.h index beaec7bc5b91b..5efb0c3819450 100644 --- a/paddle/fluid/operators/slice_op.h +++ b/paddle/fluid/operators/slice_op.h @@ -24,7 +24,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using Variable = framework::Variable; using LoDTensorArray = framework::LoDTensorArray; using DDim = framework::DDim; @@ -73,7 +73,7 @@ inline void DealTensorArray(const framework::ExecutionContext& ctx, } } } else { - auto out = ctx.Output("Out"); + auto out = ctx.Output("Out"); auto in_tensor = in_array->at(start); paddle::framework::TensorCopy(in_tensor, ctx.GetPlace(), out); } @@ -99,16 +99,19 @@ class SliceKernel : public framework::OpKernel { auto infer_flags = ctx.Attr>("infer_flags"); // Step 1: Get the accurate attribute value of starts and ends - auto starts_tensor_list = ctx.MultiInput("StartsTensorList"); + auto starts_tensor_list = + ctx.MultiInput("StartsTensorList"); if (ctx.HasInput("StartsTensor")) { - starts = GetDataFromTensor(ctx.Input("StartsTensor")); + starts = GetDataFromTensor( + ctx.Input("StartsTensor")); } else if (starts_tensor_list.size() > 0) { starts = GetDataFromTensorList(starts_tensor_list); } - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); + auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); if (ctx.HasInput("EndsTensor")) { - ends = GetDataFromTensor(ctx.Input("EndsTensor")); + ends = + GetDataFromTensor(ctx.Input("EndsTensor")); } else if (ends_tensor_list.size() > 0) { ends = GetDataFromTensorList(ends_tensor_list); } @@ -143,16 +146,19 @@ class SliceGradKernel : public framework::OpKernel { std::vector ends(ends_int.begin(), ends_int.end()); // Get the accurate attribute value of starts and ends - auto starts_tensor_list = ctx.MultiInput("StartsTensorList"); + auto starts_tensor_list = + ctx.MultiInput("StartsTensorList"); if (ctx.HasInput("StartsTensor")) { - starts = GetDataFromTensor(ctx.Input("StartsTensor")); + starts = GetDataFromTensor( + ctx.Input("StartsTensor")); } else if (starts_tensor_list.size() > 0) { starts = GetDataFromTensorList(starts_tensor_list); } - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); + 
auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); if (ctx.HasInput("EndsTensor")) { - ends = GetDataFromTensor(ctx.Input("EndsTensor")); + ends = + GetDataFromTensor(ctx.Input("EndsTensor")); } else if (ends_tensor_list.size() > 0) { ends = GetDataFromTensorList(ends_tensor_list); } @@ -196,7 +202,8 @@ class SliceGradKernel : public framework::OpKernel { d_out_arr->at(i), ctx.GetPlace(), &(d_in_arr->at(start + i))); } } else { - auto* d_out = ctx.Input(framework::GradVarName("Out")); + auto* d_out = + ctx.Input(framework::GradVarName("Out")); paddle::framework::TensorCopy( *d_out, ctx.GetPlace(), &(d_in_arr->at(start))); } diff --git a/paddle/fluid/operators/slice_op_mlu.cc b/paddle/fluid/operators/slice_op_mlu.cc index 7d12916f33439..60c86b1fcf5f6 100644 --- a/paddle/fluid/operators/slice_op_mlu.cc +++ b/paddle/fluid/operators/slice_op_mlu.cc @@ -19,14 +19,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class SliceMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* out = ctx.Output("Out"); + auto* input = ctx.Input("Input"); + auto* out = ctx.Output("Out"); auto axes = ctx.Attr>("axes"); auto starts = ctx.Attr>("starts"); @@ -36,16 +36,18 @@ class SliceMLUKernel : public framework::OpKernel { auto infer_flags = ctx.Attr>("infer_flags"); // Get the accurate attribute value of starts and ends - auto starts_tensor_list = ctx.MultiInput("StartsTensorList"); + auto starts_tensor_list = + ctx.MultiInput("StartsTensorList"); if (ctx.HasInput("StartsTensor")) { - starts = GetDataFromTensor(ctx.Input("StartsTensor")); + starts = + GetDataFromTensor(ctx.Input("StartsTensor")); } else if (starts_tensor_list.size() > 0) { starts = GetDataFromTensorList(starts_tensor_list); } - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); + auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); if (ctx.HasInput("EndsTensor")) { - ends = GetDataFromTensor(ctx.Input("EndsTensor")); + ends = GetDataFromTensor(ctx.Input("EndsTensor")); } else if (ends_tensor_list.size() > 0) { ends = GetDataFromTensorList(ends_tensor_list); } @@ -127,25 +129,28 @@ template class SliceGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dinput = ctx.Output(framework::GradVarName("Input")); + auto* input = ctx.Input("Input"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dinput = + ctx.Output(framework::GradVarName("Input")); auto axes = ctx.Attr>("axes"); auto starts = ctx.Attr>("starts"); auto ends = ctx.Attr>("ends"); // Get the accurate attribute value of starts and ends - auto starts_tensor_list = ctx.MultiInput("StartsTensorList"); + auto starts_tensor_list = + ctx.MultiInput("StartsTensorList"); if (ctx.HasInput("StartsTensor")) { - starts = GetDataFromTensor(ctx.Input("StartsTensor")); + starts = + GetDataFromTensor(ctx.Input("StartsTensor")); } else if (starts_tensor_list.size() > 0) { starts = GetDataFromTensorList(starts_tensor_list); } - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); + auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); if (ctx.HasInput("EndsTensor")) { - ends = GetDataFromTensor(ctx.Input("EndsTensor")); + ends = GetDataFromTensor(ctx.Input("EndsTensor")); } else if 
(ends_tensor_list.size() > 0) { ends = GetDataFromTensorList(ends_tensor_list); } diff --git a/paddle/fluid/operators/slice_op_npu.cc b/paddle/fluid/operators/slice_op_npu.cc index 9d248bfd7f39c..5ed606c7e0057 100644 --- a/paddle/fluid/operators/slice_op_npu.cc +++ b/paddle/fluid/operators/slice_op_npu.cc @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; void UpdateAttr(const framework::DDim& in_dims, @@ -58,8 +58,8 @@ template class SliceNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* out = ctx.Output("Out"); + auto* input = ctx.Input("Input"); + auto* out = ctx.Output("Out"); auto axes_int = ctx.Attr>("axes"); auto starts_int = ctx.Attr>("starts"); @@ -74,16 +74,18 @@ class SliceNPUKernel : public framework::OpKernel { const auto& in_dims = input->dims(); // Get the accurate attribute value of starts and ends - auto starts_tensor_list = ctx.MultiInput("StartsTensorList"); + auto starts_tensor_list = + ctx.MultiInput("StartsTensorList"); if (ctx.HasInput("StartsTensor")) { - starts = GetDataFromTensor(ctx.Input("StartsTensor")); + starts = + GetDataFromTensor(ctx.Input("StartsTensor")); } else if (starts_tensor_list.size() > 0) { starts = GetDataFromTensorList(starts_tensor_list); } - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); + auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); if (ctx.HasInput("EndsTensor")) { - ends = GetDataFromTensor(ctx.Input("EndsTensor")); + ends = GetDataFromTensor(ctx.Input("EndsTensor")); } else if (ends_tensor_list.size() > 0) { ends = GetDataFromTensorList(ends_tensor_list); } @@ -154,9 +156,10 @@ template class SliceGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input = ctx.Input("Input"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dinput = ctx.Output(framework::GradVarName("Input")); + auto* input = ctx.Input("Input"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dinput = + ctx.Output(framework::GradVarName("Input")); auto axes_int = ctx.Attr>("axes"); auto starts_int = ctx.Attr>("starts"); @@ -166,16 +169,18 @@ class SliceGradNPUKernel : public framework::OpKernel { std::vector ends(ends_int.begin(), ends_int.end()); // Get the accurate attribute value of starts and ends - auto starts_tensor_list = ctx.MultiInput("StartsTensorList"); + auto starts_tensor_list = + ctx.MultiInput("StartsTensorList"); if (ctx.HasInput("StartsTensor")) { - starts = GetDataFromTensor(ctx.Input("StartsTensor")); + starts = + GetDataFromTensor(ctx.Input("StartsTensor")); } else if (starts_tensor_list.size() > 0) { starts = GetDataFromTensorList(starts_tensor_list); } - auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); + auto ends_tensor_list = ctx.MultiInput("EndsTensorList"); if (ctx.HasInput("EndsTensor")) { - ends = GetDataFromTensor(ctx.Input("EndsTensor")); + ends = GetDataFromTensor(ctx.Input("EndsTensor")); } else if (ends_tensor_list.size() > 0) { ends = GetDataFromTensorList(ends_tensor_list); } diff --git a/paddle/fluid/operators/smooth_l1_loss_op.h b/paddle/fluid/operators/smooth_l1_loss_op.h index fd9d62a78f540..3cc565ef91203 100644 --- a/paddle/fluid/operators/smooth_l1_loss_op.h +++ b/paddle/fluid/operators/smooth_l1_loss_op.h @@ -20,7 
+20,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template @@ -50,12 +50,12 @@ template class SmoothL1LossKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in0 = context.Input("X"); - auto* in1 = context.Input("Y"); - auto* in2 = context.Input("InsideWeight"); - auto* in3 = context.Input("OutsideWeight"); - auto* out0 = context.Output("Diff"); - auto* out1 = context.Output("Out"); + auto* in0 = context.Input("X"); + auto* in1 = context.Input("Y"); + auto* in2 = context.Input("InsideWeight"); + auto* in3 = context.Input("OutsideWeight"); + auto* out0 = context.Output("Diff"); + auto* out1 = context.Output("Out"); out0->mutable_data(context.GetPlace()); out1->mutable_data(context.GetPlace()); @@ -121,10 +121,10 @@ template class SmoothL1LossGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in0 = context.Input("InsideWeight"); - auto* in1 = context.Input("OutsideWeight"); - auto* in2 = context.Input("Diff"); - auto* og = context.Input(framework::GradVarName("Out")); + auto* in0 = context.Input("InsideWeight"); + auto* in1 = context.Input("OutsideWeight"); + auto* in2 = context.Input("Diff"); + auto* og = context.Input(framework::GradVarName("Out")); auto sigma = static_cast(context.Attr("sigma")); T sigma2 = sigma * sigma; bool has_weight = (in0 != nullptr) && (in1 != nullptr); @@ -165,8 +165,8 @@ class SmoothL1LossGradKernel : public framework::OpKernel { Eigen::array({{1, static_cast(cols)}})) * weights * diff_mat_view; - auto* out0 = context.Output(framework::GradVarName("X")); - auto* out1 = context.Output(framework::GradVarName("Y")); + auto* out0 = context.Output(framework::GradVarName("X")); + auto* out1 = context.Output(framework::GradVarName("Y")); if (out0) { out0->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/smooth_l1_loss_op_npu.cc b/paddle/fluid/operators/smooth_l1_loss_op_npu.cc index 5c1e0cfbb5e90..1a4fb14bbb0b6 100644 --- a/paddle/fluid/operators/smooth_l1_loss_op_npu.cc +++ b/paddle/fluid/operators/smooth_l1_loss_op_npu.cc @@ -23,12 +23,12 @@ template class SmoothL1LossNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in_x = context.Input("X"); - auto* in_y = context.Input("Y"); - auto* inside_weight = context.Input("InsideWeight"); - auto* outside_weight = context.Input("OutsideWeight"); - auto* out_diff = context.Output("Diff"); - auto* out_loss = context.Output("Out"); + auto* in_x = context.Input("X"); + auto* in_y = context.Input("Y"); + auto* inside_weight = context.Input("InsideWeight"); + auto* outside_weight = context.Input("OutsideWeight"); + auto* out_diff = context.Output("Diff"); + auto* out_loss = context.Output("Out"); out_diff->mutable_data(context.GetPlace()); out_loss->mutable_data(context.GetPlace()); @@ -117,12 +117,14 @@ template class SmoothL1LossGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* inside_weight = context.Input("InsideWeight"); - auto* outside_weight = context.Input("OutsideWeight"); - auto* diff = context.Input("Diff"); - auto* og = context.Input(framework::GradVarName("Out")); - auto* outx_grad = context.Output(framework::GradVarName("X")); - auto* outy_grad = 
context.Output(framework::GradVarName("Y")); + auto* inside_weight = context.Input("InsideWeight"); + auto* outside_weight = context.Input("OutsideWeight"); + auto* diff = context.Input("Diff"); + auto* og = context.Input(framework::GradVarName("Out")); + auto* outx_grad = + context.Output(framework::GradVarName("X")); + auto* outy_grad = + context.Output(framework::GradVarName("Y")); auto sigma = context.Attr("sigma"); T sigma2 = 1.0 / (sigma * sigma); bool has_weight = (inside_weight != nullptr) && (outside_weight != nullptr); diff --git a/paddle/fluid/operators/softmax_op_mlu.cc b/paddle/fluid/operators/softmax_op_mlu.cc index 50ef6c6599294..0a39b1335d6e4 100644 --- a/paddle/fluid/operators/softmax_op_mlu.cc +++ b/paddle/fluid/operators/softmax_op_mlu.cc @@ -67,7 +67,7 @@ class SoftmaxGradMLUKernel : public framework::OpKernel { auto* out = ctx.Input("Out"); auto* dOut = ctx.Input(framework::GradVarName("Out")); - auto* dX = ctx.Output(framework::GradVarName("X")); + auto* dX = ctx.Output(framework::GradVarName("X")); dX->mutable_data(ctx.GetPlace()); const int rank = out->dims().size(); diff --git a/paddle/fluid/operators/softmax_op_npu.cc b/paddle/fluid/operators/softmax_op_npu.cc index 6e4ccadaec04f..fa40fa3be7a66 100644 --- a/paddle/fluid/operators/softmax_op_npu.cc +++ b/paddle/fluid/operators/softmax_op_npu.cc @@ -48,7 +48,7 @@ class SoftmaxGradNPUKernel : public framework::OpKernel { auto* out = ctx.Input("Out"); auto* dOut = ctx.Input(framework::GradVarName("Out")); - auto* dX = ctx.Output(framework::GradVarName("X")); + auto* dX = ctx.Output(framework::GradVarName("X")); auto dims = dX->dims(); const int rank = dims.size(); diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op_mlu.cc b/paddle/fluid/operators/softmax_with_cross_entropy_op_mlu.cc index f2fb529656744..91333b3393000 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op_mlu.cc +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op_mlu.cc @@ -19,17 +19,17 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class SoftmaxWithCrossEntropyMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* logits = ctx.Input("Logits"); - auto* labels = ctx.Input("Label"); - auto* softmax = ctx.Output("Softmax"); - auto* loss = ctx.Output("Loss"); - auto* backprop = ctx.Output("Backprop"); + auto* logits = ctx.Input("Logits"); + auto* labels = ctx.Input("Label"); + auto* softmax = ctx.Output("Softmax"); + auto* loss = ctx.Output("Loss"); + auto* backprop = ctx.Output("Backprop"); auto soft_label = ctx.Attr("soft_label"); PADDLE_ENFORCE_EQ(ctx.Attr("use_softmax"), @@ -103,7 +103,7 @@ class SoftmaxWithCrossEntropyMLUKernel : public framework::OpKernel { platform::errors::InvalidArgument( "If soft_label=False, axis must be -1 or" " can be regard as last dimention in mlu kernel.")); - framework::Tensor labels_int32(framework::TransToPhiDataType(VT::INT32)); + phi::DenseTensor labels_int32(framework::TransToPhiDataType(VT::INT32)); labels_int32.Resize(labels->dims()); labels_int32.mutable_data(ctx.GetPlace()); @@ -142,9 +142,11 @@ template class SoftmaxWithCrossEntropyGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* backprop = ctx.Input("Backprop"); - auto* loss_grad = ctx.Input(framework::GradVarName("Loss")); - auto* logits_grad = ctx.Output(framework::GradVarName("Logits")); + auto* backprop = ctx.Input("Backprop"); + auto* loss_grad = + ctx.Input(framework::GradVarName("Loss")); + auto* logits_grad = + ctx.Output(framework::GradVarName("Logits")); PADDLE_ENFORCE_NOT_NULL(backprop, platform::errors::PreconditionNotMet( "backprop should not be null in MLU kernel of " diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op_npu.cc b/paddle/fluid/operators/softmax_with_cross_entropy_op_npu.cc index db1581a26febb..ddcb07b4d77e4 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op_npu.cc +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op_npu.cc @@ -24,17 +24,17 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class SoftmaxWithCrossEntropyNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* logits = ctx.Input("Logits"); - auto* labels = ctx.Input("Label"); - auto* softmax = ctx.Output("Softmax"); - auto* loss = ctx.Output("Loss"); - auto* backprop = ctx.Output("Backprop"); + auto* logits = ctx.Input("Logits"); + auto* labels = ctx.Input("Label"); + auto* softmax = ctx.Output("Softmax"); + auto* loss = ctx.Output("Loss"); + auto* backprop = ctx.Output("Backprop"); auto soft_label = ctx.Attr("soft_label"); PADDLE_ENFORCE_EQ(soft_label, false, @@ -93,9 +93,11 @@ template class SoftmaxWithCrossEntropyGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* backprop = ctx.Input("Backprop"); - auto* loss_grad = ctx.Input(framework::GradVarName("Loss")); - auto* logits_grad = ctx.Output(framework::GradVarName("Logits")); + auto* backprop = ctx.Input("Backprop"); + auto* loss_grad = + ctx.Input(framework::GradVarName("Loss")); + auto* logits_grad = + ctx.Output(framework::GradVarName("Logits")); PADDLE_ENFORCE_NOT_NULL(backprop, platform::errors::PreconditionNotMet( diff --git a/paddle/fluid/operators/space_to_depth_op.cc b/paddle/fluid/operators/space_to_depth_op.cc index dce7539fe72b8..6cc8d0f79be4e 100644 --- a/paddle/fluid/operators/space_to_depth_op.cc +++ b/paddle/fluid/operators/space_to_depth_op.cc @@ -23,7 +23,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class SpaceToDepthOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/sparse_attention_op.cu b/paddle/fluid/operators/sparse_attention_op.cu index fd2ccfdea33cf..b03a0b6c84e71 100644 --- a/paddle/fluid/operators/sparse_attention_op.cu +++ b/paddle/fluid/operators/sparse_attention_op.cu @@ -203,22 +203,22 @@ __global__ void BlockSparseSoftmaxBackward(T* dst, } } -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; /* input: sparse C in CSR format (num_rows,num_rows) output: sparse C after softmax operation */ template void SparseSoftmaxForward(const phi::GPUContext& ctx, - const Tensor* offset, - const Tensor* columns, - Tensor* input, - Tensor* output, + const phi::DenseTensor* offset, + const phi::DenseTensor* columns, + phi::DenseTensor* input, + phi::DenseTensor* output, const int blocksize, const int num_rows, const int num_cols, - const Tensor* key_padding_mask, - const Tensor* attn_mask) { + const phi::DenseTensor* key_padding_mask, + const phi::DenseTensor* attn_mask) { const int* offset_data = offset->data(); const int* columns_data = columns->data(); T* input_data = input->data(); @@ -323,11 +323,11 @@ void SparseSoftmaxForward(const phi::GPUContext& ctx, template void SparseSoftmaxBackward(const phi::GPUContext& ctx, - const Tensor* offset, - const Tensor* columns, - Tensor* dx, - const Tensor* dout, - const Tensor* out, + const phi::DenseTensor* offset, + const phi::DenseTensor* columns, + phi::DenseTensor* dx, + const phi::DenseTensor* dout, + const phi::DenseTensor* out, const int blocksize, const int num_rows, const int num_cols) { @@ -454,11 +454,11 @@ output: sparse C in CSR format (num_rows,num_rows) */ template void DotSdd(const phi::GPUContext& ctx, - const Tensor* a, - const Tensor* b, - const Tensor* 
c_offset, - const Tensor* c_columns, - Tensor* c_value, + const phi::DenseTensor* a, + const phi::DenseTensor* b, + const phi::DenseTensor* c_offset, + const phi::DenseTensor* c_columns, + phi::DenseTensor* c_value, const int num_rows, const int num_cols, const bool a_transpose, @@ -550,11 +550,11 @@ output: dense C (num_rows,num_cols) */ template void DotDsd(const phi::GPUContext& ctx, - const Tensor* a_offset, - const Tensor* a_columns, - const Tensor* a_value, - const Tensor* b, - Tensor* c, + const phi::DenseTensor* a_offset, + const phi::DenseTensor* a_columns, + const phi::DenseTensor* a_value, + const phi::DenseTensor* b, + phi::DenseTensor* c, const int num_rows, const int num_cols, const bool a_transpose, @@ -641,7 +641,7 @@ void DotDsd(const phi::GPUContext& ctx, platform::dynload::cusparseDestroy(handle); } -std::vector GetSplitTensor(Tensor* input) { +std::vector GetSplitTensor(phi::DenseTensor* input) { auto dims = input->dims(); int batch_size = dims[0]; int num_heads = dims[1]; @@ -658,23 +658,24 @@ template class SparseAttentionCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto query = *ctx.Input("Q"); - auto key = *ctx.Input("K"); - auto value = *ctx.Input("V"); - auto offset = *ctx.Input("Offset"); - auto columns = *ctx.Input("Columns"); - auto output_ptr = ctx.Output("Out"); + auto query = *ctx.Input("Q"); + auto key = *ctx.Input("K"); + auto value = *ctx.Input("V"); + auto offset = *ctx.Input("Offset"); + auto columns = *ctx.Input("Columns"); + auto output_ptr = ctx.Output("Out"); output_ptr->mutable_data(ctx.GetPlace()); - auto sparse_dot_sdd_ptr = ctx.Output("SparseDotSdd"); + auto sparse_dot_sdd_ptr = ctx.Output("SparseDotSdd"); sparse_dot_sdd_ptr->mutable_data(ctx.GetPlace()); - auto softmax_ptr = ctx.Output("Softmax"); + auto softmax_ptr = ctx.Output("Softmax"); softmax_ptr->mutable_data(ctx.GetPlace()); // add Mask auto* key_padding_mask = ctx.HasInput("KeyPaddingMask") - ? ctx.Input("KeyPaddingMask") + ? ctx.Input("KeyPaddingMask") : nullptr; - auto* attn_mask = - ctx.HasInput("AttnMask") ? ctx.Input("AttnMask") : nullptr; + auto* attn_mask = ctx.HasInput("AttnMask") + ? 
ctx.Input("AttnMask") + : nullptr; auto output = *output_ptr; auto result_sdd = *sparse_dot_sdd_ptr; @@ -775,17 +776,19 @@ template class SparseAttentionGradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto query = *ctx.Input("Q"); - auto key = *ctx.Input("K"); - auto value = *ctx.Input("V"); - auto offset = *ctx.Input("Offset"); - auto columns = *ctx.Input("Columns"); - auto sparse_dot_sdd = *ctx.Input("SparseDotSdd"); - auto softmax = *ctx.Input("Softmax"); - auto dout = *ctx.Input(framework::GradVarName("Out")); - auto* dquery_ptr = ctx.Output(framework::GradVarName("Q")); - auto* dkey_ptr = ctx.Output(framework::GradVarName("K")); - auto* dvalue_ptr = ctx.Output(framework::GradVarName("V")); + auto query = *ctx.Input("Q"); + auto key = *ctx.Input("K"); + auto value = *ctx.Input("V"); + auto offset = *ctx.Input("Offset"); + auto columns = *ctx.Input("Columns"); + auto sparse_dot_sdd = *ctx.Input("SparseDotSdd"); + auto softmax = *ctx.Input("Softmax"); + auto dout = *ctx.Input(framework::GradVarName("Out")); + auto* dquery_ptr = + ctx.Output(framework::GradVarName("Q")); + auto* dkey_ptr = ctx.Output(framework::GradVarName("K")); + auto* dvalue_ptr = + ctx.Output(framework::GradVarName("V")); dquery_ptr->mutable_data(ctx.GetPlace()); dkey_ptr->mutable_data(ctx.GetPlace()); dvalue_ptr->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/spectral_norm_op.cc b/paddle/fluid/operators/spectral_norm_op.cc index 19a846afd4376..372e31aa9af63 100644 --- a/paddle/fluid/operators/spectral_norm_op.cc +++ b/paddle/fluid/operators/spectral_norm_op.cc @@ -20,8 +20,6 @@ namespace paddle { namespace operators { -using framework::Tensor; - class SpectralNormOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/split_op.cc b/paddle/fluid/operators/split_op.cc index e5c59575a749b..8ca08e75c40ec 100644 --- a/paddle/fluid/operators/split_op.cc +++ b/paddle/fluid/operators/split_op.cc @@ -22,7 +22,7 @@ limitations under the License. */ namespace paddle { namespace operators { using framework::LoDTensor; -using framework::Tensor; + using framework::Variable; class SplitOp : public framework::OperatorWithKernel { @@ -120,7 +120,7 @@ class SplitOp : public framework::OperatorWithKernel { // reorders, because if blocked dimension is not divisible by 8 or // 16(depending on which blocking format is used) submemory cannot be // created, so in that scenario a fallback is needed - const auto x_md = ctx.Input("X")->mem_desc(); + const auto x_md = ctx.Input("X")->mem_desc(); if (x_md.data.format_desc.blocking.inner_nblks == 0) return framework::OpKernelType(input_data_type, ctx.GetPlace(), @@ -133,7 +133,7 @@ class SplitOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "AxisTensor" || var_name == "SectionsTensorList") { return expected_kernel_type; diff --git a/paddle/fluid/operators/split_op_mlu.cc b/paddle/fluid/operators/split_op_mlu.cc index 635f3925a8f02..cda18720e7aba 100644 --- a/paddle/fluid/operators/split_op_mlu.cc +++ b/paddle/fluid/operators/split_op_mlu.cc @@ -18,15 +18,15 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class SplitMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { // init parameter - auto* in = ctx.Input("X"); - auto outs = ctx.MultiOutput("Out"); + auto* in = ctx.Input("X"); + auto outs = ctx.MultiOutput("Out"); int num = ctx.Attr("num"); std::vector sections = ctx.Attr>("sections"); int axis = ctx.Attr("axis"); @@ -36,12 +36,12 @@ class SplitMLUKernel : public framework::OpKernel { bool need_resize_outs_dims = false; if (ctx.HasInput("AxisTensor")) { - auto* axis_tensor = ctx.Input("AxisTensor"); + auto* axis_tensor = ctx.Input("AxisTensor"); axis = GetDataFromTensor(axis_tensor)[0]; need_resize_outs_dims = true; } auto sections_tensor_list = - ctx.MultiInput("SectionsTensorList"); + ctx.MultiInput("SectionsTensorList"); if (sections_tensor_list.size() > 0) { sections = GetDataFromTensorList(sections_tensor_list); need_resize_outs_dims = true; diff --git a/paddle/fluid/operators/split_op_npu.cc b/paddle/fluid/operators/split_op_npu.cc index 427070e8f2120..2fa8fa2a805eb 100644 --- a/paddle/fluid/operators/split_op_npu.cc +++ b/paddle/fluid/operators/split_op_npu.cc @@ -21,14 +21,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class SplitNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - auto outs = ctx.MultiOutput("Out"); + auto* in = ctx.Input("X"); + auto outs = ctx.MultiOutput("Out"); int num = ctx.Attr("num"); std::vector sections = ctx.Attr>("sections"); int axis = ctx.Attr("axis"); diff --git a/paddle/fluid/operators/spp_op.h b/paddle/fluid/operators/spp_op.h index fd369aee0eaa6..260d368dd0ba1 100644 --- a/paddle/fluid/operators/spp_op.h +++ b/paddle/fluid/operators/spp_op.h @@ -28,8 +28,8 @@ template class SppKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - const framework::Tensor* in_x = context.Input("X"); - auto* out = context.Output("Out"); + const phi::DenseTensor* in_x = context.Input("X"); + auto* out = context.Output("Out"); int pyramid_height = context.template Attr("pyramid_height"); std::string pooling_type = context.template Attr("pooling_type"); @@ -48,7 +48,7 @@ class SppKernel : public framework::OpKernel { std::vector strides({kernel_size_h, kernel_size_w}); std::vector paddings({padding_h, padding_w}); // pooling output shape - framework::Tensor out_level; + phi::DenseTensor out_level; std::vector output_shape_vec( {in_x->dims()[0], in_x->dims()[1], bins, bins}); framework::DDim output_shape(phi::make_ddim(output_shape_vec)); @@ -110,12 +110,12 @@ template class SppGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - const framework::Tensor* in_x = context.Input("X"); - const framework::Tensor* out = context.Input("Out"); - const framework::Tensor* out_grad = - context.Input(framework::GradVarName("Out")); - framework::Tensor* in_x_grad = - context.Output(framework::GradVarName("X")); + const phi::DenseTensor* in_x = context.Input("X"); + const phi::DenseTensor* out = context.Input("Out"); + const phi::DenseTensor* out_grad = + context.Input(framework::GradVarName("Out")); + phi::DenseTensor* in_x_grad = + 
context.Output(framework::GradVarName("X")); int pyramid_height = context.template Attr("pyramid_height"); std::string pooling_type = context.template Attr("pooling_type"); @@ -140,8 +140,8 @@ class SppGradKernel : public framework::OpKernel { std::vector strides({kernel_size_h, kernel_size_w}); std::vector paddings({padding_h, padding_w}); // split out and outgrad ... to flatten - framework::Tensor out_level; - framework::Tensor outgrad_level; + phi::DenseTensor out_level; + phi::DenseTensor outgrad_level; int out_flatten_w = in_x->dims()[1] * bins * bins; std::vector out_flatten_shape_vec( {in_x->dims()[0], out_flatten_w}); diff --git a/paddle/fluid/operators/squared_l2_distance_op.h b/paddle/fluid/operators/squared_l2_distance_op.h index 1c1a34b14ba00..1698c65fc47ac 100644 --- a/paddle/fluid/operators/squared_l2_distance_op.h +++ b/paddle/fluid/operators/squared_l2_distance_op.h @@ -19,16 +19,16 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class SquaredL2DistanceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in0 = context.Input("X"); - auto* in1 = context.Input("Y"); - auto* out0 = context.Output("sub_result"); - auto* out1 = context.Output("Out"); + auto* in0 = context.Input("X"); + auto* in1 = context.Input("Y"); + auto* out0 = context.Output("sub_result"); + auto* out1 = context.Output("Out"); auto in0_dims = in0->dims(); auto in1_dims = in1->dims(); @@ -66,10 +66,10 @@ template class SquaredL2DistanceGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* in0 = context.Input("sub_result"); - auto* in1 = context.Input(framework::GradVarName("Out")); - auto* x_g = context.Output(framework::GradVarName("X")); - auto* y_g = context.Output(framework::GradVarName("Y")); + auto* in0 = context.Input("sub_result"); + auto* in1 = context.Input(framework::GradVarName("Out")); + auto* x_g = context.Output(framework::GradVarName("X")); + auto* y_g = context.Output(framework::GradVarName("Y")); PADDLE_ENFORCE_NOT_NULL( x_g, diff --git a/paddle/fluid/operators/squared_l2_norm_op.cc b/paddle/fluid/operators/squared_l2_norm_op.cc index 4653cc0cc2860..2e97f5b9b0dc2 100644 --- a/paddle/fluid/operators/squared_l2_norm_op.cc +++ b/paddle/fluid/operators/squared_l2_norm_op.cc @@ -20,8 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class SquaredL2NormOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/squared_l2_norm_op_mlu.cc b/paddle/fluid/operators/squared_l2_norm_op_mlu.cc index 741d23540b6e4..fcd83b40875ec 100644 --- a/paddle/fluid/operators/squared_l2_norm_op_mlu.cc +++ b/paddle/fluid/operators/squared_l2_norm_op_mlu.cc @@ -19,15 +19,15 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class SquaredL2NormMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { auto &dev_ctx = context.template device_context(); - auto *x = context.Input("X"); - auto *out = context.Output("Out"); + auto *x = context.Input("X"); + auto *out = context.Output("Out"); auto place = context.GetPlace(); @@ -40,9 +40,9 @@ class SquaredL2NormMLUKernel : public framework::OpKernel { MLUCnnl::L2Loss(context, input_desc.get(), GetBasePtr(x), GetBasePtr(out)); // do mul - framework::Tensor scale_tensor = + phi::DenseTensor scale_tensor = context.AllocateTmpTensor({1}, dev_ctx); - framework::Tensor bias_tensor = + phi::DenseTensor bias_tensor = context.AllocateTmpTensor({1}, dev_ctx); MLUCnnlTensorDesc scale_desc(scale_tensor); MLUCnnlTensorDesc bias_desc(bias_tensor); @@ -67,9 +67,11 @@ class SquaredL2NormGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { auto &dev_ctx = context.template device_context(); - auto *x = context.Input("X"); - auto *x_grad = context.Output(framework::GradVarName("X")); - auto *out_grad = context.Input(framework::GradVarName("Out")); + auto *x = context.Input("X"); + auto *x_grad = + context.Output(framework::GradVarName("X")); + auto *out_grad = + context.Input(framework::GradVarName("Out")); PADDLE_ENFORCE_EQ( out_grad->numel(), @@ -108,9 +110,9 @@ class SquaredL2NormGradMLUKernel : public framework::OpKernel { ToCnnlDataType(x->dtype())); // mul - framework::Tensor scale_tensor = + phi::DenseTensor scale_tensor = context.AllocateTmpTensor({1}, dev_ctx); - framework::Tensor bias_tensor = + phi::DenseTensor bias_tensor = context.AllocateTmpTensor({1}, dev_ctx); MLUCnnlTensorDesc scale_desc(scale_tensor); MLUCnnlTensorDesc bias_desc(bias_tensor); diff --git a/paddle/fluid/operators/squared_l2_norm_op_npu.cc b/paddle/fluid/operators/squared_l2_norm_op_npu.cc index 56fae36570c19..25260ed4c1286 100644 --- a/paddle/fluid/operators/squared_l2_norm_op_npu.cc +++ b/paddle/fluid/operators/squared_l2_norm_op_npu.cc @@ -18,14 +18,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class SquaredL2NormNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *x = context.Input("X"); - auto *out = context.Output("Out"); + auto *x = context.Input("X"); + auto *out = context.Output("Out"); auto place = context.GetPlace(); auto stream = @@ -47,9 +47,11 @@ template class SquaredL2NormGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { - auto *x = context.Input("X"); - auto *x_grad = context.Output(framework::GradVarName("X")); - auto *out_grad = context.Input(framework::GradVarName("Out")); + auto *x = context.Input("X"); + auto *x_grad = + context.Output(framework::GradVarName("X")); + auto *out_grad = + context.Input(framework::GradVarName("Out")); PADDLE_ENFORCE_EQ( out_grad->numel(), diff --git a/paddle/fluid/operators/stack_op_mlu.cc b/paddle/fluid/operators/stack_op_mlu.cc index 3e9a51b47939e..eeac200676f4a 100644 --- a/paddle/fluid/operators/stack_op_mlu.cc +++ b/paddle/fluid/operators/stack_op_mlu.cc @@ -19,14 +19,14 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class StackMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto x = ctx.MultiInput("X"); - auto* y = ctx.Output("Y"); + auto x = ctx.MultiInput("X"); + auto* y = ctx.Output("Y"); int axis = ctx.Attr("axis"); if (axis < 0) axis += (x[0]->dims().size() + 1); int num = static_cast(x.size()); diff --git a/paddle/fluid/operators/stack_op_npu.cc b/paddle/fluid/operators/stack_op_npu.cc index 540e3f22cd816..3b5c0b1dc0cb6 100644 --- a/paddle/fluid/operators/stack_op_npu.cc +++ b/paddle/fluid/operators/stack_op_npu.cc @@ -18,14 +18,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class StackNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto x = ctx.MultiInput("X"); - auto* y = ctx.Output("Y"); + auto x = ctx.MultiInput("X"); + auto* y = ctx.Output("Y"); int axis = ctx.Attr("axis"); if (axis < 0) axis += (x[0]->dims().size() + 1); int num = static_cast(x.size()); @@ -39,7 +39,7 @@ class StackNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - std::vector x_list; + std::vector x_list; for (int i = 0; i < num; i++) { x_list.push_back(*x[i]); } @@ -55,8 +55,8 @@ template class StackGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* dy = ctx.Input(framework::GradVarName("Y")); - auto dx = ctx.MultiOutput(framework::GradVarName("X")); + auto* dy = ctx.Input(framework::GradVarName("Y")); + auto dx = ctx.MultiOutput(framework::GradVarName("X")); int axis = ctx.Attr("axis"); if (axis < 0) axis += dy->dims().size(); int num = dy->dims()[axis]; @@ -70,7 +70,7 @@ class StackGradNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - std::vector dx_list; + std::vector dx_list; for (int i = 0; i < num; i++) { dx[i]->mutable_data(ctx.GetPlace()); dx_list.push_back(*dx[i]); diff --git a/paddle/fluid/operators/stft_op.h b/paddle/fluid/operators/stft_op.h index fb2ca31608cd7..23130f687e305 100644 --- a/paddle/fluid/operators/stft_op.h +++ b/paddle/fluid/operators/stft_op.h @@ -27,7 +27,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class StftKernel : public framework::OpKernel { @@ -38,9 +38,9 @@ class StftKernel : public framework::OpKernel { */ void Compute(const framework::ExecutionContext& ctx) const override { using C = paddle::platform::complex; - const Tensor* x = ctx.Input("X"); - const Tensor* window = ctx.Input("Window"); - Tensor* out = ctx.Output("Out"); + const phi::DenseTensor* x = ctx.Input("X"); + const phi::DenseTensor* window = ctx.Input("Window"); + phi::DenseTensor* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); const size_t x_rank = x->dims().size(); @@ -109,9 +109,9 @@ class StftGradKernel : public framework::OpKernel { using C = paddle::platform::complex; auto& dev_ctx = ctx.device_context(); - const Tensor* window = ctx.Input("Window"); - const auto* dy = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + const phi::DenseTensor* window = ctx.Input("Window"); + const auto* dy = ctx.Input(framework::GradVarName("Out")); + auto* dx = 
ctx.Output(framework::GradVarName("X")); dx->mutable_data(ctx.GetPlace()); const size_t dy_rank = dy->dims().size(); diff --git a/paddle/fluid/operators/strided_memcpy.h b/paddle/fluid/operators/strided_memcpy.h index 350c3820a38c2..3a562d2f26e85 100644 --- a/paddle/fluid/operators/strided_memcpy.h +++ b/paddle/fluid/operators/strided_memcpy.h @@ -146,9 +146,9 @@ inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx, template inline void StridedMemcpyWithAxis0( const platform::DeviceContext& dev_ctx, - const framework::Tensor& input, - const std::vector& shape_refer, - std::vector* outputs) { + const phi::DenseTensor& input, + const std::vector& shape_refer, + std::vector* outputs) { const framework::DDim in_stride = stride_numel(input.dims()); const int axis = 0; size_t input_offset = 0; diff --git a/paddle/fluid/operators/strided_slice_op.cc b/paddle/fluid/operators/strided_slice_op.cc index 788ffb7e1f82e..ad75d23452c91 100644 --- a/paddle/fluid/operators/strided_slice_op.cc +++ b/paddle/fluid/operators/strided_slice_op.cc @@ -27,7 +27,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class StridedSliceOp : public framework::OperatorWithKernel { public: @@ -58,7 +58,7 @@ class StridedSliceOp : public framework::OperatorWithKernel { ctx.device_context()); } // NOTE: cuda pinned tensor need to copy its data to target place - auto in_tensor = ctx.Input("Input"); + auto in_tensor = ctx.Input("Input"); if (platform::is_cuda_pinned_place(in_tensor->place())) { return framework::OpKernelType( framework::TransToProtoVarType(in_tensor->dtype()), diff --git a/paddle/fluid/operators/strided_slice_op_mlu.cc b/paddle/fluid/operators/strided_slice_op_mlu.cc index 95972d8159267..806c8205d0970 100644 --- a/paddle/fluid/operators/strided_slice_op_mlu.cc +++ b/paddle/fluid/operators/strided_slice_op_mlu.cc @@ -95,7 +95,7 @@ class StridedSliceMLUKernel : public framework::OpKernel { false, platform::errors::InvalidArgument( "Tensor array as input is not supported.")); - int rank = ctx.Input("Input")->dims().size(); + int rank = ctx.Input("Input")->dims().size(); switch (rank) { case 1: StridedSliceCompute<1>(ctx); @@ -133,8 +133,8 @@ class StridedSliceMLUKernel : public framework::OpKernel { void StridedSliceCompute(const framework::ExecutionContext& ctx) const { auto place = ctx.GetPlace(); - auto in = ctx.Input("Input"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("Input"); + auto out = ctx.Output("Out"); auto in_dims = in->dims(); // list @@ -152,31 +152,31 @@ class StridedSliceMLUKernel : public framework::OpKernel { // vector> auto list_new_starts_tensor = - ctx.MultiInput("StartsTensorList"); + ctx.MultiInput("StartsTensorList"); auto list_new_ends_tensor = - ctx.MultiInput("EndsTensorList"); + ctx.MultiInput("EndsTensorList"); auto list_new_strides_tensor = - ctx.MultiInput("StridesTensorList"); + ctx.MultiInput("StridesTensorList"); // Tensor if (list_new_starts_tensor.size() > 0) { starts = GetDataFromTensorList(list_new_starts_tensor); } else if (ctx.HasInput("StartsTensor")) { - auto* starts_tensor = ctx.Input("StartsTensor"); + auto* starts_tensor = ctx.Input("StartsTensor"); starts = GetDataFromTensor(starts_tensor); } if (list_new_ends_tensor.size() > 0) { ends = GetDataFromTensorList(list_new_ends_tensor); } else if (ctx.HasInput("EndsTensor")) { - auto* ends_tensor = ctx.Input("EndsTensor"); + auto* ends_tensor = ctx.Input("EndsTensor"); ends = 
GetDataFromTensor(ends_tensor); } if (list_new_strides_tensor.size() > 0) { strides = GetDataFromTensorList(list_new_strides_tensor); } else if (ctx.HasInput("StridesTensor")) { - auto* strides_tensor = ctx.Input("StridesTensor"); + auto* strides_tensor = ctx.Input("StridesTensor"); strides = GetDataFromTensor(strides_tensor); } @@ -263,7 +263,7 @@ class StridedSliceGradMLUKernel : public framework::OpKernel { false, platform::errors::InvalidArgument( "Tensor array as input is not supported.")); - int rank = ctx.Input("Input")->dims().size(); + int rank = ctx.Input("Input")->dims().size(); switch (rank) { case 1: @@ -302,10 +302,10 @@ class StridedSliceGradMLUKernel : public framework::OpKernel { void StridedSliceGradCompute(const framework::ExecutionContext& ctx) const { auto place = ctx.GetPlace(); - auto* input = ctx.Input("Input"); + auto* input = ctx.Input("Input"); auto input_dims = input->dims(); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("Input")); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("Input")); dx->mutable_data(input_dims, place); auto starts_int = ctx.Attr>("starts"); @@ -321,30 +321,30 @@ class StridedSliceGradMLUKernel : public framework::OpKernel { auto decrease_axis = ctx.Attr>("decrease_axis"); auto list_new_ends_tensor = - ctx.MultiInput("EndsTensorList"); + ctx.MultiInput("EndsTensorList"); auto list_new_starts_tensor = - ctx.MultiInput("StartsTensorList"); + ctx.MultiInput("StartsTensorList"); auto list_new_strides_tensor = - ctx.MultiInput("StridesTensorList"); + ctx.MultiInput("StridesTensorList"); if (list_new_starts_tensor.size() > 0) { starts = GetDataFromTensorList(list_new_starts_tensor); } else if (ctx.HasInput("StartsTensor")) { - auto* starts_tensor = ctx.Input("StartsTensor"); + auto* starts_tensor = ctx.Input("StartsTensor"); starts = GetDataFromTensor(starts_tensor); } if (list_new_ends_tensor.size() > 0) { ends = GetDataFromTensorList(list_new_ends_tensor); } else if (ctx.HasInput("EndsTensor")) { - auto* ends_tensor = ctx.Input("EndsTensor"); + auto* ends_tensor = ctx.Input("EndsTensor"); ends = GetDataFromTensor(ends_tensor); } if (list_new_strides_tensor.size() > 0) { strides = GetDataFromTensorList(list_new_strides_tensor); } else if (ctx.HasInput("StridesTensor")) { - auto* strides_tensor = ctx.Input("StridesTensor"); + auto* strides_tensor = ctx.Input("StridesTensor"); strides = GetDataFromTensor(strides_tensor); } diff --git a/paddle/fluid/operators/strided_slice_op_npu.cc b/paddle/fluid/operators/strided_slice_op_npu.cc index 6b7ee2e744ea7..9a1492fea1ee5 100644 --- a/paddle/fluid/operators/strided_slice_op_npu.cc +++ b/paddle/fluid/operators/strided_slice_op_npu.cc @@ -29,7 +29,7 @@ class StridedSliceNPUKernel : public framework::OpKernel { false, platform::errors::InvalidArgument( "Tensor array as input is not supported.")); - int rank = ctx.Input("Input")->dims().size(); + int rank = ctx.Input("Input")->dims().size(); switch (rank) { case 1: StridedSliceCompute<1>(ctx); @@ -64,8 +64,8 @@ class StridedSliceNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - auto in = ctx.Input("Input"); - auto out = ctx.Output("Out"); + auto in = ctx.Input("Input"); + auto out = ctx.Output("Out"); auto in_dims = in->dims(); // list @@ -83,31 +83,31 @@ class StridedSliceNPUKernel : public framework::OpKernel { // vector> auto list_new_ends_tensor = - ctx.MultiInput("EndsTensorList"); + 
ctx.MultiInput("EndsTensorList"); auto list_new_starts_tensor = - ctx.MultiInput("StartsTensorList"); + ctx.MultiInput("StartsTensorList"); auto list_new_strides_tensor = - ctx.MultiInput("StridesTensorList"); + ctx.MultiInput("StridesTensorList"); // Tensor if (list_new_starts_tensor.size() > 0) { starts = GetDataFromTensorList(list_new_starts_tensor); } else if (ctx.HasInput("StartsTensor")) { - auto* starts_tensor = ctx.Input("StartsTensor"); + auto* starts_tensor = ctx.Input("StartsTensor"); starts = GetDataFromTensor(starts_tensor); } if (list_new_ends_tensor.size() > 0) { ends = GetDataFromTensorList(list_new_ends_tensor); } else if (ctx.HasInput("EndsTensor")) { - auto* ends_tensor = ctx.Input("EndsTensor"); + auto* ends_tensor = ctx.Input("EndsTensor"); ends = GetDataFromTensor(ends_tensor); } if (list_new_strides_tensor.size() > 0) { strides = GetDataFromTensorList(list_new_strides_tensor); } else if (ctx.HasInput("StridesTensor")) { - auto* strides_tensor = ctx.Input("StridesTensor"); + auto* strides_tensor = ctx.Input("StridesTensor"); strides = GetDataFromTensor(strides_tensor); } @@ -256,7 +256,7 @@ class StridedSliceGradNPUKernel : public framework::OpKernel { false, platform::errors::InvalidArgument( "Tensor array as input is not supported.")); - int rank = ctx.Input("Input")->dims().size(); + int rank = ctx.Input("Input")->dims().size(); switch (rank) { case 1: @@ -291,10 +291,10 @@ class StridedSliceGradNPUKernel : public framework::OpKernel { auto& dev_ctx = ctx.template device_context(); - auto* input = ctx.Input("Input"); + auto* input = ctx.Input("Input"); auto input_dims = input->dims(); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("Input")); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("Input")); dx->mutable_data(input_dims, place); auto starts_int = ctx.Attr>("starts"); @@ -310,30 +310,30 @@ class StridedSliceGradNPUKernel : public framework::OpKernel { auto decrease_axis = ctx.Attr>("decrease_axis"); auto list_new_ends_tensor = - ctx.MultiInput("EndsTensorList"); + ctx.MultiInput("EndsTensorList"); auto list_new_starts_tensor = - ctx.MultiInput("StartsTensorList"); + ctx.MultiInput("StartsTensorList"); auto list_new_strides_tensor = - ctx.MultiInput("StridesTensorList"); + ctx.MultiInput("StridesTensorList"); if (list_new_starts_tensor.size() > 0) { starts = GetDataFromTensorList(list_new_starts_tensor); } else if (ctx.HasInput("StartsTensor")) { - auto* starts_tensor = ctx.Input("StartsTensor"); + auto* starts_tensor = ctx.Input("StartsTensor"); starts = GetDataFromTensor(starts_tensor); } if (list_new_ends_tensor.size() > 0) { ends = GetDataFromTensorList(list_new_ends_tensor); } else if (ctx.HasInput("EndsTensor")) { - auto* ends_tensor = ctx.Input("EndsTensor"); + auto* ends_tensor = ctx.Input("EndsTensor"); ends = GetDataFromTensor(ends_tensor); } if (list_new_strides_tensor.size() > 0) { strides = GetDataFromTensorList(list_new_strides_tensor); } else if (ctx.HasInput("StridesTensor")) { - auto* strides_tensor = ctx.Input("StridesTensor"); + auto* strides_tensor = ctx.Input("StridesTensor"); strides = GetDataFromTensor(strides_tensor); } diff --git a/paddle/fluid/operators/string/faster_tokenizer_op.cc b/paddle/fluid/operators/string/faster_tokenizer_op.cc index 3539e2213a39d..f1a7688372adc 100644 --- a/paddle/fluid/operators/string/faster_tokenizer_op.cc +++ b/paddle/fluid/operators/string/faster_tokenizer_op.cc @@ -477,7 +477,7 @@ class 
FasterTokenizerOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { return framework::OpKernelType(expected_kernel_type.data_type_, expected_kernel_type.place_, diff --git a/paddle/fluid/operators/string/faster_tokenizer_op.h b/paddle/fluid/operators/string/faster_tokenizer_op.h index d02313ea8d0c5..0c98190252419 100644 --- a/paddle/fluid/operators/string/faster_tokenizer_op.h +++ b/paddle/fluid/operators/string/faster_tokenizer_op.h @@ -129,8 +129,8 @@ class FasterTokenizerKernel : public framework::OpKernel { auto* text = ctx.Input("Text"); auto* vocab = ctx.Input("Vocab"); - auto* input_ids = ctx.Output("InputIds"); - auto* seg_ids = ctx.Output("SegmentIds"); + auto* input_ids = ctx.Output("InputIds"); + auto* seg_ids = ctx.Output("SegmentIds"); auto do_lower_case = static_cast(ctx.Attr("do_lower_case")); auto is_split_into_words = diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index ec570f709c35c..8cf6a095e2304 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -28,7 +28,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; class SumOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/sum_op_mlu.cc b/paddle/fluid/operators/sum_op_mlu.cc index 0bb51581e9360..7c741632c1e1f 100644 --- a/paddle/fluid/operators/sum_op_mlu.cc +++ b/paddle/fluid/operators/sum_op_mlu.cc @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using LoDTensor = framework::LoDTensor; @@ -31,7 +31,7 @@ class SumMLUKernel : public framework::OpKernel { if (out_var->IsType()) { // init auto *out = out_var->GetMutable(); - auto ins = ctx.MultiInput("X"); + auto ins = ctx.MultiInput("X"); out->mutable_data(ctx.GetPlace()); auto place = ctx.GetPlace(); int ins_size = static_cast(ins.size()); diff --git a/paddle/fluid/operators/sum_op_npu.cc b/paddle/fluid/operators/sum_op_npu.cc index a7bb442fa650c..c5ad250bb3cd1 100644 --- a/paddle/fluid/operators/sum_op_npu.cc +++ b/paddle/fluid/operators/sum_op_npu.cc @@ -23,7 +23,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using SelectedRows = phi::SelectedRows; using LoDTensor = framework::LoDTensor; @@ -34,7 +34,7 @@ class SumNPUKernel : public framework::OpKernel { auto out_var = ctx.OutputVar("Out"); if (out_var->IsType()) { auto *out = out_var->GetMutable(); - auto x = ctx.MultiInput("X"); + auto x = ctx.MultiInput("X"); out->mutable_data(ctx.GetPlace()); auto place = ctx.GetPlace(); @@ -45,7 +45,7 @@ class SumNPUKernel : public framework::OpKernel { return; } - std::vector inputs; + std::vector inputs; std::vector names; for (int i = 0; i < n; ++i) { if (x[i] && x[i]->numel() > 0) { diff --git a/paddle/fluid/operators/sum_op_xpu.cc b/paddle/fluid/operators/sum_op_xpu.cc index a445868153452..82f4ba7a50b6e 100644 --- a/paddle/fluid/operators/sum_op_xpu.cc +++ b/paddle/fluid/operators/sum_op_xpu.cc @@ -20,7 +20,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::Tensor; + using SelectedRows = phi::SelectedRows; using LoDTensor = framework::LoDTensor; template diff --git a/paddle/fluid/operators/svd_helper.h b/paddle/fluid/operators/svd_helper.h index a796aa9d54444..3fb7994566a2f 100644 --- a/paddle/fluid/operators/svd_helper.h +++ b/paddle/fluid/operators/svd_helper.h @@ -36,9 +36,9 @@ namespace paddle { namespace operators { namespace math { -using Tensor = framework::Tensor; -using InTensors = std::vector; -using OutTensors = std::vector; +using Tensor = phi::DenseTensor; +using InTensors = std::vector; +using OutTensors = std::vector; using OpName = std::string; template >; + std::map>; using NameOutTensor = std::vector; explicit DeviceIndependenceTensorOperations( const framework::ExecutionContext& context) : context(context) {} - framework::Tensor Pow(const framework::Tensor& x, T exp) { - framework::Tensor out; + phi::DenseTensor Pow(const phi::DenseTensor& x, T exp) { + phi::DenseTensor out; auto for_range = GetForRange(x.numel()); int numel = x.numel(); PowFunctor functor( @@ -295,11 +295,11 @@ struct DeviceIndependenceTensorOperations { for_range(functor); return out; } - framework::Tensor Matmul(const framework::Tensor& mat_a, - const framework::Tensor& mat_b, - bool trans_a = false, - bool trans_b = false) { - framework::Tensor ret; + phi::DenseTensor Matmul(const phi::DenseTensor& mat_a, + const phi::DenseTensor& mat_b, + bool trans_a = false, + bool trans_b = false) { + phi::DenseTensor ret; auto a_dim = mat_a.dims(); auto b_dim = mat_b.dims(); std::vector x_vec = phi::vectorize(a_dim); @@ -315,9 +315,9 @@ struct DeviceIndependenceTensorOperations { return ret; } - framework::Tensor Transpose(const framework::Tensor& x) { + phi::DenseTensor Transpose(const phi::DenseTensor& x) { // transpose the last two dimision - framework::Tensor ret; + phi::DenseTensor ret; auto x_dim = x.dims(); auto x_vec = phi::vectorize(x_dim); int rank = x_vec.size(); @@ -345,10 +345,10 @@ struct DeviceIndependenceTensorOperations { } return ret; } - framework::Tensor Diag(const framework::Tensor& x, - int offset = 0, - // FIXME link error - int padding_value = 0) { + phi::DenseTensor Diag(const phi::DenseTensor& x, + int offset = 0, + // FIXME link error + int padding_value = 0) { PADDLE_ENFORCE_EQ(padding_value, 0, platform::errors::InvalidArgument( @@ -359,7 +359,7 @@ struct DeviceIndependenceTensorOperations { "Current diag only support offset = 0," "you can use DiagOp instead(not recommend)")); - framework::Tensor ret; + phi::DenseTensor ret; int x_rank = x.dims().size(); std::vector out_shape; if (x_rank == 2) { @@ -382,7 +382,7 @@ struct DeviceIndependenceTensorOperations { } // batch_diag for CPU only - Tensor BatchDiag(const Tensor& x, int batch) { + Tensor BatchDiag(const phi::DenseTensor& x, int batch) { Tensor out; auto* x_data = x.data>(); auto numel = x.numel(); @@ -411,8 +411,8 @@ struct DeviceIndependenceTensorOperations { } // a complex number x times a real number y, which is represented as (a+0j) - Tensor RealMulComplex(const Tensor& x, const Tensor& y) { - framework::Tensor ret; + Tensor RealMulComplex(const phi::DenseTensor& x, const phi::DenseTensor& y) { + phi::DenseTensor ret; std::vector out_shape = GetBroadcastShape({&x, &y}); ret.Resize(phi::make_ddim(out_shape)); ElementwiseComputeEx, DeviceContext, T>( @@ -420,9 +420,8 @@ struct DeviceIndependenceTensorOperations { return ret; } - framework::Tensor Div(const framework::Tensor& x, - const framework::Tensor& y) { - 
framework::Tensor ret; + phi::DenseTensor Div(const phi::DenseTensor& x, const phi::DenseTensor& y) { + phi::DenseTensor ret; if (x.type() != y.type()) { ret.mutable_data(x.dims(), context.GetPlace()); auto x_vector = EigenVector::Flatten(x); @@ -439,19 +438,17 @@ struct DeviceIndependenceTensorOperations { } return ret; } - framework::Tensor Add(const framework::Tensor& x, - const framework::Tensor& y) { + phi::DenseTensor Add(const phi::DenseTensor& x, const phi::DenseTensor& y) { // element wise add, support numpy broadcast. - framework::Tensor ret; + phi::DenseTensor ret; std::vector out_shape = GetBroadcastShape({&x, &y}); ret.Resize(phi::make_ddim(out_shape)); ElementwiseComputeEx, DeviceContext, T>( context, &x, &y, -1, AddFunctor(), &ret); return ret; } - framework::Tensor Mul(const framework::Tensor& x, - const framework::Tensor& y) { - framework::Tensor ret; + phi::DenseTensor Mul(const phi::DenseTensor& x, const phi::DenseTensor& y) { + phi::DenseTensor ret; std::vector out_shape = GetBroadcastShape({&x, &y}); ret.Resize(phi::make_ddim(out_shape)); ElementwiseComputeEx, DeviceContext, T>( @@ -459,16 +456,16 @@ struct DeviceIndependenceTensorOperations { return ret; } - framework::Tensor ReduceSum(const framework::Tensor& x, - std::vector out_dim) { + phi::DenseTensor ReduceSum(const phi::DenseTensor& x, + std::vector out_dim) { framework::AttributeMap attrs; attrs["dim"] = std::vector{-1}; NameInTensorMap inputs({{"X", {&x}}}); return CreateOpRunAndReturnTensor("reduce_sum", inputs, attrs, out_dim); } - framework::Tensor ReduceMax(const framework::Tensor& x, - std::vector out_dim) { + phi::DenseTensor ReduceMax(const phi::DenseTensor& x, + std::vector out_dim) { framework::AttributeMap attrs; attrs["dim"] = std::vector{-1}; NameInTensorMap inputs({{"X", {&x}}}); @@ -476,9 +473,8 @@ struct DeviceIndependenceTensorOperations { } // Support float and complex type subtraction,the default is T type template - framework::Tensor Sub(const framework::Tensor& x, - const framework::Tensor& y) { - framework::Tensor ret; + phi::DenseTensor Sub(const phi::DenseTensor& x, const phi::DenseTensor& y) { + phi::DenseTensor ret; std::vector out_shape = GetBroadcastShape({&x, &y}); ret.Resize(phi::make_ddim(out_shape)); if (platform::is_gpu_place(context.GetPlace())) { @@ -501,9 +497,9 @@ struct DeviceIndependenceTensorOperations { } return ret; } - const framework::Tensor Unsqueeze(const framework::Tensor& x, int axis = 0) { + const phi::DenseTensor Unsqueeze(const phi::DenseTensor& x, int axis = 0) { // don't copy data, only change the dims - framework::Tensor out; + phi::DenseTensor out; out.ShareDataWith(x); std::vector out_shape = phi::vectorize(x.dims()); if (axis >= 0) { @@ -516,28 +512,28 @@ struct DeviceIndependenceTensorOperations { out.Resize(phi::make_ddim(out_shape)); return out; } - framework::Tensor Fill(std::vector shape, float fill_value) { - framework::Tensor ret; + phi::DenseTensor Fill(std::vector shape, float fill_value) { + phi::DenseTensor ret; ret.Resize(phi::make_ddim(shape)); ret.mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); phi::funcs::SetConstant()(dev_ctx, &ret, T(fill_value)); return ret; } - framework::Tensor Infinits(std::vector shape) { + phi::DenseTensor Infinits(std::vector shape) { auto value = static_cast(std::numeric_limits::infinity()); return Fill(shape, value); } - framework::Tensor Eye(int n) { + phi::DenseTensor Eye(int n) { auto output = Fill({n}, 1); auto ret = Diag(output); return ret; } - framework::Tensor 
Slice(const framework::Tensor& x, - std::vector axes, - std::vector starts, - std::vector ends) { - framework::Tensor ret; + phi::DenseTensor Slice(const phi::DenseTensor& x, + std::vector axes, + std::vector starts, + std::vector ends) { + phi::DenseTensor ret; std::vector new_axes = axes; std::vector out_shape = phi::vectorize(x.dims()); size_t rank = out_shape.size(); @@ -588,9 +584,9 @@ struct DeviceIndependenceTensorOperations { return ret; } - framework::Tensor TrilTriu(const framework::Tensor& x, - int diagonal, - bool lower) { + phi::DenseTensor TrilTriu(const phi::DenseTensor& x, + int diagonal, + bool lower) { framework::AttributeMap attrs; attrs["diagonal"] = diagonal; attrs["lower"] = lower; @@ -604,11 +600,11 @@ struct DeviceIndependenceTensorOperations { return CreateOpRunAndReturnTensor("tril_triu", inputs, attrs, out_shape); } - framework::Tensor TriangularSolve(const framework::Tensor& x, - const framework::Tensor& y, - bool upper, - bool transpose, - bool unitriangular) { + phi::DenseTensor TriangularSolve(const phi::DenseTensor& x, + const phi::DenseTensor& y, + bool upper, + bool transpose, + bool unitriangular) { framework::AttributeMap attrs; attrs["upper"] = upper; attrs["transpose"] = transpose; @@ -635,9 +631,9 @@ struct DeviceIndependenceTensorOperations { "triangular_solve", inputs, attrs, out_shape); } - framework::Tensor ConcatTwoTensors(const framework::Tensor& x, - const framework::Tensor& y, - int axis) { + phi::DenseTensor ConcatTwoTensors(const phi::DenseTensor& x, + const phi::DenseTensor& y, + int axis) { framework::AttributeMap attrs; attrs["axis"] = axis; std::vector inputs_dims({x.dims(), y.dims()}); @@ -654,7 +650,7 @@ struct DeviceIndependenceTensorOperations { return CreateOpRunAndReturnTensor("concat", inputs, attrs, out_shape); } - Tensor Conj(const Tensor& x) { + Tensor Conj(const phi::DenseTensor& x) { Tensor out; auto* out_data = out.mutable_data(x.dims(), context.GetPlace()); auto* x_data = x.data(); @@ -664,7 +660,7 @@ struct DeviceIndependenceTensorOperations { return out; } - Tensor Real(const Tensor& x) { + Tensor Real(const phi::DenseTensor& x) { Tensor out; auto numel = x.numel(); auto* out_data = out.mutable_data>( @@ -682,8 +678,8 @@ struct DeviceIndependenceTensorOperations { const int n, const int num_lower_diags, const int num_upper_diags, - const Tensor& scale, - const Tensor& input) { + const phi::DenseTensor& scale, + const phi::DenseTensor& input) { Tensor out; auto& dev_ctx = context.template device_context(); platform::ForRange for_range(dev_ctx, input.numel()); @@ -709,10 +705,10 @@ struct DeviceIndependenceTensorOperations { return platform::ForRange(dev_ctx, numel); } template - void EigenSliceWrapper(const framework::Tensor* in, + void EigenSliceWrapper(const phi::DenseTensor* in, const std::vector& start, const std::vector& end, - framework::Tensor* out) { + phi::DenseTensor* out) { // Slice by call Eigen Tensor Function `.slice()` size_t rank = in->dims().size(); PADDLE_ENFORCE_EQ(start.size(), @@ -742,7 +738,7 @@ struct DeviceIndependenceTensorOperations { offsets_32bit, extents_32bit); } - framework::Tensor CreateOpRunAndReturnTensor( + phi::DenseTensor CreateOpRunAndReturnTensor( const std::string& type, const NameInTensorMap& inputs, const framework::AttributeMap& attrs, @@ -781,7 +777,7 @@ struct DeviceIndependenceTensorOperations { auto op = framework::OpRegistry::CreateOp(type, op_inputs, op_outputs, attrs); op->Run(local_scope, context.GetPlace()); - framework::Tensor out; + phi::DenseTensor out; 
out.ShareDataWith(*(out_var->GetMutable())); out.Resize(phi::make_ddim(out_shape)); context.scope().DeleteScope(&local_scope); diff --git a/paddle/fluid/operators/sync_batch_norm_op_mlu.cc b/paddle/fluid/operators/sync_batch_norm_op_mlu.cc index 0a95088c31f2c..d2fa4f794efb6 100644 --- a/paddle/fluid/operators/sync_batch_norm_op_mlu.cc +++ b/paddle/fluid/operators/sync_batch_norm_op_mlu.cc @@ -26,7 +26,7 @@ namespace operators { #define NO_USE_CNCL 0 #define GET_LAYOUT_OFFSET 2 -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; static std::vector supported_input_layout = { CNNL_LAYOUT_NC, CNNL_LAYOUT_NLC, CNNL_LAYOUT_NHWC, CNNL_LAYOUT_NDHWC}; @@ -51,16 +51,16 @@ class SyncBatchNormMLUKernel : public framework::OpKernel { "to set use_global_stats True. Please use batch_norm " "in this case.")); - const auto *x = ctx.Input("X"); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); - const auto *mean = ctx.Input("Mean"); - const auto *variance = ctx.Input("Variance"); - auto *mean_out = ctx.Output("MeanOut"); - auto *variance_out = ctx.Output("VarianceOut"); - auto *saved_mean = ctx.Output("SavedMean"); - auto *saved_variance = ctx.Output("SavedVariance"); - auto *y = ctx.Output("Y"); + const auto *x = ctx.Input("X"); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + const auto *mean = ctx.Input("Mean"); + const auto *variance = ctx.Input("Variance"); + auto *mean_out = ctx.Output("MeanOut"); + auto *variance_out = ctx.Output("VarianceOut"); + auto *saved_mean = ctx.Output("SavedMean"); + auto *saved_variance = ctx.Output("SavedVariance"); + auto *y = ctx.Output("Y"); const auto &x_dims = x->dims(); PADDLE_ENFORCE_GE(x_dims.size(), @@ -136,7 +136,7 @@ class SyncBatchNormMLUKernel : public framework::OpKernel { nullptr); } else { // training if (ctx.HasInput("MomentumTensor")) { - const auto *mom_tensor = ctx.Input("MomentumTensor"); + const auto *mom_tensor = ctx.Input("MomentumTensor"); Tensor mom_cpu; paddle::framework::TensorCopySync( *mom_tensor, platform::CPUPlace(), &mom_cpu); @@ -287,17 +287,18 @@ class SyncBatchNormMLUGradKernel : public framework::OpKernel { const std::string layout_str = ctx.Attr("data_layout"); const DataLayout layout = framework::StringToDataLayout(layout_str); - const auto *d_y = ctx.Input(framework::GradVarName("Y")); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); + const auto *d_y = ctx.Input(framework::GradVarName("Y")); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); // init output - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_scale = ctx.Output(framework::GradVarName("Scale")); - auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_scale = + ctx.Output(framework::GradVarName("Scale")); + auto *d_bias = ctx.Output(framework::GradVarName("Bias")); - const auto *saved_mean = ctx.Input("SavedMean"); - const auto *saved_inv_var = ctx.Input("SavedVariance"); + const auto *saved_mean = ctx.Input("SavedMean"); + const auto *saved_inv_var = ctx.Input("SavedVariance"); const Tensor *x; if (ctx.HasInput("Y")) { @@ -306,7 +307,7 @@ class SyncBatchNormMLUGradKernel : public framework::OpKernel { platform::errors::InvalidArgument( "sync_batch_norm_grad doesn't support input Y")); } else { - x = ctx.Input("X"); + x = ctx.Input("X"); } const auto &x_dims = x->dims(); diff --git a/paddle/fluid/operators/sync_batch_norm_op_npu.cc 
b/paddle/fluid/operators/sync_batch_norm_op_npu.cc index 1789110a18af2..08136d7fe2ea4 100644 --- a/paddle/fluid/operators/sync_batch_norm_op_npu.cc +++ b/paddle/fluid/operators/sync_batch_norm_op_npu.cc @@ -20,7 +20,7 @@ limitations under the Licnse. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template void training_or_inference(const framework::ExecutionContext &ctx, @@ -325,16 +325,16 @@ class SyncBatchNormNPUKernel : public framework::OpKernel { "to set use_global_stats True. Please use batch_norm " "in this case.")); - const auto *x = ctx.Input("X"); - auto *y = ctx.Output("Y"); - const auto *scale = ctx.Input("Scale"); - const auto *bias = ctx.Input("Bias"); - const auto *mean = ctx.Input("Mean"); - const auto *variance = ctx.Input("Variance"); - auto *mean_out = ctx.Output("MeanOut"); - auto *variance_out = ctx.Output("VarianceOut"); - auto *saved_mean = ctx.Output("SavedMean"); - auto *saved_variance = ctx.Output("SavedVariance"); + const auto *x = ctx.Input("X"); + auto *y = ctx.Output("Y"); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + const auto *mean = ctx.Input("Mean"); + const auto *variance = ctx.Input("Variance"); + auto *mean_out = ctx.Output("MeanOut"); + auto *variance_out = ctx.Output("VarianceOut"); + auto *saved_mean = ctx.Output("SavedMean"); + auto *saved_variance = ctx.Output("SavedVariance"); const auto &x_dims = x->dims(); PADDLE_ENFORCE_EQ(x_dims.size(), @@ -398,7 +398,7 @@ class SyncBatchNormNPUKernel : public framework::OpKernel { } else { // training if (ctx.HasInput("MomentumTensor")) { - const auto *mom_tensor = ctx.Input("MomentumTensor"); + const auto *mom_tensor = ctx.Input("MomentumTensor"); Tensor mom_cpu; paddle::framework::TensorCopySync( *mom_tensor, platform::CPUPlace(), &mom_cpu); @@ -581,12 +581,13 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { const std::string layout_str = ctx.Attr("data_layout"); const DataLayout layout = framework::StringToDataLayout(layout_str); - const auto *d_y = ctx.Input(framework::GradVarName("Y")); - const auto *scale = ctx.Input("Scale"); - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_scale = ctx.Output(framework::GradVarName("Scale")); - auto *d_bias = ctx.Output(framework::GradVarName("Bias")); - const auto *saved_mean = ctx.Input("SavedMean"); + const auto *d_y = ctx.Input(framework::GradVarName("Y")); + const auto *scale = ctx.Input("Scale"); + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_scale = + ctx.Output(framework::GradVarName("Scale")); + auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + const auto *saved_mean = ctx.Input("SavedMean"); const Tensor *x; if (ctx.HasInput("Y")) { @@ -595,7 +596,7 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { platform::errors::InvalidArgument( "sync_batch_norm_grad doesn't support input Y")); } else { - x = ctx.Input("X"); + x = ctx.Input("X"); } int N, C, H, W, D; diff --git a/paddle/fluid/operators/take_along_axis_op.cc b/paddle/fluid/operators/take_along_axis_op.cc index 544c23d8658d2..0856645ad67e3 100644 --- a/paddle/fluid/operators/take_along_axis_op.cc +++ b/paddle/fluid/operators/take_along_axis_op.cc @@ -39,7 +39,7 @@ class TakeAlongAxisOp : public framework::OperatorWithKernel { } framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const 
override { return framework::OpKernelType( expected_kernel_type.data_type_, tensor.place(), tensor.layout()); @@ -79,7 +79,7 @@ class TakeAlongAxisGradOp : public framework::OperatorWithKernel { } framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { return framework::OpKernelType( expected_kernel_type.data_type_, tensor.place(), tensor.layout()); diff --git a/paddle/fluid/operators/take_along_axis_op_npu.cc b/paddle/fluid/operators/take_along_axis_op_npu.cc index ab2c42a86b72a..d4f06e6446887 100644 --- a/paddle/fluid/operators/take_along_axis_op_npu.cc +++ b/paddle/fluid/operators/take_along_axis_op_npu.cc @@ -22,16 +22,16 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class NPUTakeAlongAxisKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto input = ctx.Input("Input"); + auto input = ctx.Input("Input"); auto axis = ctx.Attr("Axis"); - auto index = ctx.Input("Index"); - auto result = ctx.Output("Result"); + auto index = ctx.Input("Index"); + auto result = ctx.Output("Result"); result->mutable_data(ctx.GetPlace()); auto stream = @@ -48,10 +48,12 @@ class NPUTakeAlongAxisGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto axis = ctx.Attr("Axis"); - auto index = ctx.Input("Index"); - auto result_grad = ctx.Input(framework::GradVarName("Result")); + auto index = ctx.Input("Index"); + auto result_grad = + ctx.Input(framework::GradVarName("Result")); - auto input_grad = ctx.Output(framework::GradVarName("Input")); + auto input_grad = + ctx.Output(framework::GradVarName("Input")); input_grad->mutable_data(ctx.GetPlace()); auto stream = diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h index b1ca81d566063..445b2fa89e4aa 100644 --- a/paddle/fluid/operators/tdm_child_op.h +++ b/paddle/fluid/operators/tdm_child_op.h @@ -28,7 +28,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using DDim = framework::DDim; using LoD = framework::LoD; diff --git a/paddle/fluid/operators/tdm_sampler_op.h b/paddle/fluid/operators/tdm_sampler_op.h index 28a1260b3efd3..ab24d6b763546 100644 --- a/paddle/fluid/operators/tdm_sampler_op.h +++ b/paddle/fluid/operators/tdm_sampler_op.h @@ -29,7 +29,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using Sampler = math::Sampler; using DDim = framework::DDim; using LoD = framework::LoD; diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc index 4525d431ff136..f880181662e24 100644 --- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc +++ b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc @@ -21,7 +21,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; class TeacherStudentSigmoidLossOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h index 41d2662ae2a4d..40bac8c364583 100644 --- a/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h +++ b/paddle/fluid/operators/teacher_student_sigmoid_loss_op.h @@ -19,14 +19,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class TeacherStudentSigmoidLossOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - Tensor* y = context.Output("Y"); - const Tensor* x = context.Input("X"); - const Tensor* labels = context.Input("Label"); + phi::DenseTensor* y = context.Output("Y"); + const phi::DenseTensor* x = context.Input("X"); + const phi::DenseTensor* labels = context.Input("Label"); T* y_data = y->mutable_data(context.GetPlace()); const T* x_data = x->data(); const T* label_data = labels->data(); @@ -68,13 +68,14 @@ template class TeacherStudentSigmoidLossGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - const Tensor* x = context.Input("X"); + const phi::DenseTensor* x = context.Input("X"); const T* x_data = x->data(); - Tensor* dx = context.Output(framework::GradVarName("X")); + phi::DenseTensor* dx = + context.Output(framework::GradVarName("X")); T* dx_data = dx->mutable_data(context.GetPlace()); - const Tensor* labels = context.Input("Label"); + const phi::DenseTensor* labels = context.Input("Label"); const T* label_data = labels->data(); T soft_max_up_bound = @@ -84,8 +85,8 @@ class TeacherStudentSigmoidLossGradOpKernel : public framework::OpKernel { int64_t batch_size = x->dims()[0]; - const framework::Tensor* dOut = - context.Input(framework::GradVarName("Y")); + const phi::DenseTensor* dOut = + context.Input(framework::GradVarName("Y")); const T* dout_data = dOut->data(); diff --git a/paddle/fluid/operators/temporal_shift_op.cc b/paddle/fluid/operators/temporal_shift_op.cc index ca446fcb97236..119fcf4f49bc5 100644 --- a/paddle/fluid/operators/temporal_shift_op.cc +++ b/paddle/fluid/operators/temporal_shift_op.cc @@ -23,8 +23,6 @@ namespace paddle { namespace operators { -using framework::Tensor; - class TemporalShiftOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; diff --git a/paddle/fluid/operators/temporal_shift_op.cu b/paddle/fluid/operators/temporal_shift_op.cu index fe6a9dd36c8e8..979cc129e9d1e 100644 --- a/paddle/fluid/operators/temporal_shift_op.cu +++ b/paddle/fluid/operators/temporal_shift_op.cu @@ -16,8 +16,6 @@ namespace paddle { namespace operators { -using framework::Tensor; - template __global__ void KeTemporalShiftFwNCHW(const T* input, T* output, @@ -162,8 +160,8 @@ class TemporalShiftOpCUDAKernel : public framework::OpKernel { true, platform::errors::InvalidArgument( "This kernel only runs on GPU device.")); - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); int t = ctx.Attr("seg_num"); float shift_ratio = ctx.Attr("shift_ratio"); const std::string data_format_str = ctx.Attr("data_format"); @@ -215,8 +213,10 @@ template class 
TemporalShiftGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input_grad = ctx.Output(framework::GradVarName("X")); - auto* output_grad = ctx.Input(framework::GradVarName("Out")); + auto* input_grad = + ctx.Output(framework::GradVarName("X")); + auto* output_grad = + ctx.Input(framework::GradVarName("Out")); int t = ctx.Attr("seg_num"); float shift_ratio = ctx.Attr("shift_ratio"); const std::string data_format_str = ctx.Attr("data_format"); diff --git a/paddle/fluid/operators/temporal_shift_op.h b/paddle/fluid/operators/temporal_shift_op.h index 688cd816b50cc..abc00c7e600a1 100644 --- a/paddle/fluid/operators/temporal_shift_op.h +++ b/paddle/fluid/operators/temporal_shift_op.h @@ -16,7 +16,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DataLayout = framework::DataLayout; template @@ -91,8 +91,10 @@ template class TemporalShiftGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* input_grad = ctx.Output(framework::GradVarName("X")); - auto* output_grad = ctx.Input(framework::GradVarName("Out")); + auto* input_grad = + ctx.Output(framework::GradVarName("X")); + auto* output_grad = + ctx.Input(framework::GradVarName("Out")); int t = ctx.Attr("seg_num"); float shift_ratio = ctx.Attr("shift_ratio"); const std::string data_format_str = ctx.Attr("data_format"); diff --git a/paddle/fluid/operators/tensor_array_to_tensor_op.cc b/paddle/fluid/operators/tensor_array_to_tensor_op.cc index fa25d0b3494bd..bbab23530fa1c 100644 --- a/paddle/fluid/operators/tensor_array_to_tensor_op.cc +++ b/paddle/fluid/operators/tensor_array_to_tensor_op.cc @@ -21,7 +21,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::Tensor; void LodTensorArray2LodTensorVector(const framework::Scope &scope, const std::string &base_name, diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 0f8a3d1206264..e7ac8909ca691 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -505,7 +505,7 @@ class TensorRTEngineOp : public framework::OperatorBase { inference::analysis::GetFromScope(scope, x); // check the input_tensor if (!platform::is_gpu_place(t.place())) { - framework::Tensor out; + phi::DenseTensor out; platform::CUDAPlace dst_place; framework::TransDataDevice(t, dst_place, &out); t.ShareDataWith(out); diff --git a/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h b/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h index 1162bf21592d5..31f913cc65b9c 100644 --- a/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h +++ b/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h @@ -25,9 +25,8 @@ namespace paddle { namespace operators { template -static void InitRandom(framework::Tensor *tensor, - const platform::Place &place) { - framework::Tensor cpu_tensor; +static void InitRandom(phi::DenseTensor *tensor, const platform::Place &place) { + phi::DenseTensor cpu_tensor; auto *cpu_ptr = cpu_tensor.mutable_data(tensor->dims(), platform::CPUPlace()); int64_t numel = cpu_tensor.numel(); @@ -69,23 +68,23 @@ static bool TestLeakyReluGradGradMain(const framework::DDim &dim, LeakyReluGradGradFunctor functor; functor.alpha = alpha; auto &dev_ctx = *platform::DeviceContextPool::Instance().Get(place); - framework::Tensor *out = nullptr; - framework::Tensor *dout = nullptr; - framework::Tensor *dx = nullptr; + phi::DenseTensor *out = nullptr; + phi::DenseTensor *dout = nullptr; + phi::DenseTensor *dx = nullptr; - framework::Tensor x; + phi::DenseTensor x; x.Resize(dim); InitRandom(&x, place); - framework::Tensor ddx; + phi::DenseTensor ddx; ddx.Resize(dim); InitRandom(&ddx, place); - framework::Tensor ddout; + phi::DenseTensor ddout; ddout.Resize(dim); InitRandom(&ddout, place); - framework::Tensor ddout_actual; + phi::DenseTensor ddout_actual; ddout_actual.mutable_data(dim, place); LeakyReluGradGradEachElementFunctor actual_functor(ddx.data(), x.data(), @@ -112,7 +111,7 @@ static bool TestLeakyReluGradGradMain(const framework::DDim &dim, dev_ctx.Wait(); - framework::Tensor ddout_cpu, ddout_actual_cpu; + phi::DenseTensor ddout_cpu, ddout_actual_cpu; framework::TensorCopySync(ddout, platform::CPUPlace(), &ddout_cpu); framework::TensorCopySync( ddout_actual, platform::CPUPlace(), &ddout_actual_cpu); diff --git a/paddle/fluid/operators/tile_op.cc b/paddle/fluid/operators/tile_op.cc index 8cf132915402e..172e96737061d 100644 --- a/paddle/fluid/operators/tile_op.cc +++ b/paddle/fluid/operators/tile_op.cc @@ -24,8 +24,6 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::Tensor; - class TileOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -40,7 +38,7 @@ class TileOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "repeat_times_tensor" || var_name == "RepeatTimes") { return expected_kernel_type; @@ -132,7 +130,7 @@ class TileGradOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const override { if (var_name == "repeat_times_tensor" || var_name == "RepeatTimes") { return expected_kernel_type; diff --git a/paddle/fluid/operators/tile_op_functor.h b/paddle/fluid/operators/tile_op_functor.h index 03aa19c8817ab..16f77f4f17495 100644 --- a/paddle/fluid/operators/tile_op_functor.h +++ b/paddle/fluid/operators/tile_op_functor.h @@ -27,7 +27,7 @@ inline std::vector get_repeat_times( if (ctx.HasInput("RepeatTimes")) { auto* repeat_tensor = ctx.Input("RepeatTimes"); auto* repeat_data = repeat_tensor->data(); - framework::Tensor cpu_repeat_tensor; + phi::DenseTensor cpu_repeat_tensor; if (platform::is_gpu_place(repeat_tensor->place()) || platform::is_xpu_place(repeat_tensor->place()) || platform::is_mlu_place(repeat_tensor->place()) || @@ -42,7 +42,7 @@ inline std::vector get_repeat_times( } auto list_repeat_times_tensor = - ctx.MultiInput("repeat_times_tensor"); + ctx.MultiInput("repeat_times_tensor"); if (list_repeat_times_tensor.size() > 0) { // get tensor from std::vector vec_repeat_times; @@ -52,7 +52,7 @@ inline std::vector get_repeat_times( platform::is_xpu_place(tensor->place()) || platform::is_mlu_place(tensor->place()) || platform::is_npu_place(tensor->place())) { - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); vec_repeat_times.push_back(*temp.data()); } else { diff --git a/paddle/fluid/operators/tile_op_mlu.cc b/paddle/fluid/operators/tile_op_mlu.cc index 80cb6340e4ca7..2b2b3df4431f1 100644 --- a/paddle/fluid/operators/tile_op_mlu.cc +++ b/paddle/fluid/operators/tile_op_mlu.cc @@ -18,13 +18,13 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class TileMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto rank = context.Input("X")->dims().size(); + auto rank = context.Input("X")->dims().size(); PADDLE_ENFORCE_GE( rank, 1, @@ -58,7 +58,7 @@ class TileMLUKernel : public framework::OpKernel { MAX_RANK_SUPPORTED, repeat_times_size)); - auto* in0 = context.Input("X"); + auto* in0 = context.Input("X"); auto in_dims = in0->dims(); for (size_t i = 0; i < repeat_times.size(); ++i) { PADDLE_ENFORCE_GT( @@ -86,7 +86,7 @@ class TileMLUKernel : public framework::OpKernel { vec_in_dims.size(), repeat_times.size())); - auto* out0 = context.Output("Out"); + auto* out0 = context.Output("Out"); bool repeat_one_times = true; for (size_t i = 0; i < repeat_times.size(); ++i) { if (repeat_times[i] != 1) { diff --git a/paddle/fluid/operators/tile_op_npu.cc b/paddle/fluid/operators/tile_op_npu.cc index 706e9f7c52797..2997052257d18 100644 --- a/paddle/fluid/operators/tile_op_npu.cc +++ b/paddle/fluid/operators/tile_op_npu.cc @@ -18,14 +18,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using NPUDeviceContext = platform::NPUDeviceContext; template class TileNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto rank = context.Input("X")->dims().size(); + auto rank = context.Input("X")->dims().size(); PADDLE_ENFORCE_GE( rank, 1, @@ -64,7 +64,7 @@ class TileNPUKernel : public framework::OpKernel { protected: void Tile(const framework::ExecutionContext& context) const { - auto* in0 = context.Input("X"); + auto* in0 = context.Input("X"); auto in_dims = in0->dims(); auto repeat_times = get_repeat_times(context); @@ -93,7 +93,7 @@ class TileNPUKernel : public framework::OpKernel { "'repeat_times' for tile op must match after promotion.", vec_in_dims.size(), repeat_times.size())); - auto* out0 = context.Output("Out"); + auto* out0 = context.Output("Out"); framework::DDim new_in_dims = phi::make_ddim(vec_in_dims); framework::DDim out_dims(new_in_dims); diff --git a/paddle/fluid/operators/top_k_function_cuda.h b/paddle/fluid/operators/top_k_function_cuda.h index 4a038c93a1f49..7cc88b24efe78 100644 --- a/paddle/fluid/operators/top_k_function_cuda.h +++ b/paddle/fluid/operators/top_k_function_cuda.h @@ -55,7 +55,7 @@ struct NumericTraits namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; inline void GetDims( const phi::DDim& dim, int axis, int* pre, int* n, int* post) { @@ -903,12 +903,12 @@ __global__ void AssignGradWithAxis(const T* grad_out, // use the radix sort for the topk template bool SortTopk(const phi::GPUContext& ctx, - const framework::Tensor* input_tensor, + const phi::DenseTensor* input_tensor, const int64_t num_cols, const int64_t num_rows, const int k, - framework::Tensor* out_tensor, - framework::Tensor* indices_tensor, + phi::DenseTensor* out_tensor, + phi::DenseTensor* indices_tensor, bool largest = true) { auto cu_stream = ctx.stream(); diff --git a/paddle/fluid/operators/top_k_op.cu b/paddle/fluid/operators/top_k_op.cu index 79236f590f7dc..c1df0a6b12eac 100644 --- a/paddle/fluid/operators/top_k_op.cu +++ b/paddle/fluid/operators/top_k_op.cu @@ -30,7 +30,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; #define FIXED_BLOCK_DIM_BASE(dim, ...) \ case (dim): { \ @@ -52,12 +52,12 @@ class TopkOpCUDAKernel : public framework::OpKernel { platform::is_gpu_place(ctx.GetPlace()), true, platform::errors::InvalidArgument("It must use CUDAPlace.")); - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - auto* indices = ctx.Output("Indices"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); + auto* indices = ctx.Output("Indices"); int k = static_cast(ctx.Attr("k")); - auto* k_t = ctx.Input("K"); + auto* k_t = ctx.Input("K"); if (k_t) { Tensor k_host; framework::TensorCopySync(*k_t, platform::CPUPlace(), &k_host); @@ -122,10 +122,12 @@ class TopkOpGradCUDAKernel : public framework::OpKernel { platform::is_gpu_place(context.GetPlace()), true, platform::errors::InvalidArgument("It must use CUDAPlace.")); - auto* x = context.Input("X"); - auto* out_grad = context.Input(framework::GradVarName("Out")); - auto* indices = context.Input("Indices"); - auto* x_grad = context.Output(framework::GradVarName("X")); + auto* x = context.Input("X"); + auto* out_grad = + context.Input(framework::GradVarName("Out")); + auto* indices = context.Input("Indices"); + auto* x_grad = + context.Output(framework::GradVarName("X")); T* x_grad_data = x_grad->mutable_data(context.GetPlace()); const T* out_grad_data = out_grad->data(); diff --git a/paddle/fluid/operators/top_k_op.h b/paddle/fluid/operators/top_k_op.h index fa573da0109d5..cd29137d530f4 100644 --- a/paddle/fluid/operators/top_k_op.h +++ b/paddle/fluid/operators/top_k_op.h @@ -24,19 +24,19 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class TopkKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { // Get the top k elements of each row of input tensor - auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - auto* indices = ctx.Output("Indices"); + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); + auto* indices = ctx.Output("Indices"); size_t k = static_cast(ctx.Attr("k")); - auto* k_t = ctx.Input("K"); + auto* k_t = ctx.Input("K"); if (k_t) { k = k_t->data()[0]; framework::DDim output_dims = output->dims(); @@ -94,10 +94,12 @@ template class TopkGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* out_grad = context.Input(framework::GradVarName("Out")); - auto* indices = context.Input("Indices"); - auto* x_grad = context.Output(framework::GradVarName("X")); + auto* x = context.Input("X"); + auto* out_grad = + context.Input(framework::GradVarName("Out")); + auto* indices = context.Input("Indices"); + auto* x_grad = + context.Output(framework::GradVarName("X")); T* x_grad_data = x_grad->mutable_data(context.GetPlace()); const T* out_grad_data = out_grad->data(); diff --git a/paddle/fluid/operators/top_k_op_mlu.cc b/paddle/fluid/operators/top_k_op_mlu.cc index c38c4388997f6..a6b96466de442 100644 --- a/paddle/fluid/operators/top_k_op_mlu.cc +++ b/paddle/fluid/operators/top_k_op_mlu.cc @@ -28,7 +28,7 @@ class TopkMLUKernel : public framework::OpKernel { const auto& place = ctx.GetPlace(); size_t k = static_cast(ctx.Attr("k")); - auto* k_t = ctx.Input("K"); + auto* k_t = ctx.Input("K"); if (k_t) { auto k_t_ptr = 
static_cast(k_t->data()); auto size = k_t->numel() * sizeof(int); @@ -51,7 +51,7 @@ class TopkMLUKernel : public framework::OpKernel { const bool sorted = true; const int axis = -1; // cnnl only support int32/int16 type of indices - framework::Tensor indices_int32(framework::TransToPhiDataType(VT::INT32)); + phi::DenseTensor indices_int32(framework::TransToPhiDataType(VT::INT32)); indices_int32.Resize(indices->dims()); indices_int32.mutable_data(place); diff --git a/paddle/fluid/operators/top_k_op_npu.cc b/paddle/fluid/operators/top_k_op_npu.cc index e0892af480070..4bf4204e79666 100644 --- a/paddle/fluid/operators/top_k_op_npu.cc +++ b/paddle/fluid/operators/top_k_op_npu.cc @@ -18,7 +18,7 @@ limitations under the License. */ namespace paddle { namespace operators { -void gen_assist_seq(framework::Tensor* assit_tensor, +void gen_assist_seq(phi::DenseTensor* assit_tensor, int64_t dim, const framework::ExecutionContext& ctx) { const int64_t dimx2 = dim; @@ -55,7 +55,7 @@ class TopkNPUKernel : public framework::OpKernel { auto size = input->dims().size(); // dim is the last dimension of input auto dim = input->dims()[size - 1]; - framework::Tensor assist_seq_tensor; + phi::DenseTensor assist_seq_tensor; assist_seq_tensor.Resize({2 * dim}); assist_seq_tensor.mutable_data(ctx.GetPlace()); gen_assist_seq(&assist_seq_tensor, dim, ctx); diff --git a/paddle/fluid/operators/top_k_op_xpu.cc b/paddle/fluid/operators/top_k_op_xpu.cc index 9ffcd4d46fc1a..46428a3596d56 100644 --- a/paddle/fluid/operators/top_k_op_xpu.cc +++ b/paddle/fluid/operators/top_k_op_xpu.cc @@ -23,7 +23,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class TopkXPUKernel : public framework::OpKernel { using XPUType = typename XPUTypeTrait::Type; @@ -31,15 +31,15 @@ class TopkXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { // Get the top k elements of each row of input tensor - const auto* input = ctx.Input("X"); - auto* output = ctx.Output("Out"); - auto* indices = ctx.Output("Indices"); + const auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); + auto* indices = ctx.Output("Indices"); // get k from attr int k = static_cast(ctx.Attr("k")); // get k from input tensor - auto* k_t = ctx.Input("K"); + auto* k_t = ctx.Input("K"); if (k_t) { memory::Copy(platform::CPUPlace(), static_cast(&k), diff --git a/paddle/fluid/operators/top_k_v2_op_mlu.cc b/paddle/fluid/operators/top_k_v2_op_mlu.cc index bce76b1351fc8..b9e3d4ff0224e 100644 --- a/paddle/fluid/operators/top_k_v2_op_mlu.cc +++ b/paddle/fluid/operators/top_k_v2_op_mlu.cc @@ -38,7 +38,7 @@ class TopkV2MLUKernel : public framework::OpKernel { } size_t k = static_cast(ctx.Attr("k")); - auto* k_t = ctx.Input("K"); + auto* k_t = ctx.Input("K"); if (k_t) { auto k_t_ptr = static_cast(k_t->data()); auto size = k_t->numel() * sizeof(int); @@ -59,7 +59,7 @@ class TopkV2MLUKernel : public framework::OpKernel { indices->mutable_data(place); // cnnl only support int32/int16 type of indices - framework::Tensor indices_int32(framework::TransToPhiDataType(VT::INT32)); + phi::DenseTensor indices_int32(framework::TransToPhiDataType(VT::INT32)); indices_int32.Resize(indices->dims()); indices_int32.mutable_data(place); diff --git a/paddle/fluid/operators/top_k_v2_op_npu.cc b/paddle/fluid/operators/top_k_v2_op_npu.cc index 590f4f66fcbee..487938b142dce 100644 --- a/paddle/fluid/operators/top_k_v2_op_npu.cc +++ 
b/paddle/fluid/operators/top_k_v2_op_npu.cc @@ -26,10 +26,10 @@ template class TopkV2NPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* input = context.Input("X"); - auto* k_tensor = context.Input("K"); - auto* out = context.Output("Out"); - auto* indices = context.Output("Indices"); // type: INT64 + auto* input = context.Input("X"); + auto* k_tensor = context.Input("K"); + auto* out = context.Output("Out"); + auto* indices = context.Output("Indices"); // type: INT64 int32_t k = static_cast(context.Attr("k")); int axis = static_cast(context.Attr("axis")); @@ -58,7 +58,7 @@ class TopkV2NPUKernel : public framework::OpKernel { out->mutable_data(context.GetPlace()); indices->mutable_data(context.GetPlace()); - framework::Tensor indices_int32(experimental::DataType::INT32); + phi::DenseTensor indices_int32(experimental::DataType::INT32); indices_int32.Resize(output_dims); indices_int32.mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/transfer_layout_op.cc b/paddle/fluid/operators/transfer_layout_op.cc index 86862d4a10f7d..ae1ad94d9f978 100644 --- a/paddle/fluid/operators/transfer_layout_op.cc +++ b/paddle/fluid/operators/transfer_layout_op.cc @@ -64,7 +64,7 @@ class TransferLayoutOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { return framework::OpKernelType(expected_kernel_type.data_type_, expected_kernel_type.place_, diff --git a/paddle/fluid/operators/transfer_layout_op.h b/paddle/fluid/operators/transfer_layout_op.h index 940d26789c52e..a4c7b482ff596 100644 --- a/paddle/fluid/operators/transfer_layout_op.h +++ b/paddle/fluid/operators/transfer_layout_op.h @@ -127,8 +127,8 @@ class TransferLayoutFunctor { private: void TransDataLayout(const platform::DeviceContext &dev_ctx, - const framework::Tensor &in, - framework::Tensor *out) const { + const phi::DenseTensor &in, + phi::DenseTensor *out) const { PADDLE_ENFORCE_EQ( phi::arity(in.dims()), 4, diff --git a/paddle/fluid/operators/transpose_op.cc b/paddle/fluid/operators/transpose_op.cc index b342f01e46ff7..d04b1ffa94b92 100644 --- a/paddle/fluid/operators/transpose_op.cc +++ b/paddle/fluid/operators/transpose_op.cc @@ -25,8 +25,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; - class TransposeOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -262,8 +260,8 @@ class Transpose2Op : public TransposeOp { library_ = framework::LibraryType::kMKLDNN; layout_ = framework::DataLayout::kMKLDNN; using framework::proto::VarType; - auto input_data_type = - framework::TransToProtoVarType(ctx.Input("X")->dtype()); + auto input_data_type = framework::TransToProtoVarType( + ctx.Input("X")->dtype()); customized_type_value = (input_data_type == VarType::INT8 || input_data_type == VarType::UINT8) ? kTransposeMKLDNNINT8 diff --git a/paddle/fluid/operators/transpose_op.cu.h b/paddle/fluid/operators/transpose_op.cu.h index f7c4597d43756..ac5f5adf2594c 100644 --- a/paddle/fluid/operators/transpose_op.cu.h +++ b/paddle/fluid/operators/transpose_op.cu.h @@ -26,7 +26,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using Dim3 = framework::Dim3; using Index3 = framework::Index3; @@ -713,9 +713,9 @@ inline void CombineTransposeDim3(const framework::DDim& shape, template struct TransposeSimple { static bool run(const phi::GPUContext& ctx, - const Tensor& in, + const phi::DenseTensor& in, const std::vector perm, - Tensor* out) { + phi::DenseTensor* out) { // First reduce the dimensions of the input tensor if possible. std::vector new_perm; framework::DDim new_dims; @@ -1157,8 +1157,8 @@ inline void LaunchWithDispatchIndex(const phi::GPUContext& ctx, template inline void SimplifyThenLaunch(const int rank, const DeviceContext& ctx, - const Tensor& in, - Tensor* out, + const phi::DenseTensor& in, + phi::DenseTensor* out, const std::vector& perm) { int sm_count = ctx.GetSMCount(); auto src_dims = phi::vectorize(in.dims()); @@ -1182,9 +1182,9 @@ inline void SimplifyThenLaunch(const int rank, template void TransposeGPUKernelDriver(const phi::GPUContext& ctx, - const Tensor& in, + const phi::DenseTensor& in, const std::vector& perm, - Tensor* out) { + phi::DenseTensor* out) { const int rank = perm.size(); int64_t numel = in.numel(); bool ret{false}; diff --git a/paddle/fluid/operators/transpose_op.h b/paddle/fluid/operators/transpose_op.h index 2a6849b1d2584..8b0fe26eeaa30 100644 --- a/paddle/fluid/operators/transpose_op.h +++ b/paddle/fluid/operators/transpose_op.h @@ -28,8 +28,8 @@ enum { kTransposeMKLDNNFP32 = 1, kTransposeMKLDNNINT8 = 2 }; template inline void TransCompute(const int dim, const DeviceContext& dev_ctx, - const framework::Tensor& in, - framework::Tensor* out, + const phi::DenseTensor& in, + phi::DenseTensor* out, const std::vector& axis) { switch (dim) { case 1: diff --git a/paddle/fluid/operators/tree_conv_op.h b/paddle/fluid/operators/tree_conv_op.h index 8c479076175dd..ee37c2e9fe09b 100644 --- a/paddle/fluid/operators/tree_conv_op.h +++ b/paddle/fluid/operators/tree_conv_op.h @@ -22,7 +22,7 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DDim = framework::DDim; template class TreeConvKernel : public framework::OpKernel { @@ -31,10 +31,10 @@ class TreeConvKernel : public framework::OpKernel { math::Tree2ColFunctor tree2col; phi::funcs::SetConstant constant; - auto *Edges = ctx.Input("EdgeSet"); - auto *Embeddings = ctx.Input("NodesVector"); - auto *Filter = ctx.Input("Filter"); - auto *output_emb = ctx.Output("Out"); + auto *Edges = ctx.Input("EdgeSet"); + auto *Embeddings = ctx.Input("NodesVector"); + auto *Filter = ctx.Input("Filter"); + auto *output_emb = ctx.Output("Out"); int max_depth = ctx.Attr("max_depth"); auto &dev_ctx = ctx.template device_context(); @@ -78,13 +78,15 @@ template class TreeConvGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *out_g = ctx.Input(framework::GradVarName("Out")); - auto *in_g = ctx.Output(framework::GradVarName("NodesVector")); - auto *filter_g = ctx.Output(framework::GradVarName("Filter")); + auto *out_g = ctx.Input(framework::GradVarName("Out")); + auto *in_g = + ctx.Output(framework::GradVarName("NodesVector")); + auto *filter_g = + ctx.Output(framework::GradVarName("Filter")); int max_depth = ctx.Attr("max_depth"); - auto *Embeddings = ctx.Input("NodesVector"); - auto *edges = ctx.Input("EdgeSet"); - auto *Filter = ctx.Input("Filter"); + auto *Embeddings = ctx.Input("NodesVector"); + auto 
*edges = ctx.Input("EdgeSet"); + auto *Filter = ctx.Input("Filter"); math::Tree2ColFunctor tree2col; math::Col2TreeFunctor col2tree; phi::funcs::SetConstant constant; diff --git a/paddle/fluid/operators/tril_triu_op_mlu.cc b/paddle/fluid/operators/tril_triu_op_mlu.cc index a4c5a3bddbc58..892261d6693ce 100644 --- a/paddle/fluid/operators/tril_triu_op_mlu.cc +++ b/paddle/fluid/operators/tril_triu_op_mlu.cc @@ -18,8 +18,8 @@ template class TrilTriuMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); int diagonal = ctx.Attr("diagonal"); bool lower = ctx.Attr("lower"); bool upper; diff --git a/paddle/fluid/operators/tril_triu_op_npu.cc b/paddle/fluid/operators/tril_triu_op_npu.cc index aeb8691518c1d..d7ca6a6602c3f 100644 --- a/paddle/fluid/operators/tril_triu_op_npu.cc +++ b/paddle/fluid/operators/tril_triu_op_npu.cc @@ -22,8 +22,8 @@ template class TrilTriuNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); int diagonal = ctx.Attr("diagonal"); bool lower = ctx.Attr("lower"); diff --git a/paddle/fluid/operators/truncated_gaussian_random_op_mlu.cc b/paddle/fluid/operators/truncated_gaussian_random_op_mlu.cc index 6d5d9f8a3b1d8..d2d51c29371f8 100644 --- a/paddle/fluid/operators/truncated_gaussian_random_op_mlu.cc +++ b/paddle/fluid/operators/truncated_gaussian_random_op_mlu.cc @@ -28,10 +28,10 @@ class TruncatedGaussianRandomMLUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { float mean = context.Attr("mean"); float std = context.Attr("std"); - auto* tensor = context.Output("Out"); + auto* tensor = context.Output("Out"); tensor->mutable_data(context.GetPlace()); - framework::Tensor cpu_tensor(tensor->dtype()); + phi::DenseTensor cpu_tensor(tensor->dtype()); cpu_tensor.Resize(tensor->dims()); T* data_cpu = cpu_tensor.mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc b/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc index 433e7d79e1ac3..b5e67ccb24a9a 100644 --- a/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc +++ b/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc @@ -22,7 +22,7 @@ limitations under the License. 
 */

 namespace paddle {
 namespace operators {

-using Tensor = framework::Tensor;
+using Tensor = phi::DenseTensor;

 template <typename T>
 class TruncatedGaussianRandomNPUKernel : public framework::OpKernel<T> {
@@ -57,7 +57,7 @@ class TruncatedGaussianRandomNPUKernel : public framework::OpKernel<T> {
     float max_value = mean + std * 2.0;
     FillNpuTensorWithConstant<T>(&max_tensor, max_value);
-    auto* out = ctx.Output<framework::Tensor>("Out");
+    auto* out = ctx.Output<phi::DenseTensor>("Out");
     out->mutable_data<T>(ctx.GetPlace());
     auto stream =
         ctx.template device_context<paddle::platform::NPUDeviceContext>()
             .stream();
@@ -80,7 +80,7 @@ class NPUTruncatedGaussianRandomKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& context) const override {
     float mean = context.Attr<float>("mean");
     float std = context.Attr<float>("std");
-    auto* tensor = context.Output<framework::Tensor>("Out");
+    auto* tensor = context.Output<phi::DenseTensor>("Out");
     tensor->mutable_data<T>(context.GetPlace());

     Tensor cpu_tensor(tensor->dtype());
diff --git a/paddle/fluid/operators/unbind_op.cc b/paddle/fluid/operators/unbind_op.cc
index d059c626fe7ea..be64767cb27a4 100644
--- a/paddle/fluid/operators/unbind_op.cc
+++ b/paddle/fluid/operators/unbind_op.cc
@@ -22,7 +22,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-using framework::Tensor;

 class UnbindOp : public framework::OperatorWithKernel {
  public:
diff --git a/paddle/fluid/operators/uniform_random_inplace_op_xpu.cc b/paddle/fluid/operators/uniform_random_inplace_op_xpu.cc
index 90076a67aafd3..bcd399ec08a7f 100644
--- a/paddle/fluid/operators/uniform_random_inplace_op_xpu.cc
+++ b/paddle/fluid/operators/uniform_random_inplace_op_xpu.cc
@@ -75,7 +75,7 @@ template <typename T>
 class XPUUniformRandomInplaceGradKernel : public framework::OpKernel<T> {
  public:
  void Compute(const paddle::framework::ExecutionContext &ctx) const override {
-    auto *dx = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
+    auto *dx = ctx.Output<phi::DenseTensor>(framework::GradVarName("X"));
    if (dx) {
      T *data = dx->mutable_data<T>(ctx.GetPlace());
      int64_t size = dx->numel();
diff --git a/paddle/fluid/operators/uniform_random_op.cc b/paddle/fluid/operators/uniform_random_op.cc
index 5324b9697c94a..154c6906ca7c9 100644
--- a/paddle/fluid/operators/uniform_random_op.cc
+++ b/paddle/fluid/operators/uniform_random_op.cc
@@ -65,14 +65,14 @@ template <typename T>
 class CPUUniformRandomKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
-    framework::Tensor *tensor = nullptr;
+    phi::DenseTensor *tensor = nullptr;
     auto out_var = ctx.OutputVar("Out");
     std::vector<int64_t> new_shape;
     auto list_new_shape_tensor =
-        ctx.MultiInput<framework::Tensor>("ShapeTensorList");
+        ctx.MultiInput<phi::DenseTensor>("ShapeTensorList");
     if (list_new_shape_tensor.size() > 0 || ctx.HasInput("ShapeTensor")) {
       if (ctx.HasInput("ShapeTensor")) {
-        auto *shape_tensor = ctx.Input<framework::Tensor>("ShapeTensor");
+        auto *shape_tensor = ctx.Input<phi::DenseTensor>("ShapeTensor");
         new_shape = GetNewDataFromShapeTensor(shape_tensor);
       } else if (list_new_shape_tensor.size() > 0) {
         new_shape = GetNewDataFromShapeTensorList(list_new_shape_tensor);
diff --git a/paddle/fluid/operators/uniform_random_op.cu b/paddle/fluid/operators/uniform_random_op.cu
index 2ceb8a68d863d..7065067ddd91a 100644
--- a/paddle/fluid/operators/uniform_random_op.cu
+++ b/paddle/fluid/operators/uniform_random_op.cu
@@ -20,14 +20,14 @@ template <typename T>
 class GPUUniformRandomKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& context) const override {
-    framework::Tensor* tensor = nullptr;
+    phi::DenseTensor* tensor = nullptr;
     auto out_var = context.OutputVar("Out");
     std::vector<int64_t> new_shape;
     auto
list_new_shape_tensor = - context.MultiInput("ShapeTensorList"); + context.MultiInput("ShapeTensorList"); if (list_new_shape_tensor.size() > 0 || context.HasInput("ShapeTensor")) { if (context.HasInput("ShapeTensor")) { - auto* shape_tensor = context.Input("ShapeTensor"); + auto* shape_tensor = context.Input("ShapeTensor"); new_shape = GetNewDataFromShapeTensor(shape_tensor); } else if (list_new_shape_tensor.size() > 0) { new_shape = GetNewDataFromShapeTensorList(list_new_shape_tensor); diff --git a/paddle/fluid/operators/uniform_random_op.h b/paddle/fluid/operators/uniform_random_op.h index 9f0f93f5573f5..bf2666deda28b 100644 --- a/paddle/fluid/operators/uniform_random_op.h +++ b/paddle/fluid/operators/uniform_random_op.h @@ -30,14 +30,14 @@ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; inline std::vector GetNewDataFromShapeTensor( - const Tensor* new_data_tensor) { + const phi::DenseTensor* new_data_tensor) { if (framework::TransToProtoVarType(new_data_tensor->dtype()) == framework::proto::VarType::INT64) { auto* new_data = new_data_tensor->data(); - framework::Tensor cpu_starts_tensor; + phi::DenseTensor cpu_starts_tensor; if (platform::is_gpu_place(new_data_tensor->place())) { paddle::framework::TensorCopySync( *new_data_tensor, platform::CPUPlace(), &cpu_starts_tensor); @@ -50,7 +50,7 @@ inline std::vector GetNewDataFromShapeTensor( framework::proto::VarType::INT32) { auto* new_data = new_data_tensor->data(); std::vector vec_new_data; - framework::Tensor cpu_starts_tensor; + phi::DenseTensor cpu_starts_tensor; if (platform::is_gpu_place(new_data_tensor->place())) { paddle::framework::TensorCopySync( *new_data_tensor, platform::CPUPlace(), &cpu_starts_tensor); @@ -69,7 +69,7 @@ inline std::vector GetNewDataFromShapeTensor( } inline std::vector GetNewDataFromShapeTensorList( - const std::vector& list_new_shape_tensor) { + const std::vector& list_new_shape_tensor) { std::vector vec_new_shape; vec_new_shape.reserve(list_new_shape_tensor.size()); for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) { @@ -85,7 +85,7 @@ inline std::vector GetNewDataFromShapeTensorList( if (framework::TransToProtoVarType(tensor->dtype()) == framework::proto::VarType::INT32) { if (platform::is_gpu_place(tensor->place())) { - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); vec_new_shape.push_back(static_cast(*temp.data())); } else { @@ -94,7 +94,7 @@ inline std::vector GetNewDataFromShapeTensorList( } else if (framework::TransToProtoVarType(tensor->dtype()) == framework::proto::VarType::INT64) { if (platform::is_gpu_place(tensor->place())) { - framework::Tensor temp; + phi::DenseTensor temp; paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); vec_new_shape.push_back(*temp.data()); } else { @@ -148,7 +148,7 @@ struct UniformGenerator { template void UniformRandom(const framework::ExecutionContext& context, - framework::Tensor* tensor) { + phi::DenseTensor* tensor) { int64_t size = tensor->numel(); auto& dev_cxt = context.template device_context(); T* data = tensor->mutable_data(dev_cxt.GetPlace()); diff --git a/paddle/fluid/operators/uniform_random_op_mlu.cc b/paddle/fluid/operators/uniform_random_op_mlu.cc index 644fdad7fdc75..c37cb5dd2f31c 100644 --- a/paddle/fluid/operators/uniform_random_op_mlu.cc +++ b/paddle/fluid/operators/uniform_random_op_mlu.cc @@ -23,15 +23,15 @@ template class MLUUniformRandomKernel : public framework::OpKernel { public: 
void Compute(const framework::ExecutionContext &ctx) const override { - framework::Tensor *tensor = nullptr; + phi::DenseTensor *tensor = nullptr; auto out_var = ctx.OutputVar("Out"); std::vector new_shape; auto list_new_shape_tensor = - ctx.MultiInput("ShapeTensorList"); + ctx.MultiInput("ShapeTensorList"); if (list_new_shape_tensor.size() > 0 || ctx.HasInput("ShapeTensor")) { if (ctx.HasInput("ShapeTensor")) { - auto *shape_tensor = ctx.Input("ShapeTensor"); + auto *shape_tensor = ctx.Input("ShapeTensor"); new_shape = GetNewDataFromShapeTensor(shape_tensor); } else if (list_new_shape_tensor.size() > 0) { new_shape = GetNewDataFromShapeTensorList(list_new_shape_tensor); diff --git a/paddle/fluid/operators/uniform_random_op_npu.cc b/paddle/fluid/operators/uniform_random_op_npu.cc index a16f8d25de3e7..81b84f5909d8e 100644 --- a/paddle/fluid/operators/uniform_random_op_npu.cc +++ b/paddle/fluid/operators/uniform_random_op_npu.cc @@ -26,14 +26,14 @@ template class NPUUniformRandomKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - framework::Tensor *tensor = nullptr; + phi::DenseTensor *tensor = nullptr; auto out_var = ctx.OutputVar("Out"); std::vector new_shape; auto list_new_shape_tensor = - ctx.MultiInput("ShapeTensorList"); + ctx.MultiInput("ShapeTensorList"); if (list_new_shape_tensor.size() > 0 || ctx.HasInput("ShapeTensor")) { if (ctx.HasInput("ShapeTensor")) { - auto *shape_tensor = ctx.Input("ShapeTensor"); + auto *shape_tensor = ctx.Input("ShapeTensor"); new_shape = GetNewDataFromShapeTensor(shape_tensor); } else if (list_new_shape_tensor.size() > 0) { new_shape = GetNewDataFromShapeTensorList(list_new_shape_tensor); diff --git a/paddle/fluid/operators/unique_op.h b/paddle/fluid/operators/unique_op.h index 6bcb4d2c609f7..45b1e3c435bdc 100644 --- a/paddle/fluid/operators/unique_op.h +++ b/paddle/fluid/operators/unique_op.h @@ -31,15 +31,15 @@ namespace operators { template struct UniqueOpFunctor { - framework::Tensor* out_; - framework::Tensor* index_; - const framework::Tensor* in_; - framework::Tensor* count_; - - UniqueOpFunctor(framework::Tensor* out, - framework::Tensor* index, - const framework::Tensor* in, - framework::Tensor* count = nullptr) + phi::DenseTensor* out_; + phi::DenseTensor* index_; + const phi::DenseTensor* in_; + phi::DenseTensor* count_; + + UniqueOpFunctor(phi::DenseTensor* out, + phi::DenseTensor* index, + const phi::DenseTensor* in, + phi::DenseTensor* count = nullptr) : out_(out), index_(index), in_(in), count_(count) {} template @@ -113,9 +113,9 @@ struct UniqueOpFunctor { } }; -static std::vector Unbind(const framework::Tensor& in) { +static std::vector Unbind(const phi::DenseTensor& in) { int64_t size = in.dims()[0]; - std::vector tensors(size); + std::vector tensors(size); for (int64_t i = 0; i < size; ++i) { tensors[i] = in.Slice(i, i + 1); } @@ -123,7 +123,7 @@ static std::vector Unbind(const framework::Tensor& in) { } template -static bool Equal(const framework::Tensor& a, const framework::Tensor& b) { +static bool Equal(const phi::DenseTensor& a, const phi::DenseTensor& b) { if (a.numel() != b.numel()) { return false; } @@ -137,8 +137,8 @@ static bool Equal(const framework::Tensor& a, const framework::Tensor& b) { template static void UniqueFlattendTensor(const framework::ExecutionContext& context, - const framework::Tensor& in, - framework::Tensor* out, + const phi::DenseTensor& in, + phi::DenseTensor* out, bool return_index, bool return_inverse, bool return_counts) { @@ -149,7 
+149,7 @@ static void UniqueFlattendTensor(const framework::ExecutionContext& context, std::copy(unique.begin(), unique.end(), out_data); if (return_index) { - auto* indices = context.Output("Indices"); + auto* indices = context.Output("Indices"); indices->Resize(phi::make_ddim({out->numel()})); auto indices_data = indices->mutable_data(context.GetPlace()); std::unordered_map indices_map; @@ -164,7 +164,7 @@ static void UniqueFlattendTensor(const framework::ExecutionContext& context, } if (return_inverse) { - auto* inverse = context.Output("Index"); + auto* inverse = context.Output("Index"); inverse->Resize(phi::make_ddim({in.numel()})); auto inverse_data = inverse->mutable_data(context.GetPlace()); std::unordered_map inverse_map; @@ -178,7 +178,7 @@ static void UniqueFlattendTensor(const framework::ExecutionContext& context, } if (return_counts) { - auto* count = context.Output("Counts"); + auto* count = context.Output("Counts"); count->Resize(phi::make_ddim({out->numel()})); auto count_data = count->mutable_data(context.GetPlace()); std::unordered_map counts_map; @@ -232,8 +232,8 @@ static ForwardIt UniqueDimImpl(const framework::ExecutionContext& context, template static void UniqueDim(const framework::ExecutionContext& context, - const framework::Tensor& in, - framework::Tensor* out, + const phi::DenseTensor& in, + phi::DenseTensor* out, bool return_index, bool return_inverse, bool return_counts, @@ -246,7 +246,7 @@ static void UniqueDim(const framework::ExecutionContext& context, std::vector in_trans_dims_vec(phi::vectorize(in.dims())); in_trans_dims_vec[axis] = in.dims()[0]; in_trans_dims_vec[0] = in.dims()[axis]; - framework::Tensor in_trans; + phi::DenseTensor in_trans; framework::DDim in_trans_dims = phi::make_ddim(in_trans_dims_vec); in_trans.Resize(in_trans_dims); in_trans.mutable_data(context.GetPlace()); @@ -278,7 +278,7 @@ static void UniqueDim(const framework::ExecutionContext& context, }); // sort tensor according to indices - framework::Tensor input_sorted; + phi::DenseTensor input_sorted; input_sorted.Resize(in_trans_dims); input_sorted.mutable_data(context.GetPlace()); InT* input_sorted_data = input_sorted.data(); @@ -288,11 +288,11 @@ static void UniqueDim(const framework::ExecutionContext& context, col * sizeof(InT)); } - std::vector input_unbind = Unbind(input_sorted); + std::vector input_unbind = Unbind(input_sorted); std::vector inverse_vec(sorted_indices_vec.size(), 0); std::vector counts_vec(sorted_indices_vec.size(), 0); std::vector indices_vec(sorted_indices_vec.size(), 0); - auto last = UniqueDimImpl::iterator, InT>( + auto last = UniqueDimImpl::iterator, InT>( context, input_unbind.begin(), input_unbind.end(), @@ -306,7 +306,7 @@ static void UniqueDim(const framework::ExecutionContext& context, indices_vec.end()); math::ConcatFunctor concat_functor; - framework::Tensor out_trans; + phi::DenseTensor out_trans; std::vector out_trans_dims_vec = in_trans_dims_vec; out_trans_dims_vec[0] = input_unbind.size(); out_trans.Resize(phi::make_ddim(out_trans_dims_vec)); @@ -319,17 +319,17 @@ static void UniqueDim(const framework::ExecutionContext& context, out_trans.dims().size(), dev_ctx, out_trans, out, permute); if (return_inverse) { - auto* inverse = context.Output("Index"); + auto* inverse = context.Output("Index"); framework::TensorFromVector(inverse_vec, context.device_context(), inverse); } if (return_counts) { - auto* count = context.Output("Counts"); + auto* count = context.Output("Counts"); framework::TensorFromVector(counts_vec, context.device_context(), 
count); } if (return_index) { - auto* indices = context.Output("Indices"); + auto* indices = context.Output("Indices"); framework::TensorFromVector(indices_vec, context.device_context(), indices); } } @@ -337,15 +337,15 @@ static void UniqueDim(const framework::ExecutionContext& context, template struct UniqueFlattendTensorFunctor { const framework::ExecutionContext& ctx_; - const framework::Tensor& in_; - framework::Tensor* out_; + const phi::DenseTensor& in_; + phi::DenseTensor* out_; const bool return_index_; const bool return_inverse_; const bool return_counts_; UniqueFlattendTensorFunctor(const framework::ExecutionContext& context, - const framework::Tensor& in, - framework::Tensor* out, + const phi::DenseTensor& in, + phi::DenseTensor* out, bool return_index, bool return_inverse, bool return_counts) @@ -366,16 +366,16 @@ struct UniqueFlattendTensorFunctor { template struct UniqueDimFunctor { const framework::ExecutionContext& ctx_; - const framework::Tensor& in_; - framework::Tensor* out_; + const phi::DenseTensor& in_; + phi::DenseTensor* out_; const int axis_; const bool return_index_; const bool return_inverse_; const bool return_counts_; UniqueDimFunctor(const framework::ExecutionContext& context, - const framework::Tensor& in, - framework::Tensor* out, + const phi::DenseTensor& in, + phi::DenseTensor* out, const int axis, bool return_index, bool return_inverse, @@ -399,8 +399,8 @@ template class UniqueKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* x = context.Input("X"); - auto* out = context.Output("Out"); + auto* x = context.Input("X"); + auto* out = context.Output("Out"); auto data_type = static_cast( context.Attr("dtype")); if (data_type == framework::proto::VarType::INT32) { @@ -414,7 +414,7 @@ class UniqueKernel : public framework::OpKernel { x->numel())); } if (!context.Attr("is_sorted")) { - auto* index = context.Output("Index"); + auto* index = context.Output("Index"); framework::VisitDataType(data_type, UniqueOpFunctor(out, index, x)); return; diff --git a/paddle/fluid/operators/unique_with_counts_op.h b/paddle/fluid/operators/unique_with_counts_op.h index 227fdef222432..eb3cc2d4731df 100644 --- a/paddle/fluid/operators/unique_with_counts_op.h +++ b/paddle/fluid/operators/unique_with_counts_op.h @@ -31,10 +31,10 @@ class UniqueWithCountsKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto data_type = static_cast( context.Attr("dtype")); - auto* x = context.Input("X"); - auto* out = context.Output("Out"); - auto* index = context.Output("Index"); - auto* count = context.Output("Count"); + auto* x = context.Input("X"); + auto* out = context.Output("Out"); + auto* index = context.Output("Index"); + auto* count = context.Output("Count"); framework::VisitDataType(data_type, UniqueOpFunctor(out, index, x, count)); } diff --git a/paddle/fluid/operators/unsqueeze_op.cc b/paddle/fluid/operators/unsqueeze_op.cc index f01ae5f142d28..2a4da567e5871 100644 --- a/paddle/fluid/operators/unsqueeze_op.cc +++ b/paddle/fluid/operators/unsqueeze_op.cc @@ -154,7 +154,7 @@ class UnsqueezeOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string &var_name, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const framework::OpKernelType &expected_kernel_type) const override { if (var_name == "AxesTensor" || var_name == "AxesTensorList") { return expected_kernel_type; diff --git 
a/paddle/fluid/operators/unsqueeze_op.h b/paddle/fluid/operators/unsqueeze_op.h index 774a8d553fd51..a082918c83dcc 100644 --- a/paddle/fluid/operators/unsqueeze_op.h +++ b/paddle/fluid/operators/unsqueeze_op.h @@ -37,11 +37,11 @@ class UnsqueezeKernel : public framework::OpKernel { bool need_resize_out_dims = false; if (axes.empty()) { auto axes_tensor_list = - context.MultiInput("AxesTensorList"); + context.MultiInput("AxesTensorList"); if (axes_tensor_list.size() > 0) { axes = GetDataFromTensorList(axes_tensor_list); } else if (context.HasInput("AxesTensor")) { - auto *axes_tensor = context.Input("AxesTensor"); + auto *axes_tensor = context.Input("AxesTensor"); axes = GetDataFromTensor(axes_tensor); } need_resize_out_dims = true; diff --git a/paddle/fluid/operators/unstack_op_mlu.cc b/paddle/fluid/operators/unstack_op_mlu.cc index 1819e37df597f..55171364377e0 100644 --- a/paddle/fluid/operators/unstack_op_mlu.cc +++ b/paddle/fluid/operators/unstack_op_mlu.cc @@ -22,8 +22,8 @@ template class UnStackMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - auto out = ctx.MultiOutput("Y"); + auto *x = ctx.Input("X"); + auto out = ctx.MultiOutput("Y"); int axis = ctx.Attr("axis"); if (axis < 0) axis += x->dims().size(); int num = x->dims()[axis]; @@ -56,8 +56,8 @@ template class UnStackGradMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto x = ctx.MultiInput(framework::GradVarName("Y")); - auto *y = ctx.Output(framework::GradVarName("X")); + auto x = ctx.MultiInput(framework::GradVarName("Y")); + auto *y = ctx.Output(framework::GradVarName("X")); int axis = ctx.Attr("axis"); if (axis < 0) axis += (x[0]->dims().size() + 1); int num = static_cast(x.size()); diff --git a/paddle/fluid/operators/unstack_op_npu.cc b/paddle/fluid/operators/unstack_op_npu.cc index 0c3d40279b01b..18b7de754c0ed 100644 --- a/paddle/fluid/operators/unstack_op_npu.cc +++ b/paddle/fluid/operators/unstack_op_npu.cc @@ -22,8 +22,8 @@ template class UnStackNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto *dy = ctx.Input("X"); - auto dx = ctx.MultiOutput("Y"); + auto *dy = ctx.Input("X"); + auto dx = ctx.MultiOutput("Y"); int axis = ctx.Attr("axis"); if (axis < 0) axis += dy->dims().size(); int num = dy->dims()[axis]; @@ -32,7 +32,7 @@ class UnStackNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - std::vector dx_list; + std::vector dx_list; for (int i = 0; i < num; i++) { dx[i]->mutable_data(ctx.GetPlace()); dx_list.push_back(*dx[i]); @@ -48,8 +48,8 @@ template class UnStackGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { - auto x = ctx.MultiInput(framework::GradVarName("Y")); - auto *y = ctx.Output(framework::GradVarName("X")); + auto x = ctx.MultiInput(framework::GradVarName("Y")); + auto *y = ctx.Output(framework::GradVarName("X")); int axis = ctx.Attr("axis"); if (axis < 0) axis += (x[0]->dims().size() + 1); int num = static_cast(x.size()); @@ -58,7 +58,7 @@ class UnStackGradNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - std::vector x_list; + std::vector x_list; for (int i = 0; i < num; i++) { x_list.push_back(*x[i]); } diff --git a/paddle/fluid/operators/utils.h b/paddle/fluid/operators/utils.h index 7315f3a287ab5..adce638e9dd40 100644 --- 
a/paddle/fluid/operators/utils.h
+++ b/paddle/fluid/operators/utils.h
@@ -22,12 +22,12 @@ namespace paddle {
 namespace operators {

 template <typename T>
-inline std::vector<T> GetDataFromTensor(const framework::Tensor* x) {
+inline std::vector<T> GetDataFromTensor(const phi::DenseTensor* x) {
   std::vector<T> vec_new_data;
   if (framework::TransToProtoVarType(x->dtype()) ==
       framework::proto::VarType::INT32) {
     auto* data = x->data<int>();
-    framework::Tensor cpu_attr_tensor;
+    phi::DenseTensor cpu_attr_tensor;
     if (!platform::is_cpu_place(x->place())) {
       paddle::framework::TensorCopySync(
           *x, platform::CPUPlace(), &cpu_attr_tensor);
@@ -37,7 +37,7 @@ inline std::vector<T> GetDataFromTensor(const framework::Tensor* x) {
   } else if (framework::TransToProtoVarType(x->dtype()) ==
              framework::proto::VarType::INT64) {
     auto* data = x->data<int64_t>();
-    framework::Tensor cpu_attr_tensor;
+    phi::DenseTensor cpu_attr_tensor;
     if (!platform::is_cpu_place(x->place())) {
       paddle::framework::TensorCopySync(
           *x, platform::CPUPlace(), &cpu_attr_tensor);
@@ -55,7 +55,7 @@ inline std::vector<T> GetDataFromTensor(const framework::Tensor* x) {

 template <typename T>
 inline std::vector<T> GetDataFromTensorList(
-    const std::vector<const framework::Tensor*>& list_tensor) {
+    const std::vector<const phi::DenseTensor*>& list_tensor) {
   std::vector<T> vec_new_data;
   for (size_t i = 0; i < list_tensor.size(); ++i) {
     auto tensor = list_tensor[i];
@@ -70,7 +70,7 @@ inline std::vector<T> GetDataFromTensorList(
     if (framework::TransToProtoVarType(tensor->dtype()) ==
         framework::proto::VarType::INT32) {
       if (!platform::is_cpu_place(tensor->place())) {
-        framework::Tensor temp;
+        phi::DenseTensor temp;
         paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
         vec_new_data.push_back(static_cast<T>(*temp.data<int>()));
       } else {
@@ -79,7 +79,7 @@ inline std::vector<T> GetDataFromTensorList(
     } else if (framework::TransToProtoVarType(tensor->dtype()) ==
                framework::proto::VarType::INT64) {
       if (!platform::is_cpu_place(tensor->place())) {
-        framework::Tensor temp;
+        phi::DenseTensor temp;
         paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
         // NOTE: Converting int64 to int32 may cause data overflow.
         vec_new_data.push_back(static_cast<T>(*temp.data<int64_t>()));
@@ -105,7 +105,7 @@ inline framework::DDim GetShape(const framework::ExecutionContext& ctx) {
   }

   // 2. shape is a list/tuple containing Tensor
-  auto shape_tensor_list = ctx.MultiInput<framework::Tensor>("ShapeTensorList");
+  auto shape_tensor_list = ctx.MultiInput<phi::DenseTensor>("ShapeTensorList");
   if (shape_tensor_list.size() > 0) {
     auto vec_shape = GetDataFromTensorList<int64_t>(shape_tensor_list);
     return phi::make_ddim(vec_shape);
@@ -117,10 +117,10 @@ inline framework::DDim GetShape(const framework::ExecutionContext& ctx) {
 }

 template <typename T>
-inline T GetValue(const framework::Tensor* x) {
+inline T GetValue(const phi::DenseTensor* x) {
   T value = static_cast<T>(0);
   if (!platform::is_cpu_place(x->place())) {
-    framework::Tensor cpu_x;
+    phi::DenseTensor cpu_x;
     framework::TensorCopy(*x, platform::CPUPlace(), &cpu_x);
 #if defined(PADDLE_WITH_ASCEND_CL) || defined(PADDLE_WITH_MLU)
     platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
diff --git a/paddle/fluid/operators/var_conv_2d_op.cc b/paddle/fluid/operators/var_conv_2d_op.cc
index eb7421019bd81..9ae05dd65a309 100644
--- a/paddle/fluid/operators/var_conv_2d_op.cc
+++ b/paddle/fluid/operators/var_conv_2d_op.cc
@@ -24,7 +24,7 @@ limitations under the License.
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using LoD = framework::LoD; @@ -270,7 +270,7 @@ class CPUVarConv2dOPKernel : public framework::OpKernel { auto* bottom = ctx.Input("X"); auto* in_row = ctx.Input("ROW"); auto* in_col = ctx.Input("COLUMN"); - auto* w = ctx.Input("W"); + auto* w = ctx.Input("W"); auto* top = ctx.Output("Out"); auto* col = ctx.Output("Col"); @@ -451,7 +451,7 @@ class CPUVarConv2dOPGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* x = ctx.Input("X"); - auto* w = ctx.Input("W"); + auto* w = ctx.Input("W"); auto* col = ctx.Input("Col"); auto* out = ctx.Input("Out"); @@ -462,7 +462,7 @@ class CPUVarConv2dOPGradKernel : public framework::OpKernel { auto* d_out = ctx.Input(framework::GradVarName("Out")); auto* dx = ctx.Output(framework::GradVarName("X")); - auto* d_w = ctx.Output(framework::GradVarName("W")); + auto* d_w = ctx.Output(framework::GradVarName("W")); Tensor col_grad; col_grad.Resize(col->dims()); diff --git a/paddle/fluid/operators/var_conv_2d_op.h b/paddle/fluid/operators/var_conv_2d_op.h index b8d5de060934f..bb7bd25284a2b 100644 --- a/paddle/fluid/operators/var_conv_2d_op.h +++ b/paddle/fluid/operators/var_conv_2d_op.h @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = framework::LoDTensor; using LoD = framework::LoD; diff --git a/paddle/fluid/operators/where_index_op_mlu.cc b/paddle/fluid/operators/where_index_op_mlu.cc index 389f7960bcdc1..85f463f723ef5 100644 --- a/paddle/fluid/operators/where_index_op_mlu.cc +++ b/paddle/fluid/operators/where_index_op_mlu.cc @@ -20,14 +20,14 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class MLUWhereIndexKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* condition = context.Input("Condition"); - auto* out = context.Output("Out"); + auto* condition = context.Input("Condition"); + auto* out = context.Output("Out"); auto dims = condition->dims(); const int rank = dims.size(); @@ -54,7 +54,7 @@ class MLUWhereIndexKernel : public framework::OpKernel { } auto& dev_ctx = context.template device_context(); - framework::Tensor out_int32 = + phi::DenseTensor out_int32 = context.AllocateTmpTensor(out->dims(), dev_ctx); MLUCnnlTensorDesc out_int32_desc(out_int32); diff --git a/paddle/fluid/operators/where_index_op_npu.cc b/paddle/fluid/operators/where_index_op_npu.cc index cadb76d53f981..5b006cbdcf1b0 100644 --- a/paddle/fluid/operators/where_index_op_npu.cc +++ b/paddle/fluid/operators/where_index_op_npu.cc @@ -21,7 +21,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; template class NPUWhereIndexKernel : public framework::OpKernel { @@ -29,8 +29,8 @@ class NPUWhereIndexKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto& dev_ctx = context.template device_context(); - auto* condition = context.Input("Condition"); - auto* out = context.Output("Out"); + auto* condition = context.Input("Condition"); + auto* out = context.Output("Out"); auto dims = condition->dims(); const int rank = dims.size(); diff --git a/paddle/fluid/operators/where_op_mlu.cc b/paddle/fluid/operators/where_op_mlu.cc index 57c20ed14f1aa..53ae38bb48b27 100644 --- a/paddle/fluid/operators/where_op_mlu.cc +++ b/paddle/fluid/operators/where_op_mlu.cc @@ -24,10 +24,10 @@ template class WhereMLUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* condition = context.Input("Condition"); - auto* X = context.Input("X"); - auto* Y = context.Input("Y"); - auto* out = context.Output("Out"); + auto* condition = context.Input("Condition"); + auto* X = context.Input("X"); + auto* Y = context.Input("Y"); + auto* out = context.Output("Out"); auto place = context.GetPlace(); out->mutable_data(place); MLUCnnlTensorDesc x_desc(*X); diff --git a/paddle/fluid/operators/where_op_npu.cc b/paddle/fluid/operators/where_op_npu.cc index 68a5aef6f3097..bd30931580141 100644 --- a/paddle/fluid/operators/where_op_npu.cc +++ b/paddle/fluid/operators/where_op_npu.cc @@ -22,10 +22,10 @@ template class WhereNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* condition = ctx.Input("Condition"); - auto* X = ctx.Input("X"); - auto* Y = ctx.Input("Y"); - auto* out = ctx.Output("Out"); + auto* condition = ctx.Input("Condition"); + auto* X = ctx.Input("X"); + auto* Y = ctx.Input("Y"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); const auto& runner = @@ -42,10 +42,10 @@ template class WhereGradNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* condition = ctx.Input("Condition"); - auto* dout_t = ctx.Input(framework::GradVarName("Out")); - auto* dx_t = ctx.Output(framework::GradVarName("X")); - auto* dy_t = ctx.Output(framework::GradVarName("Y")); + auto* condition = ctx.Input("Condition"); + auto* dout_t = ctx.Input(framework::GradVarName("Out")); + auto* dx_t = ctx.Output(framework::GradVarName("X")); + auto* dy_t = ctx.Output(framework::GradVarName("Y")); if (dx_t != nullptr) { dx_t->mutable_data(ctx.GetPlace()); @@ -58,7 +58,7 @@ class WhereGradNPUKernel : public framework::OpKernel { ctx.template device_context() .stream(); - framework::Tensor tensor_zeros(dout_t->dtype()); + phi::DenseTensor tensor_zeros(dout_t->dtype()); tensor_zeros.mutable_data(dout_t->dims(), ctx.GetPlace()); const auto& runner = NpuOpRunner("ZerosLike", {*dout_t}, {tensor_zeros}, {}); diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h b/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h index d2c96bd616861..4c949c66d1bc2 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h +++ b/paddle/fluid/platform/device/gpu/cuda/cudnn_desc.h @@ -33,7 +33,6 @@ class DenseTensor; namespace paddle { namespace platform { -using framework::Tensor; template inline cudnnDataType_t ToCudnnDataType(const T& t) { @@ -141,7 +140,7 @@ 
class TensorDescriptor { } T* desc() { return desc_.get(); } T* desc() const { return desc_.get(); } - void set(const Tensor& tensor, const int groups = 1) { + void set(const phi::DenseTensor& tensor, const int groups = 1) { auto dims = phi::vectorize(tensor.dims()); std::vector strides(dims.size()); strides[dims.size() - 1] = 1; @@ -177,7 +176,7 @@ class TensorDescriptor { transformed_dims.data())); } - void set(const Tensor& tensor, const cudnnTensorFormat_t format) { + void set(const phi::DenseTensor& tensor, const cudnnTensorFormat_t format) { auto dims = phi::vectorize(tensor.dims()); auto dtype = ToCudnnDataType(framework::TransToProtoVarType(tensor.dtype())); @@ -228,7 +227,7 @@ class FilterDescriptor { transformed_dims.data())); } - void set(const Tensor& tensor, + void set(const phi::DenseTensor& tensor, const cudnnTensorFormat_t format, const int groups = 1) { auto dims = phi::vectorize(tensor.dims()); diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h index 427901c1a7fd5..2f63ee880b13e 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h +++ b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper.h @@ -341,7 +341,7 @@ class ScopedDropoutDescriptor { const platform::Place& place, bool initialized, float dropout_prob_, - framework::Tensor* dropout_state_, + phi::DenseTensor* dropout_state_, int seed, size_t state_size) { if (dropout_state_ == nullptr) { // for no dropout or test diff --git a/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc b/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc index 2e58e71cc2c06..cbe322ef0c48c 100644 --- a/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc +++ b/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc @@ -29,7 +29,7 @@ TEST(TensorDescriptor, Empty) { } TEST(TensorDescriptor, Normal) { - framework::Tensor tt; + phi::DenseTensor tt; tt.Resize({2, 3, 4}); tt.mutable_data(platform::CPUPlace()); diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h b/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h index 6f943ea352696..158693f5dad70 100644 --- a/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h +++ b/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h @@ -32,7 +32,6 @@ class DenseTensor; namespace paddle { namespace platform { -using framework::Tensor; template inline miopenDataType_t ToCudnnDataType(const T& t) { diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h b/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h index 9cb5cdfbb164d..7a77a47189d11 100644 --- a/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h +++ b/paddle/fluid/platform/device/gpu/rocm/miopen_helper.h @@ -285,7 +285,7 @@ class ScopedDropoutDescriptor { const platform::Place& place, bool initialized, float dropout_prob_, - framework::Tensor* dropout_state_, + phi::DenseTensor* dropout_state_, int seed, size_t state_size) { if (dropout_state_ == nullptr) { // for no dropout or test diff --git a/paddle/fluid/platform/device/ipu/ipu_backend.cc b/paddle/fluid/platform/device/ipu/ipu_backend.cc index 9e960a99123c0..30ee14c44893a 100644 --- a/paddle/fluid/platform/device/ipu/ipu_backend.cc +++ b/paddle/fluid/platform/device/ipu/ipu_backend.cc @@ -61,8 +61,8 @@ void IpuBackend::Compile(framework::ir::Graph* graph, VLOG(10) << "leave IpuBackend::Compile"; } -void IpuBackend::Run(const std::vector& inputs, - const std::vector& outputs, +void IpuBackend::Run(const std::vector& inputs, + const std::vector& outputs, const framework::ExecutionContext& ctx) { 
timer_->Start(); executor_->Run(inputs, outputs, ctx); diff --git a/paddle/fluid/platform/device/ipu/ipu_backend.h b/paddle/fluid/platform/device/ipu/ipu_backend.h index 1e083e7a3518c..1f15f3832db7f 100644 --- a/paddle/fluid/platform/device/ipu/ipu_backend.h +++ b/paddle/fluid/platform/device/ipu/ipu_backend.h @@ -54,8 +54,8 @@ class IpuBackend { const std::vector &fetch_list); // Run the compiled graph on ipu - void Run(const std::vector &inputs, - const std::vector &outputs, + void Run(const std::vector &inputs, + const std::vector &outputs, const framework::ExecutionContext &ctx); // Sync weights from IPU while training diff --git a/paddle/fluid/platform/device/ipu/ipu_utils.h b/paddle/fluid/platform/device/ipu/ipu_utils.h index 66094a0a4b0ab..9e075d3c06c90 100644 --- a/paddle/fluid/platform/device/ipu/ipu_utils.h +++ b/paddle/fluid/platform/device/ipu/ipu_utils.h @@ -25,7 +25,7 @@ limitations under the License. */ #include "paddle/fluid/platform/float16.h" using float16 = paddle::platform::float16; -using Tensor = paddle::framework::Tensor; +using Tensor = phi::DenseTensor; using LoDTensor = paddle::framework::LoDTensor; using Scope = paddle::framework::Scope; using OpDesc = paddle::framework::OpDesc; diff --git a/paddle/fluid/platform/device/npu/npu_op_runner.h b/paddle/fluid/platform/device/npu/npu_op_runner.h index 220dd23c3f14c..bdd25dd462706 100644 --- a/paddle/fluid/platform/device/npu/npu_op_runner.h +++ b/paddle/fluid/platform/device/npu/npu_op_runner.h @@ -28,7 +28,7 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; +using Tensor = phi::DenseTensor; using DataLayout = framework::DataLayout; using NPUAttribute = framework::NPUAttribute; using NPUAttributeMap = framework::NPUAttributeMap; diff --git a/paddle/fluid/platform/device_code_test.cc b/paddle/fluid/platform/device_code_test.cc index cb2649686ec02..c4ac44603dd2f 100644 --- a/paddle/fluid/platform/device_code_test.cc +++ b/paddle/fluid/platform/device_code_test.cc @@ -56,9 +56,9 @@ TEST(DeviceCode, cuda) { paddle::platform::CUDAPlace place = paddle::platform::CUDAPlace(0); paddle::platform::CUDADeviceCode code(place, "saxpy_kernel", saxpy_code); - paddle::framework::Tensor cpu_x; - paddle::framework::Tensor cpu_y; - paddle::framework::Tensor cpu_z; + phi::DenseTensor cpu_x; + phi::DenseTensor cpu_y; + phi::DenseTensor cpu_z; float scale = 2; auto dims = @@ -74,9 +74,9 @@ TEST(DeviceCode, cuda) { cpu_y.data()[i] = static_cast(0.5); } - paddle::framework::Tensor x; - paddle::framework::Tensor y; - paddle::framework::Tensor z; + phi::DenseTensor x; + phi::DenseTensor y; + phi::DenseTensor z; float* x_data = x.mutable_data(dims, place); float* y_data = y.mutable_data(dims, place); diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index 0e97a68edfc9d..07f5f3408a30c 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -21,7 +21,7 @@ limitations under the License. 
*/ #include #include -#include "dnnl.hpp" +#include "dnnl.hpp" // NOLINT #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler/event_tracing.h" @@ -76,7 +76,7 @@ tf_pd MKLDNNBwdPrimitiveDesc(const Engine& e, return tf_pd(desc, e, p); } -inline void MatchShapeToLayout(framework::Tensor* tensor_in, +inline void MatchShapeToLayout(phi::DenseTensor* tensor_in, framework::DataLayout from, framework::DataLayout to) { auto print_dims = [](const std::vector& dims) { @@ -577,7 +577,7 @@ inline void GetGroupConvWeightsTz(std::vector& weights_tz, // NOLINT } inline void RegisterModelLayout( - std::vector>& ops, + std::vector>& ops, // NOLINT const platform::Place& place) { if (platform::is_cpu_place(place)) { // If there is already registered NHWC then quit this call diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index ca099cb65d67c..604ddb9555ea4 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -31,7 +31,7 @@ namespace paddle { namespace platform { using framework::DataLayout; -using framework::Tensor; + using user_function = std::function(const float*)>; using memory = dnnl::memory; @@ -236,7 +236,7 @@ class MatMulV2MKLDNNHandler } if (ctx.HasInput("ResidualData")) { - auto* residual_data = ctx.Input("ResidualData"); + auto* residual_data = ctx.Input("ResidualData"); auto residual_data_tz = phi::vectorize(residual_data->dims()); auto residual_data_md = memory::desc(residual_data_tz, MKLDNNGetDataType(), @@ -273,22 +273,20 @@ class MatMulV2MKLDNNHandler return fake_strides; } - std::shared_ptr AcquireWeightsMemory(const Tensor* input) { + std::shared_ptr AcquireWeightsMemory(const phi::DenseTensor* input) { const YT* input_data = input->data(); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->weights_desc(), to_void_cast(input_data)); } - std::shared_ptr AcquireDstMemory( - paddle::framework::Tensor* output) { + std::shared_ptr AcquireDstMemory(phi::DenseTensor* output) { // We cannot use base AcquireDstMemory as it makes an allocation request // base on DST memory primitive size. This is fine in general, but in MatMul // we have primitive that covers only one batch of Data and then shift - // pointer for every new batch. Hence Tensor size is bigger that dst memory - // primitive size. So would we request less memory that is there and it - // triggers an - // assertion. So as there is no 'any' format here we can leave default size - // of Tensor as computed in ComputeInferShape + // pointer for every new batch. Hence phi::DenseTensor size is bigger that + // dst memory primitive size. So would we request less memory that is there + // and it triggers an assertion. So as there is no 'any' format here we can + // leave default size of phi::DenseTensor as computed in ComputeInferShape OT* ptr = output->mutable_data(this->place_); return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr); } @@ -304,7 +302,7 @@ class ActivationMKLDNNHandler const framework::ExecutionContext& ctx, const dnnl::engine engine, Place cpu_place, - const framework::Tensor* x) + const phi::DenseTensor* x) : platform::MKLDNNHandlerNoCachingT(engine, @@ -314,7 +312,7 @@ class ActivationMKLDNNHandler if (ctx.Type() == "scale") { bool bias_after_scale = ctx.Attr("bias_after_scale"); - auto* scale_tensor = ctx.Input("ScaleTensor"); + auto* scale_tensor = ctx.Input("ScaleTensor"); alpha = (scale_tensor == nullptr) ? 
ctx.Attr("scale") : static_cast(*(scale_tensor->data())); @@ -327,10 +325,12 @@ class ActivationMKLDNNHandler beta *= alpha; } } else if (ctx.Type() == "clip") { - alpha = ctx.HasInput("Min") ? ctx.Input("Min")->data()[0] - : ctx.Attr("min"); - beta = ctx.HasInput("Max") ? ctx.Input("Max")->data()[0] - : ctx.Attr("max"); + alpha = ctx.HasInput("Min") + ? ctx.Input("Min")->data()[0] + : ctx.Attr("min"); + beta = ctx.HasInput("Max") + ? ctx.Input("Max")->data()[0] + : ctx.Attr("max"); } else { // paddle uses beta but mkldnn uses alpha for swish if (algorithm == dnnl::algorithm::eltwise_swish) { @@ -351,8 +351,8 @@ class ActivationMKLDNNHandler const framework::ExecutionContext& ctx, const dnnl::engine engine, Place cpu_place, - const framework::Tensor* x, - const Tensor* dout) + const phi::DenseTensor* x, + const phi::DenseTensor* dout) : platform::MKLDNNHandlerNoCachingT(engine, @@ -368,10 +368,12 @@ class ActivationMKLDNNHandler } if (ctx.Type() == "clip_grad") { - alpha = ctx.HasInput("Min") ? ctx.Input("Min")->data()[0] - : ctx.Attr("min"); - beta = ctx.HasInput("Max") ? ctx.Input("Max")->data()[0] - : ctx.Attr("max"); + alpha = ctx.HasInput("Min") + ? ctx.Input("Min")->data()[0] + : ctx.Attr("min"); + beta = ctx.HasInput("Max") + ? ctx.Input("Max")->data()[0] + : ctx.Attr("max"); } this->AcquireForwardPrimitiveDescriptor(dnnl::prop_kind::forward_training, @@ -384,7 +386,7 @@ class ActivationMKLDNNHandler } std::shared_ptr AcquireBackwardSrcMemory( - const framework::Tensor* input) { + const phi::DenseTensor* input) { const T* input_data = input->data(); return this->AcquireMemoryFromPrimitive(this->bwd_pd_->src_desc(), to_void_cast(input_data)); @@ -474,7 +476,7 @@ class ReorderMKLDNNHandler { return sub_mem_p; } - std::shared_ptr AcquireDstMemory(framework::Tensor* output, + std::shared_ptr AcquireDstMemory(phi::DenseTensor* output, const MKLDNNMemoryFormat& fmt, platform::Place place) { auto dst_md = platform::MKLDNNMemDesc(dims_, dtype_dst_, fmt); @@ -484,7 +486,7 @@ class ReorderMKLDNNHandler { } std::shared_ptr AcquireDstMemory( - framework::Tensor* output, + phi::DenseTensor* output, const dnnl::memory::desc& src_md, platform::Place place) { if (vtype_dst_ == vtype_) { @@ -501,7 +503,7 @@ class ReorderMKLDNNHandler { } std::shared_ptr AcquireDstMemory( - framework::Tensor* output, + phi::DenseTensor* output, const std::vector& dims, const MKLDNNMemoryFormat& fmt, platform::Place place) { diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 03aace9b78e38..c616d3fbebf11 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -206,7 +206,7 @@ void InitTensorWithTensor(TensorObject* self, } void InitTensorWithFrameworkTensor(TensorObject* self, - const framework::Tensor& src, + const phi::DenseTensor& src, const paddle::platform::Place& place, const std::string& name) { self->tensor.set_name(name); @@ -382,7 +382,7 @@ void AutoInitTensorByPyArray(TensorObject* py_tensor_ptr, InitTensorWithNumpyValue(py_tensor_ptr, numpy_value, place, zero_copy); } -// initialize Tensor by Tensor or framework::Tensor (mix args and +// initialize Tensor by Tensor or phi::DenseTensor (mix args and // kwargs) automatically. 
void AutoInitTensorByTensor(TensorObject* py_tensor_ptr, std::unordered_map kws_map, @@ -428,7 +428,7 @@ void AutoInitTensorByTensor(TensorObject* py_tensor_ptr, InitTensorWithTensor(py_tensor_ptr, src_tensor, place, act_name); } else { // init by framework tensor - framework::Tensor src_tensor; + phi::DenseTensor src_tensor; if (kw_order_map["value"] <= args_num) { src_tensor = CastPyArg2FrameworkTensor( PyTuple_GET_ITEM(args, kw_order_map["value"] - 1), @@ -438,8 +438,8 @@ void AutoInitTensorByTensor(TensorObject* py_tensor_ptr, src_tensor = CastPyArg2FrameworkTensor(kws_map["value"], 0); } else { PADDLE_THROW(platform::errors::InvalidArgument( - "The first expected arguments is {value: framework::Tensor}, " - "but could not parse the first argument {value: framework::Tensor} " + "The first expected arguments is {value: phi::DenseTensor}, " + "but could not parse the first argument {value: phi::DenseTensor} " "successfully. " "Please check your input first and make sure you are on the right " "way.")); @@ -687,7 +687,7 @@ int TensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { PADDLE_THROW(platform::errors::InvalidArgument( "Could not parse the first keyword argument successfully, " "the first keyword argument is value, but it should be PyArray " - "or Tensor or framework::Tensor. " + "or Tensor or phi::DenseTensor. " "Please check your input first and make sure you are on the " "right way.")); } @@ -753,7 +753,7 @@ int TensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { } else { PADDLE_THROW(platform::errors::InvalidArgument( "We not only support construct Tensor from numpy value " - "or tensor(Tensor or framework::Tensor) " + "or tensor(Tensor or phi::DenseTensor) " "with python kwargs by this initializer, " "but also even support dtype to init a empty Tensor. " "Please check your input first and make sure you call the existed " @@ -789,10 +789,10 @@ int TensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { } else { PADDLE_THROW(platform::errors::InvalidArgument( "We support construct Tensor from numpy value " - "or tensor(Tensor or framework::Tensor) " + "or tensor(Tensor or phi::DenseTensor) " "with python args and kwargs by this initializer, " "but the first argument should be PyArray or Tensor or " - "framework::Tensor. " + "phi::DenseTensor. " "Please check your input first and make sure you call the existed " "constructor.")); } diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 956d8e5814cc0..b2a59140d695b 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -192,7 +192,7 @@ static PyObject* eager_api_read_next_tensor_list(PyObject* self, { eager_gil_scoped_release guard; tensor_list.reserve(tensor_base_list.size()); - auto func = [](framework::Tensor& tensor_base) { + auto func = [](phi::DenseTensor& tensor_base) { paddle::experimental::Tensor tensor( egr::Controller::Instance().GenerateUniqueName()); auto autograd_meta = egr::EagerUtils::autograd_meta(&tensor); diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 5233bbc832935..999a9e7ce8f4b 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -581,13 +581,11 @@ static PyObject* tensor__share_buffer_to(TensorObject* self, "Tensor %s has not been initialized! 
please initialize " "src tensor before share_buffer_with to other.", self->tensor.name())); - auto* src_tensor = - static_cast(self->tensor.impl().get()); + auto* src_tensor = static_cast(self->tensor.impl().get()); if (!dst_ptr->defined()) { dst_ptr->set_impl(std::make_shared()); } - auto dst_tensor = - static_cast(dst_ptr->impl().get()); + auto dst_tensor = static_cast(dst_ptr->impl().get()); dst_tensor->ShareBufferWith(*src_tensor); dst_tensor->ShareDataTypeWith(*src_tensor); RETURN_PY_NONE @@ -611,10 +609,8 @@ static PyObject* tensor__is_shared_buffer_with(TensorObject* self, if (!self->tensor.defined() || !dst_ptr->defined()) { return ToPyObject(res); } - auto* self_ptr = - static_cast(self->tensor.impl().get()); - auto dst_tensor = - static_cast(dst_ptr->impl().get()); + auto* self_ptr = static_cast(self->tensor.impl().get()); + auto dst_tensor = static_cast(dst_ptr->impl().get()); res = dst_tensor->IsSharedBufferWith(*self_ptr); return ToPyObject(res); EAGER_CATCH_AND_THROW_RETURN_NULL diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index df09dd7ec0a70..944fbb7faaf84 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -428,10 +428,10 @@ platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos) { return place; } -framework::Tensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos) { +phi::DenseTensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos) { if (PyObject_IsInstance( obj, reinterpret_cast(g_framework_tensor_pytype))) { - return ::pybind11::handle(obj).cast(); + return ::pybind11::handle(obj).cast(); } else { PADDLE_THROW(platform::errors::InvalidArgument( "argument (position %d) must be " @@ -441,8 +441,8 @@ framework::Tensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos) { } } -std::vector CastPyArg2VectorOfTensorBase(PyObject* obj, - ssize_t arg_pos) { +std::vector CastPyArg2VectorOfTensorBase(PyObject* obj, + ssize_t arg_pos) { std::vector result; if (PyList_Check(obj)) { Py_ssize_t len = PyList_Size(obj); diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index 1f4a93dab91eb..f0ca654122937 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -64,7 +64,7 @@ std::shared_ptr CastPyArg2VarBase(PyObject* obj, std::vector CastPyArg2VectorOfTensor( PyObject* obj, ssize_t arg_pos); platform::Place CastPyArg2Place(PyObject* obj, ssize_t arg_pos); -framework::Tensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos); +phi::DenseTensor CastPyArg2FrameworkTensor(PyObject* obj, ssize_t arg_pos); std::vector CastPyArg2VectorOfTensorBase(PyObject* obj, ssize_t arg_pos); std::vector CastPyArg2VectorOfInt(PyObject* obj, size_t arg_pos); diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 3dc87f0f7cc04..5e19c4b557c6b 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -282,7 +282,7 @@ static void InitVarBaseFromNumpyWithArgDefault(imperative::VarBase *self, } static void InitVarBaseFromTensorWithArgDefault(imperative::VarBase *self, - const framework::Tensor &tensor, + const phi::DenseTensor &tensor, const std::string &name) { VLOG(4) << "Init VarBase"; auto place = imperative::GetCurrentTracer()->ExpectedPlace(); @@ -306,7 +306,7 @@ static void InitVarBaseFromTensorWithArgDefault(imperative::VarBase *self, template static void InitVarBaseFromTensorWithArg(imperative::VarBase *self, - const framework::Tensor &tensor, + const 
phi::DenseTensor &tensor, const P &place, const std::string &name) { VLOG(4) << "Init VarBase"; @@ -3031,9 +3031,9 @@ void BindImperative(py::module *m_ptr) { } // Select the index data to the buffer - auto index_select = [](const framework::Tensor &src_tensor, - const framework::Tensor &index_tensor, - framework::Tensor *buffer_tensor) { + auto index_select = [](const phi::DenseTensor &src_tensor, + const phi::DenseTensor &index_tensor, + phi::DenseTensor *buffer_tensor) { auto *src_data = src_tensor.data(); auto *index_data = index_tensor.data(); auto *buffer_data = diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index eb395ed2a144a..96e5b9f5c6b68 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -236,7 +236,7 @@ paddle_infer::PlaceType ToPaddleInferPlace( } void PaddleInferShareExternalData(paddle_infer::Tensor &tensor, // NOLINT - framework::Tensor input_tensor) { + phi::DenseTensor input_tensor) { std::vector shape; for (int i = 0; i < input_tensor.dims().size(); ++i) { shape.push_back(input_tensor.dims()[i]); diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 9408429641a7f..9acacd5a0c7ab 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -679,7 +679,7 @@ PYBIND11_MODULE(libpaddle, m) { PyCapsule_SetName(dltensor->ptr(), "used_dltensor"); DLTensor dl = dmt->dl_tensor; - framework::Tensor tensor; + phi::DenseTensor tensor; if (dl.device.device_type == kDLCPU) { paddle::framework::TensorFromDLPack(dl, &tensor); diff --git a/paddle/fluid/pybind/tensor.cc b/paddle/fluid/pybind/tensor.cc index 8152a11c8193a..addc9b7c27d4c 100644 --- a/paddle/fluid/pybind/tensor.cc +++ b/paddle/fluid/pybind/tensor.cc @@ -196,8 +196,8 @@ namespace pybind { PyTypeObject *g_framework_tensor_pytype = nullptr; template -static void TensorCopyFrom(framework::Tensor *dst, - const framework::Tensor &src, +static void TensorCopyFrom(phi::DenseTensor *dst, + const phi::DenseTensor &src, const PlaceType &place, int64_t batch_size) { if (batch_size < 0) { @@ -210,134 +210,134 @@ static void TensorCopyFrom(framework::Tensor *dst, void BindTensor(pybind11::module &m) { // NOLINT using namespace paddle::framework; // NOLINT - py::class_ framework_tensor( + py::class_ framework_tensor( m, "Tensor", py::buffer_protocol()); g_framework_tensor_pytype = reinterpret_cast(framework_tensor.ptr()); framework_tensor .def("__array__", - [](framework::Tensor &self) { return TensorToPyArray(self); }) + [](phi::DenseTensor &self) { return TensorToPyArray(self); }) .def("_ptr", - [](const framework::Tensor &self) { + [](const phi::DenseTensor &self) { return reinterpret_cast(self.data()); }) - .def("_slice", &framework::Tensor::Slice) - .def("_numel", &framework::Tensor::numel) + .def("_slice", &phi::DenseTensor::Slice) + .def("_numel", &phi::DenseTensor::numel) .def("_is_initialized", - [](const framework::Tensor &self) { return self.IsInitialized(); }) + [](const phi::DenseTensor &self) { return self.IsInitialized(); }) .def("_get_dims", - [](const framework::Tensor &self) { return vectorize(self.dims()); }) + [](const phi::DenseTensor &self) { return vectorize(self.dims()); }) .def("_set_dims", - [](framework::Tensor &self, const std::vector &dim) { + [](phi::DenseTensor &self, const std::vector &dim) { self.Resize(phi::make_ddim(dim)); }) .def("_set_layout", - [](framework::Tensor &self, const std::string &layout) { + [](phi::DenseTensor &self, const std::string &layout) { 
self.set_layout(StringToDataLayout(layout)); }) .def("_alloc_float", - [](framework::Tensor &self, paddle::platform::CustomPlace &place) { + [](phi::DenseTensor &self, paddle::platform::CustomPlace &place) { self.mutable_data(place); }) .def("_alloc_float", - [](framework::Tensor &self, paddle::platform::CUDAPlace &place) { + [](phi::DenseTensor &self, paddle::platform::CUDAPlace &place) { self.mutable_data(place); }) .def("_alloc_float", - [](framework::Tensor &self, paddle::platform::XPUPlace &place) { + [](phi::DenseTensor &self, paddle::platform::XPUPlace &place) { self.mutable_data(place); }) .def("_alloc_float", - [](framework::Tensor &self, paddle::platform::CPUPlace &place) { + [](phi::DenseTensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); }) .def("_alloc_float", - [](framework::Tensor &self, paddle::platform::NPUPlace &place) { + [](phi::DenseTensor &self, paddle::platform::NPUPlace &place) { self.mutable_data(place); }) .def("_alloc_float", - [](framework::Tensor &self, paddle::platform::MLUPlace &place) { + [](phi::DenseTensor &self, paddle::platform::MLUPlace &place) { self.mutable_data(place); }) .def("_alloc_double", - [](framework::Tensor &self, paddle::platform::CPUPlace &place) { + [](phi::DenseTensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](framework::Tensor &self, paddle::platform::CPUPlace &place) { + [](phi::DenseTensor &self, paddle::platform::CPUPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](framework::Tensor &self, paddle::platform::CustomPlace &place) { + [](phi::DenseTensor &self, paddle::platform::CustomPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](framework::Tensor &self, paddle::platform::XPUPlace &place) { + [](phi::DenseTensor &self, paddle::platform::XPUPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](framework::Tensor &self, paddle::platform::CUDAPlace &place) { + [](phi::DenseTensor &self, paddle::platform::CUDAPlace &place) { self.mutable_data(place); }) .def("_alloc_int", - [](framework::Tensor &self, paddle::platform::MLUPlace &place) { + [](phi::DenseTensor &self, paddle::platform::MLUPlace &place) { self.mutable_data(place); }) - .def("_alloc_int", - [](framework::Tensor &self, - paddle::platform::CUDAPinnedPlace &place) { - self.mutable_data(place); - }) - .def("_alloc_float", - [](framework::Tensor &self, - paddle::platform::CUDAPinnedPlace &place) { - self.mutable_data(place); - }) + .def( + "_alloc_int", + [](phi::DenseTensor &self, paddle::platform::CUDAPinnedPlace &place) { + self.mutable_data(place); + }) + .def( + "_alloc_float", + [](phi::DenseTensor &self, paddle::platform::CUDAPinnedPlace &place) { + self.mutable_data(place); + }) .def("_mutable_data", - [](framework::Tensor &self, + [](phi::DenseTensor &self, paddle::platform::CPUPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast( self.mutable_data(place, framework::TransToPhiDataType(type))); }) .def("_mutable_data", - [](framework::Tensor &self, + [](phi::DenseTensor &self, paddle::platform::CustomPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast( self.mutable_data(place, framework::TransToPhiDataType(type))); }) .def("_mutable_data", - [](framework::Tensor &self, + [](phi::DenseTensor &self, paddle::platform::XPUPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast( self.mutable_data(place, framework::TransToPhiDataType(type))); }) 
.def("_mutable_data", - [](framework::Tensor &self, + [](phi::DenseTensor &self, paddle::platform::CUDAPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast( self.mutable_data(place, framework::TransToPhiDataType(type))); }) .def("_mutable_data", - [](framework::Tensor &self, + [](phi::DenseTensor &self, paddle::platform::CUDAPinnedPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast( self.mutable_data(place, framework::TransToPhiDataType(type))); }) .def("_mutable_data", - [](framework::Tensor &self, + [](phi::DenseTensor &self, paddle::platform::MLUPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast( self.mutable_data(place, framework::TransToPhiDataType(type))); }) - .def("_clear", &framework::Tensor::clear) + .def("_clear", &phi::DenseTensor::clear) .def("_mutable_data", - [](framework::Tensor &self, + [](phi::DenseTensor &self, paddle::platform::NPUPlace &place, paddle::framework::proto::VarType::Type type) { return reinterpret_cast( @@ -453,7 +453,7 @@ void BindTensor(pybind11::module &m) { // NOLINT .def( "shape", - [](framework::Tensor &self) { return vectorize(self.dims()); }, + [](phi::DenseTensor &self) { return vectorize(self.dims()); }, R"DOC( Return the shape of Tensor. @@ -472,7 +472,7 @@ void BindTensor(pybind11::module &m) { // NOLINT print(t.shape()) # [5, 30] )DOC") .def("_to_dlpack", - [](framework::Tensor &self) { + [](phi::DenseTensor &self) { DLPackTensor dlpack_tensor(self, 1); DLManagedTensor *dmt = dlpack_tensor.ToDLManagedTensor(); auto capsule = py::capsule( @@ -496,25 +496,25 @@ void BindTensor(pybind11::module &m) { // NOLINT .def("_get_float_element", TensorGetElement) .def("_set_double_element", TensorSetElement) .def("_get_double_element", TensorGetElement) - .def("_place", [](framework::Tensor &self) { return self.place(); }) + .def("_place", [](phi::DenseTensor &self) { return self.place(); }) .def("_dtype", - [](framework::Tensor &self) { + [](phi::DenseTensor &self) { return framework::TransToProtoVarType(self.type()); }) .def("_layout", - [](framework::Tensor &self) { + [](phi::DenseTensor &self) { return DataLayoutToString(self.layout()); }) - .def("_share_data_with", &framework::Tensor::ShareDataWith) + .def("_share_data_with", &phi::DenseTensor::ShareDataWith) .def("__getitem__", PySliceTensor, py::return_value_policy::reference) .def("__str__", - [](const framework::Tensor &self) { + [](const phi::DenseTensor &self) { std::stringstream ostr; ostr << self; return ostr.str(); }) /* ------ End of original Tensor ------ */ .def("__init__", - [](framework::Tensor &instance, + [](phi::DenseTensor &instance, const std::vector> &recursive_sequence_lengths) { LoD new_lod; @@ -531,11 +531,11 @@ void BindTensor(pybind11::module &m) { // NOLINT "invalid, " "the LoD converted by recursive_sequence_lengths is %s", new_lod)); - new (&instance) framework::Tensor(new_offset_lod); + new (&instance) phi::DenseTensor(new_offset_lod); }) .def("__init__", - [](framework::Tensor &instance) { - new (&instance) framework::Tensor(); + [](phi::DenseTensor &instance) { + new (&instance) phi::DenseTensor(); }) // We implement offset based LOD in C++ while we use length based with // Python API. 
So we changed set_lod to set_recursive_sequence_lengths @@ -545,7 +545,7 @@ void BindTensor(pybind11::module &m) { // NOLINT // https://github.com/PaddlePaddle/Paddle/issues/10855 .def( "set_lod", - [](framework::Tensor &self, + [](phi::DenseTensor &self, const std::vector> &lod) { // the input lod is offset-based level-of-detail info LoD new_lod; @@ -581,7 +581,7 @@ void BindTensor(pybind11::module &m) { // NOLINT )DOC") .def( "set_recursive_sequence_lengths", - [](framework::Tensor &self, + [](phi::DenseTensor &self, const std::vector> &recursive_sequence_lengths) { // the input recursive_sequence_lengths is length-based @@ -631,7 +631,7 @@ void BindTensor(pybind11::module &m) { // NOLINT )DOC") .def( "lod", - [](framework::Tensor &self) -> std::vector> { + [](phi::DenseTensor &self) -> std::vector> { // output the offset-based lod info LoD lod = self.lod(); std::vector> new_lod; @@ -659,7 +659,7 @@ void BindTensor(pybind11::module &m) { // NOLINT // Set above comments of set_lod. .def( "recursive_sequence_lengths", - [](framework::Tensor &self) -> std::vector> { + [](phi::DenseTensor &self) -> std::vector> { // output the length-based lod info LoD lod = phi::ConvertToLengthBasedLoD(self.lod()); std::vector> new_lod; @@ -687,7 +687,7 @@ void BindTensor(pybind11::module &m) { // NOLINT )DOC") .def( "has_valid_recursive_sequence_lengths", - [](framework::Tensor &self) -> bool { + [](phi::DenseTensor &self) -> bool { // Check that the lod info is valid and match the outermost // dimension of the Tensor data return CheckLoD(self.lod(), vectorize(self.dims()).front()); @@ -710,18 +710,18 @@ void BindTensor(pybind11::module &m) { // NOLINT print(t.has_valid_recursive_sequence_lengths()) # True )DOC") .def("_as_type", - [](const framework::Tensor &self, + [](const phi::DenseTensor &self, paddle::framework::proto::VarType::Type type) { - framework::Tensor dst; + phi::DenseTensor dst; if (self.IsInitialized() && self.numel() > 0) { TransDataType(self, type, &dst); } return dst; }) .def("_copy", - [](const framework::Tensor &self, const platform::Place &place) { + [](const phi::DenseTensor &self, const platform::Place &place) { // follow fetch_op's inplementation - framework::Tensor dst; + phi::DenseTensor dst; if (self.IsInitialized() && self.numel() > 0) { TensorCopySync(self, place, &dst); } else { @@ -737,7 +737,7 @@ void BindTensor(pybind11::module &m) { // NOLINT }) #ifdef PADDLE_WITH_CUDA .def("_share_buffer_with", - [](framework::Tensor &self, const framework::Tensor src, + [](phi::DenseTensor &self, const phi::DenseTensor src, py::tuple t) { auto *cuda_ipc_allocation = dynamic_cast( @@ -779,7 +779,7 @@ void BindTensor(pybind11::module &m) { // NOLINT )DOC") .def("_share_cuda", - [](framework::Tensor self) { + [](phi::DenseTensor self) { if (!self.IsInitialized() || self.numel() == 0) throw std::runtime_error( "Tensor not initialized or numel is 0. could not pass " @@ -841,7 +841,7 @@ void BindTensor(pybind11::module &m) { // NOLINT "Invalid Tensor meta info for shared cuda tensor!"); // 1. Create a new C++ instance - framework::Tensor tensor; + phi::DenseTensor tensor; // 2. Rebuild Allocation from handle const std::string &handle = t[0].cast(); @@ -883,7 +883,7 @@ void BindTensor(pybind11::module &m) { // NOLINT )DOC") #endif .def("_share_filename", - [](framework::Tensor &self) { + [](phi::DenseTensor &self) { if (!self.IsInitialized() || self.numel() == 0) throw std::runtime_error( "Tensor not initialized or numel is 0. 
could not pass to " @@ -955,7 +955,7 @@ void BindTensor(pybind11::module &m) { // NOLINT if (t.size() != 5) throw std::runtime_error("Invalid Tensor meta info state!"); - framework::Tensor tensor; + phi::DenseTensor tensor; // 2. Rebuild Allocation const std::string &ipc_name = t[0].cast(); @@ -993,7 +993,7 @@ void BindTensor(pybind11::module &m) { // NOLINT )DOC") .def("_shared_incref", - [](framework::Tensor &self) { + [](phi::DenseTensor &self) { auto *mmap_allocation = dynamic_cast< memory::allocation::RefcountedMemoryMapAllocation *>( self.Holder().get()); @@ -1005,7 +1005,7 @@ void BindTensor(pybind11::module &m) { // NOLINT Increase reference count of share_filename tensor. )DOC") .def("_shared_decref", - [](framework::Tensor &self) { + [](phi::DenseTensor &self) { auto *mmap_allocation = dynamic_cast< memory::allocation::RefcountedMemoryMapAllocation *>( self.Holder().get()); @@ -1017,7 +1017,7 @@ void BindTensor(pybind11::module &m) { // NOLINT Decrease reference count of share_filename tensor. )DOC") .def(py::pickle( - [](const framework::Tensor &t) { // __getstate__ + [](const phi::DenseTensor &t) { // __getstate__ auto holder = t.Holder(); PADDLE_ENFORCE_EQ(platform::is_cpu_place(holder->place()), true, platform::errors::PreconditionNotMet( @@ -1042,7 +1042,7 @@ void BindTensor(pybind11::module &m) { // NOLINT throw std::runtime_error("Invalid Tensor state!"); // 1. Create a new C++ instance - framework::Tensor tensor; + phi::DenseTensor tensor; // 2. Rebuild Allocation const std::string &ipc_name = t[0].cast(); diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 4b01f2b568b0f..0003111f0cad3 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -264,7 +264,7 @@ inline std::string TensorDTypeToPyDTypeStr( } // namespace details template -T TensorGetElement(const framework::Tensor &self, size_t offset) { +T TensorGetElement(const phi::DenseTensor &self, size_t offset) { PADDLE_ENFORCE_LT(offset, self.numel(), platform::errors::InvalidArgument( @@ -314,7 +314,7 @@ T TensorGetElement(const framework::Tensor &self, size_t offset) { } template -void TensorSetElement(framework::Tensor *self, size_t offset, T elem) { +void TensorSetElement(phi::DenseTensor *self, size_t offset, T elem) { PADDLE_ENFORCE_LT(offset, self->numel(), platform::errors::InvalidArgument( @@ -362,7 +362,7 @@ void TensorSetElement(framework::Tensor *self, size_t offset, T elem) { template void SetTensorFromPyArrayT( - framework::Tensor *self, + phi::DenseTensor *self, const py::array_t &array, const P &place, bool zero_copy) { @@ -502,7 +502,7 @@ void SetTensorFromPyArrayT( } template -void SetTensorFromPyArray(framework::Tensor *self, +void SetTensorFromPyArray(phi::DenseTensor *self, const py::object &obj, const P &place, bool zero_copy) { @@ -679,8 +679,8 @@ void SetUVATensorFromPyArray( } template -void _sliceCompute(const framework::Tensor *in, - framework::Tensor *out, +void _sliceCompute(const phi::DenseTensor *in, + phi::DenseTensor *out, const phi::CPUContext &ctx, const std::vector &axes, const std::vector &starts) { @@ -714,8 +714,8 @@ void _sliceCompute(const framework::Tensor *in, } template -void _concatCompute(const std::vector &ins, - paddle::framework::Tensor *out, +void _concatCompute(const std::vector &ins, + phi::DenseTensor *out, const phi::CPUContext &ctx, int64_t axis) { if (axis == 0 && ins.size() < 10) { @@ -739,7 +739,7 @@ void _concatCompute(const std::vector &ins, } } -inline void _getSliceinfo(const framework::Tensor 
&self, +inline void _getSliceinfo(const phi::DenseTensor &self, py::object obj, const int64_t dim, int64_t *pstart, @@ -791,9 +791,9 @@ inline void _getSliceinfo(const framework::Tensor &self, } } -inline framework::Tensor *_getTensor(const framework::Tensor &self, - const framework::DDim &ddim) { - framework::Tensor *output = new framework::Tensor(); +inline phi::DenseTensor *_getTensor(const phi::DenseTensor &self, + const framework::DDim &ddim) { + phi::DenseTensor *output = new phi::DenseTensor(); output->Resize(ddim); auto place = self.place(); if (platform::is_cpu_place(place)) { @@ -819,8 +819,8 @@ inline framework::Tensor *_getTensor(const framework::Tensor &self, } template -void _sliceDapper(const framework::Tensor *in, - framework::Tensor *out, +void _sliceDapper(const phi::DenseTensor *in, + phi::DenseTensor *out, const phi::CPUContext &ctx, const std::vector &axes, const std::vector &starts, @@ -861,32 +861,32 @@ void _sliceDapper(const framework::Tensor *in, } template -inline framework::Tensor *_sliceWrapper(const framework::Tensor &self, - const phi::CPUContext &ctx, - py::object obj, - int dim, - int64_t start, - int64_t slicelength) { +inline phi::DenseTensor *_sliceWrapper(const phi::DenseTensor &self, + const phi::CPUContext &ctx, + py::object obj, + int dim, + int64_t start, + int64_t slicelength) { framework::DDim dstDDim = self.dims(); dstDDim[dim] = static_cast(slicelength); std::vector axes({dim}); std::vector starts({static_cast(start)}); - framework::Tensor *output = _getTensor(self, dstDDim); + phi::DenseTensor *output = _getTensor(self, dstDDim); _sliceDapper(&self, output, ctx, axes, starts, dstDDim.size()); return output; } template -inline framework::Tensor *_sliceAndConcat(const framework::Tensor &self, - py::object obj, - int dim) { +inline phi::DenseTensor *_sliceAndConcat(const phi::DenseTensor &self, + py::object obj, + int dim) { phi::CPUContext ctx; int64_t start, stop, step, slicelength; _getSliceinfo(self, obj, dim, &start, &stop, &step, &slicelength); if (step == 1 || slicelength == 1) { return _sliceWrapper(self, ctx, obj, dim, start, slicelength); } else { - std::vector ins; + std::vector ins; for (auto i = 0; i < slicelength; ++i, start += step) { ins.emplace_back(*_sliceWrapper(self, ctx, obj, dim, start, 1)); } @@ -894,15 +894,15 @@ inline framework::Tensor *_sliceAndConcat(const framework::Tensor &self, // do the concat operation framework::DDim dstDDim = self.dims(); dstDDim[dim] = static_cast(slicelength); - framework::Tensor *output1 = _getTensor(self, dstDDim); + phi::DenseTensor *output1 = _getTensor(self, dstDDim); _concatCompute(ins, output1, ctx, dim); return output1; } } -inline framework::Tensor *_sliceTensor(const framework::Tensor &self, - py::object obj, - int dim) { +inline phi::DenseTensor *_sliceTensor(const phi::DenseTensor &self, + py::object obj, + int dim) { auto src_type = framework::TransToProtoVarType(self.dtype()); switch (src_type) { case framework::proto::VarType::FP16: @@ -936,12 +936,12 @@ inline framework::Tensor *_sliceTensor(const framework::Tensor &self, } } -inline framework::Tensor *_pySliceTensor(const framework::Tensor &self, - py::object obj) { +inline phi::DenseTensor *_pySliceTensor(const phi::DenseTensor &self, + py::object obj) { if (py::isinstance(obj)) { py::list l = static_cast(obj); - std::unique_ptr target; - framework::Tensor *src = const_cast(&self); + std::unique_ptr target; + phi::DenseTensor *src = const_cast(&self); for (auto i = 0; i < static_cast(l.size()); ++i) { src = _sliceTensor(*src, 
l[i], i); if (i + 1 == static_cast(l.size())) { @@ -956,15 +956,15 @@ inline framework::Tensor *_pySliceTensor(const framework::Tensor &self, } } -inline framework::Tensor *PySliceTensor(const framework::Tensor &self, - py::object obj) { +inline phi::DenseTensor *PySliceTensor(const phi::DenseTensor &self, + py::object obj) { if (platform::is_gpu_place(self.place())) { - std::unique_ptr holder; - framework::Tensor src; + std::unique_ptr holder; + phi::DenseTensor src; framework::TensorCopySync(self, platform::CPUPlace(), &src); - framework::Tensor *output = _pySliceTensor(src, obj); + phi::DenseTensor *output = _pySliceTensor(src, obj); holder.reset(output); - framework::Tensor *dst = _getTensor(*output, output->dims()); + phi::DenseTensor *dst = _getTensor(*output, output->dims()); framework::TensorCopySync(*output, self.place(), dst); return dst; } else { @@ -972,7 +972,7 @@ inline framework::Tensor *PySliceTensor(const framework::Tensor &self, } } -inline py::array TensorToPyArray(const framework::Tensor &tensor, +inline py::array TensorToPyArray(const phi::DenseTensor &tensor, bool need_deep_copy = false) { if (!tensor.IsInitialized()) { return py::array(); diff --git a/paddle/infrt/api/infrt_api.cc b/paddle/infrt/api/infrt_api.cc index a58c6cc5b86ef..e2fdd9a487121 100644 --- a/paddle/infrt/api/infrt_api.cc +++ b/paddle/infrt/api/infrt_api.cc @@ -121,11 +121,11 @@ class PredictExecutor : public MlirToRuntimeTranslator { int GetInputNum() { return inputs_.size(); } - ::phi::DenseTensor* GetInput(int i) { return inputs_[i]; } + ::Tensor* GetInput(int i) { return inputs_[i]; } int GetOutputNum() { return outputs_.size(); } - ::phi::DenseTensor* GetOutput(int i) { return outputs_[i]; } + ::Tensor* GetOutput(int i) { return outputs_[i]; } private: void Init(::infrt::phi::DenseTensorMap&& map) { @@ -158,10 +158,10 @@ class PredictExecutor : public MlirToRuntimeTranslator { AddValue(predict_func.getArgument(i), value); } else if (type.isa<::infrt::DenseTensorType>()) { // this param is an input Tensor - auto dht = ::phi::DenseTensor(); + auto dht = ::Tensor(); auto* value = new host_context::Value(std::move(dht)); arguments_.push_back(value); - inputs_.push_back(&(value->get<::phi::DenseTensor>())); + inputs_.push_back(&(value->get<::Tensor>())); } else { llvm_unreachable("The input type has not been supported by predictor."); } @@ -174,12 +174,12 @@ class PredictExecutor : public MlirToRuntimeTranslator { auto operand = last_op.getOperand(i); if (operand.getType().isa<::infrt::DenseTensorType>()) { auto r = impl_->value_map.try_emplace( - operand, ValueRef(new host_context::Value(::phi::DenseTensor()))); + operand, ValueRef(new host_context::Value(::Tensor()))); CHECK(r.second) << "Duplicate add mlir value [" << DumpToString(operand) << "]"; auto* value = r.first->second.get(); results_.push_back(ValueRef(value)); - outputs_.push_back(&(value->get<::phi::DenseTensor>())); + outputs_.push_back(&(value->get<::Tensor>())); } else { llvm_unreachable("infrt.return only supports DenseTensor now."); } @@ -200,9 +200,9 @@ class PredictExecutor : public MlirToRuntimeTranslator { private: KernelRegistry* registry_{}; MlirFunctionExecutable* function_executable_; - llvm::SmallVector<::phi::DenseTensor*, 1> inputs_; + llvm::SmallVector<::Tensor*, 1> inputs_; llvm::SmallVector arguments_; - llvm::SmallVector<::phi::DenseTensor*, 1> outputs_; + llvm::SmallVector<::Tensor*, 1> outputs_; llvm::SmallVector results_; }; @@ -322,13 +322,13 @@ int InfRtPredictor::Init(const InfRtConfig& config) { int 
InfRtPredictor::GetInputNum() { return impl_->executor->GetInputNum(); } -::phi::DenseTensor* InfRtPredictor::GetInput(int i) { +::Tensor* InfRtPredictor::GetInput(int i) { return impl_->executor->GetInput(i); } int InfRtPredictor::GetOutputNum() { return impl_->executor->GetOutputNum(); } -::phi::DenseTensor* InfRtPredictor::GetOutput(int i) { +::Tensor* InfRtPredictor::GetOutput(int i) { return impl_->executor->GetOutput(i); } diff --git a/paddle/infrt/api/infrt_api.h b/paddle/infrt/api/infrt_api.h index fcaed78bdd9ae..511d51648d7f3 100644 --- a/paddle/infrt/api/infrt_api.h +++ b/paddle/infrt/api/infrt_api.h @@ -61,9 +61,9 @@ class InfRtPredictor { void Run(); int Init(const InfRtConfig& config); int GetInputNum(); - ::phi::DenseTensor* GetInput(int i); + ::Tensor* GetInput(int i); int GetOutputNum(); - ::phi::DenseTensor* GetOutput(int i); + ::Tensor* GetOutput(int i); protected: struct Impl; diff --git a/paddle/infrt/api/infrt_api_test.cc.in b/paddle/infrt/api/infrt_api_test.cc.in index f7d1c97603c63..32c8c25cd29c3 100644 --- a/paddle/infrt/api/infrt_api_test.cc.in +++ b/paddle/infrt/api/infrt_api_test.cc.in @@ -40,7 +40,7 @@ TEST(InfRtPredictor, predictor) { std::unique_ptr predictor = CreateInfRtPredictor(config); ::infrt::backends::CpuPhiAllocator cpu_allocator; - ::phi::DenseTensor* input = predictor->GetInput(0); + ::Tensor* input = predictor->GetInput(0); input->Resize({16, 784}); input->AllocateFrom(&cpu_allocator, ::phi::DataType::FLOAT32); auto* input_data = reinterpret_cast(input->data()); @@ -68,7 +68,7 @@ TEST(InfRtPredictor, cpu_predictor) { std::unique_ptr predictor = CreateInfRtPredictor(config); ::infrt::backends::CpuPhiAllocator cpu_allocator; - ::phi::DenseTensor* input = predictor->GetInput(0); + ::Tensor* input = predictor->GetInput(0); input->Resize({2, 3, 256, 256}); input->AllocateFrom(&cpu_allocator, ::phi::DataType::FLOAT32); auto* input_data = reinterpret_cast(input->data()); @@ -121,7 +121,7 @@ TEST(InfRtPredictor, trt_predictor) { std::unique_ptr predictor = CreateInfRtPredictor(config); ::infrt::backends::CpuPhiAllocator cpu_allocator; - ::phi::DenseTensor* input = predictor->GetInput(0); + ::Tensor* input = predictor->GetInput(0); input->Resize({2, 3, 256, 256}); input->AllocateFrom(&cpu_allocator, ::phi::DataType::FLOAT32); auto* input_data = reinterpret_cast(input->data()); @@ -166,7 +166,7 @@ TEST(InfRtPredictor, gpu_predictor) { ::infrt::backends::GpuPhiAllocator gpu_allocator; - ::phi::DenseTensor* input = predictor->GetInput(0); + ::Tensor* input = predictor->GetInput(0); input->Resize({2, 3, 256, 256}); input->AllocateFrom(&gpu_allocator, ::phi::DataType::FLOAT32); auto* data = reinterpret_cast(input->data()); diff --git a/paddle/infrt/backends/tensorrt/trt_engine.cc b/paddle/infrt/backends/tensorrt/trt_engine.cc index a539078e4af4d..97f36829ddaee 100644 --- a/paddle/infrt/backends/tensorrt/trt_engine.cc +++ b/paddle/infrt/backends/tensorrt/trt_engine.cc @@ -244,11 +244,11 @@ bool TrtEngine::SetupNetworkAndConfig(const BuildOptions& build, } void TrtEngine::PrepareOutputHandle(const std::string& out_name) { - ::phi::DenseTensor t; + ::Tensor t; outputs_.emplace(out_name, t); } -::phi::DenseTensor* TrtEngine::GetOutput(const std::string& name) { +::Tensor* TrtEngine::GetOutput(const std::string& name) { return &outputs_[name]; } @@ -256,7 +256,7 @@ size_t TrtEngine::GetOutputNum() const { return outputs_.size(); } bool TrtEngine::SetUpInference( const InferenceOptions& inference, - const std::unordered_map& inputs) { + const std::unordered_map& 
inputs) { // TODO(wilber): now only create one exec_context FreshDeviceId(); CHECK(engine_ != nullptr); diff --git a/paddle/infrt/backends/tensorrt/trt_engine.h b/paddle/infrt/backends/tensorrt/trt_engine.h index 44f36a84cb5dc..5d7787f68a0fd 100644 --- a/paddle/infrt/backends/tensorrt/trt_engine.h +++ b/paddle/infrt/backends/tensorrt/trt_engine.h @@ -80,16 +80,15 @@ class TrtEngine { void Run(const ::phi::GPUContext& ctx); // TODO(wilber): How to support multiple execution contexts? - bool SetUpInference( - const InferenceOptions& inference, - const std::unordered_map& inputs); + bool SetUpInference(const InferenceOptions& inference, + const std::unordered_map& inputs); void GetEngineInfo(); void PrepareOutputHandle(const std::string& out_name); // TODO(wilber): The output tensor names are: output_0, output_1, ... - ::phi::DenseTensor* GetOutput(const std::string&); + ::Tensor* GetOutput(const std::string&); size_t GetOutputNum() const; @@ -119,7 +118,7 @@ class TrtEngine { std::vector> bindings_; int device_id_{0}; bool is_dynamic_shape_{false}; - std::unordered_map outputs_; + std::unordered_map outputs_; }; } // namespace tensorrt diff --git a/paddle/infrt/backends/tensorrt/trt_utils.h b/paddle/infrt/backends/tensorrt/trt_utils.h index b2d5659fd2520..e61b76e542e12 100644 --- a/paddle/infrt/backends/tensorrt/trt_utils.h +++ b/paddle/infrt/backends/tensorrt/trt_utils.h @@ -93,7 +93,7 @@ class TrtLogger : public nvinfer1::ILogger { struct Binding { bool is_input{false}; nvinfer1::DataType data_type{nvinfer1::DataType::kFLOAT}; - ::phi::DenseTensor* buffer{nullptr}; + ::Tensor* buffer{nullptr}; std::string name; }; @@ -104,7 +104,7 @@ class Bindings { void AddBinding(int32_t b, const std::string& name, bool is_input, - ::phi::DenseTensor* buffer, + ::Tensor* buffer, nvinfer1::DataType data_type) { while (bindings_.size() <= static_cast(b)) { bindings_.emplace_back(); diff --git a/paddle/infrt/dialect/infrt/pass/infrt_weights_unfold_pass.cc b/paddle/infrt/dialect/infrt/pass/infrt_weights_unfold_pass.cc index 6a9f828dc9524..23d411021f969 100644 --- a/paddle/infrt/dialect/infrt/pass/infrt_weights_unfold_pass.cc +++ b/paddle/infrt/dialect/infrt/pass/infrt_weights_unfold_pass.cc @@ -80,7 +80,7 @@ void InfrtWeightsFoldPass::runOnFunction() { if (auto tensor_map_get_op = llvm::dyn_cast<::infrt::phi::TensorMapGetTensorOp>(user_op)) { ::llvm::StringRef arg_name = tensor_map_get_op.name(); - ::phi::DenseTensor* tensor = map.GetDenseTensor(arg_name.str()); + ::Tensor* tensor = map.GetDenseTensor(arg_name.str()); if (tensor->dtype() != ::phi::DataType::FLOAT32) { CHECK(false) << "the weight tensor type now only support float32."; diff --git a/paddle/infrt/host_context/kernel_frame.cc b/paddle/infrt/host_context/kernel_frame.cc index 266c145f47839..a03ed0d156eaf 100644 --- a/paddle/infrt/host_context/kernel_frame.cc +++ b/paddle/infrt/host_context/kernel_frame.cc @@ -38,7 +38,7 @@ std::string KernelFrame::DumpArgTypes() const { DUMP(tensor::DenseHostTensor); DUMP(float); DUMP(int); - DUMP(::phi::DenseTensor); + DUMP(::Tensor); DUMP(::phi::MetaTensor); DUMP(::phi::CPUContext); DUMP(host_context::None); diff --git a/paddle/infrt/host_context/mlir_to_runtime_translate.cc b/paddle/infrt/host_context/mlir_to_runtime_translate.cc index 81b41d61ded3e..9b2190be23c1e 100644 --- a/paddle/infrt/host_context/mlir_to_runtime_translate.cc +++ b/paddle/infrt/host_context/mlir_to_runtime_translate.cc @@ -308,7 +308,7 @@ bool MlirToRuntimeTranslator::EmitGeneralOp( arg_value = GetOpResult(upstream_op); } } - if 
(arg_value->is_type<::phi::DenseTensor>()) { + if (arg_value->is_type<::Tensor>()) { impl_->runtime->FeedInArgs( std::make_pair(std::to_string(i), ValueRef(arg_value))); } @@ -462,8 +462,8 @@ bool MlirToRuntimeTranslator::EmitGeneralOp( for (int i = 0, e = op->getNumResults(); i < e; i++) { auto res = op->getResult(i); if (res.getType().isa<::infrt::DenseTensorType>()) { - auto r = impl_->value_map.try_emplace( - res, ValueRef(new Value{::phi::DenseTensor()})); + auto r = + impl_->value_map.try_emplace(res, ValueRef(new Value{::Tensor()})); CHECK(r.second) << "Duplicate add mlir value [" << DumpToString(res) << "]"; res_values.push_back(r.first->second.get()); diff --git a/paddle/infrt/host_context/value.cc b/paddle/infrt/host_context/value.cc index 822ee108c897c..1bfdc59b96012 100644 --- a/paddle/infrt/host_context/value.cc +++ b/paddle/infrt/host_context/value.cc @@ -60,8 +60,8 @@ void CopyTo(const Value& from, Value* to) { else if (std::is_same::value) to->data = reinterpret_cast(arg); #ifdef INFRT_WITH_PHI - else if (std::is_same::value) - to->data = reinterpret_cast<::phi::DenseTensor const&>(arg); + else if (std::is_same::value) + to->data = reinterpret_cast<::Tensor const&>(arg); #endif else LOG(FATAL) << "Not supported Value copy: " << typeid(T).name(); diff --git a/paddle/infrt/host_context/value.h b/paddle/infrt/host_context/value.h index af785c13349fd..b5e47196d57bd 100644 --- a/paddle/infrt/host_context/value.h +++ b/paddle/infrt/host_context/value.h @@ -80,17 +80,17 @@ using ValueVariantType = ::infrt::TargetType, #ifdef INFRT_WITH_PHI ::phi::MetaTensor, - ::phi::DenseTensor, + ::Tensor, backends::CpuPhiContext, #ifdef INFRT_WITH_GPU backends::GpuPhiContext, ::phi::GPUContext, #endif // INFRT_WITH_GPU ::phi::CPUContext, - std::vector, - std::vector<::phi::DenseTensor*>, - paddle::experimental::ScalarBase<::phi::DenseTensor>, - paddle::experimental::IntArrayBase<::phi::DenseTensor>, + std::vector, + std::vector<::Tensor*>, + paddle::experimental::ScalarBase<::Tensor>, + paddle::experimental::IntArrayBase<::Tensor>, std::vector, std::vector<::phi::MetaTensor*>, ::phi::MetaConfig, @@ -146,7 +146,7 @@ class Value : public common::Object { explicit Value(::phi::GPUContext&& x) : data(std::move(x)) {} explicit Value(backends::GpuPhiContext&& x) : data(std::move(x)) {} #endif - explicit Value(::phi::DenseTensor&& x) : data(std::move(x)) {} + explicit Value(::Tensor&& x) : data(std::move(x)) {} explicit Value(::phi::MetaTensor&& x) : data(std::move(x)) {} explicit Value(::phi::MetaConfig&& x) : data(std::move(x)) {} #ifdef INFRT_WITH_TRT diff --git a/paddle/infrt/kernel/phi/dense_tensor_kernels.cc b/paddle/infrt/kernel/phi/dense_tensor_kernels.cc index 8c49f47e7d873..645df69171048 100644 --- a/paddle/infrt/kernel/phi/dense_tensor_kernels.cc +++ b/paddle/infrt/kernel/phi/dense_tensor_kernels.cc @@ -37,27 +37,26 @@ namespace infrt { namespace kernel { namespace phi { -::phi::DenseTensor CreateDenseTensor( +::Tensor CreateDenseTensor( const ::phi::CPUContext& context, host_context::Attribute> dims, host_context::Attribute> lod, host_context::Attribute<::infrt::LayoutType> layout, host_context::Attribute<::infrt::PrecisionType> precision) { - return ::phi::DenseTensor( - const_cast<::phi::Allocator*>(&context.GetAllocator()), - ::phi::DenseTensorMeta(ConvertPrecisionToPhi(precision.get()), - ::phi::make_ddim(dims.get()), - ConvertLayoutToPhi(layout.get()), - {})); + return ::Tensor(const_cast<::phi::Allocator*>(&context.GetAllocator()), + 
::phi::DenseTensorMeta(ConvertPrecisionToPhi(precision.get()), + ::phi::make_ddim(dims.get()), + ConvertLayoutToPhi(layout.get()), + {})); } -::phi::DenseTensor CreateInitedDenseTensorF32( +::Tensor CreateInitedDenseTensorF32( const ::phi::CPUContext& context, host_context::Attribute> dims, host_context::Attribute> lod, host_context::Attribute<::infrt::LayoutType> layout, host_context::Attribute value) { - ::phi::DenseTensor dense_tensor( + ::Tensor dense_tensor( const_cast<::phi::Allocator*>(&context.GetAllocator()), ::phi::DenseTensorMeta( ConvertPrecisionToPhi(::infrt::PrecisionType::FLOAT32), @@ -71,13 +70,13 @@ ::phi::DenseTensor CreateInitedDenseTensorF32( return dense_tensor; } -::phi::DenseTensor CreateHostInitedDenseTensorF32( +::Tensor CreateHostInitedDenseTensorF32( const ::phi::CPUContext& context, host_context::Attribute> dims, host_context::Attribute> lod, host_context::Attribute<::infrt::LayoutType> layout, host_context::Attribute> values) { - ::phi::DenseTensor dense_tensor( + ::Tensor dense_tensor( const_cast<::phi::Allocator*>(&context.GetAllocator()), ::phi::DenseTensorMeta( ConvertPrecisionToPhi(::infrt::PrecisionType::FLOAT32), @@ -92,21 +91,20 @@ ::phi::DenseTensor CreateHostInitedDenseTensorF32( return dense_tensor; } -::phi::DenseTensor CreateGPUDenseTensor( +::Tensor CreateGPUDenseTensor( const ::phi::GPUContext& context, host_context::Attribute> dims, host_context::Attribute> lod, host_context::Attribute<::infrt::LayoutType> layout, host_context::Attribute<::infrt::PrecisionType> precision) { - return ::phi::DenseTensor( - const_cast<::phi::Allocator*>(&context.GetAllocator()), - ::phi::DenseTensorMeta(ConvertPrecisionToPhi(precision.get()), - ::phi::make_ddim(dims.get()), - ConvertLayoutToPhi(layout.get()), - {})); + return ::Tensor(const_cast<::phi::Allocator*>(&context.GetAllocator()), + ::phi::DenseTensorMeta(ConvertPrecisionToPhi(precision.get()), + ::phi::make_ddim(dims.get()), + ConvertLayoutToPhi(layout.get()), + {})); } -void FillDenseTensorF32(::phi::DenseTensor* dense_tensor, +void FillDenseTensorF32(::Tensor* dense_tensor, host_context::Attribute> value) { auto place = dense_tensor->place(); float* a_data = dense_tensor->mutable_data(place); @@ -127,7 +125,7 @@ void FillDenseTensorF32(::phi::DenseTensor* dense_tensor, } } -void PrintDenseTensor(::phi::DenseTensor* dense_tensor) { +void PrintDenseTensor(::Tensor* dense_tensor) { #ifndef INFRT_WITH_GPU #define PRINT_META_DATA(PHI_DATATYPE, DTYPE) \ case ::phi::DataType::PHI_DATATYPE: { \ @@ -204,8 +202,7 @@ ::infrt::phi::DenseTensorMap LoadParameters(const std::string& file_path) { std::ifstream param_file(param_path, std::ios::binary); switch (var.type().type()) { case ::paddle::framework::proto::VarType_Type_LOD_TENSOR: { - std::unique_ptr<::phi::DenseTensor> tensor{ - std::make_unique<::phi::DenseTensor>()}; + std::unique_ptr<::Tensor> tensor{std::make_unique<::Tensor>()}; ::infrt::paddle::DeserializeFromStream(param_file, tensor.get(), ctx); map.SetDenseTensor(var.name(), std::move(tensor)); } break; @@ -253,8 +250,7 @@ ::infrt::phi::DenseTensorMap LoadCombinedParameters( ctx.SetHostAllocator(allocator_ptr); ctx.SetZeroAllocator(allocator_ptr); for (auto& var : tmp) { - std::unique_ptr<::phi::DenseTensor> tensor{ - std::make_unique<::phi::DenseTensor>()}; + std::unique_ptr<::Tensor> tensor{std::make_unique<::Tensor>()}; ::infrt::paddle::DeserializeFromStream(param_file, tensor.get(), ctx); map.SetDenseTensor(var, std::move(tensor)); } @@ -289,8 +285,7 @@ ::infrt::phi::DenseTensorMap 
LoadCombinedParamsToGpu( ctx.PartialInitWithoutAllocator(); for (auto& var : tmp) { - std::unique_ptr<::phi::DenseTensor> tensor{ - std::make_unique<::phi::DenseTensor>()}; + std::unique_ptr<::Tensor> tensor{std::make_unique<::Tensor>()}; ::paddle::framework::DeserializeFromStream(param_file, tensor.get(), ctx); map.SetDenseTensor(var, std::move(tensor)); } @@ -305,9 +300,8 @@ ::infrt::phi::DenseTensorMap LoadCombinedParams( return LoadCombinedParameters(model_path.get(), params_path.get()); } -::phi::DenseTensor TensorMapGetTensor( - const ::infrt::phi::DenseTensorMap& map, - host_context::Attribute name) { +::Tensor TensorMapGetTensor(const ::infrt::phi::DenseTensorMap& map, + host_context::Attribute name) { auto* tensor = map.GetDenseTensor(name.get()); CHECK(tensor); return *tensor; @@ -348,10 +342,10 @@ inline size_t SizeOfDataType(::phi::DataType data_type) { } return 0; } -void GpuMemCpy(const ::phi::DenseTensor& input, +void GpuMemCpy(const ::Tensor& input, const ::phi::GPUContext& context, bool d2h, - ::phi::DenseTensor* output) { + ::Tensor* output) { if (d2h) { CHECK(input.place().GetType() == ::phi::AllocationType::GPU); diff --git a/paddle/infrt/kernel/phi/dense_tensor_kernels.h b/paddle/infrt/kernel/phi/dense_tensor_kernels.h index 573b8f102ec7c..4a41ccdcfa29d 100644 --- a/paddle/infrt/kernel/phi/dense_tensor_kernels.h +++ b/paddle/infrt/kernel/phi/dense_tensor_kernels.h @@ -25,46 +25,45 @@ namespace infrt { namespace kernel { namespace phi { -::phi::DenseTensor CreateDenseTensor( +::Tensor CreateDenseTensor( const ::phi::CPUContext& context, host_context::Attribute> dims, host_context::Attribute> lod, host_context::Attribute<::infrt::LayoutType> layout, host_context::Attribute<::infrt::PrecisionType> precision); -::phi::DenseTensor CreateInitedDenseTensorF32( +::Tensor CreateInitedDenseTensorF32( const ::phi::CPUContext& context, host_context::Attribute> dims, host_context::Attribute> lod, host_context::Attribute<::infrt::LayoutType> layout, host_context::Attribute value); -::phi::DenseTensor CreateHostInitedDenseTensorF32( +::Tensor CreateHostInitedDenseTensorF32( const ::phi::CPUContext& context, host_context::Attribute> dims, host_context::Attribute> lod, host_context::Attribute<::infrt::LayoutType> layout, host_context::Attribute> values); -::phi::DenseTensor CreateGPUDenseTensor( +::Tensor CreateGPUDenseTensor( const ::phi::GPUContext& context, host_context::Attribute> dims, host_context::Attribute> lod, host_context::Attribute<::infrt::LayoutType> layout, host_context::Attribute<::infrt::PrecisionType> precision); -void FillDenseTensorF32(::phi::DenseTensor* dense_tensor, +void FillDenseTensorF32(::Tensor* dense_tensor, host_context::Attribute> values); -void PrintDenseTensor(::phi::DenseTensor* dense_tensor); +void PrintDenseTensor(::Tensor* dense_tensor); ::infrt::phi::DenseTensorMap LoadParameters(const std::string& path); ::infrt::phi::DenseTensorMap LoadParams( host_context::Attribute path); -::phi::DenseTensor TensorMapGetTensor( - const ::infrt::phi::DenseTensorMap& map, - host_context::Attribute name); +::Tensor TensorMapGetTensor(const ::infrt::phi::DenseTensorMap& map, + host_context::Attribute name); ::infrt::phi::DenseTensorMap LoadCombinedParams( host_context::Attribute model_path, @@ -79,10 +78,10 @@ ::infrt::phi::DenseTensorMap LoadCombinedParamsToGpu( int32_t TensorMapGetSize(const ::infrt::phi::DenseTensorMap& map); #ifdef INFRT_WITH_GPU -void GpuMemCpy(const ::phi::DenseTensor& input, +void GpuMemCpy(const ::Tensor& input, const ::phi::GPUContext& 
context, bool d2h, - ::phi::DenseTensor* output); + ::Tensor* output); #endif } // namespace phi diff --git a/paddle/infrt/kernel/phi/infershaped/infershape_launchers_test.cc b/paddle/infrt/kernel/phi/infershaped/infershape_launchers_test.cc index aa577da60c3ae..c37569f8b4cb0 100644 --- a/paddle/infrt/kernel/phi/infershaped/infershape_launchers_test.cc +++ b/paddle/infrt/kernel/phi/infershaped/infershape_launchers_test.cc @@ -26,9 +26,9 @@ namespace infrt { namespace kernel { namespace { -static void ElementwiseAddTest(const ::phi::DenseTensor& a, - const ::phi::DenseTensor& b, - ::phi::DenseTensor* c); +static void ElementwiseAddTest(const ::Tensor& a, + const ::Tensor& b, + ::Tensor* c); } TEST(utils, registry) { @@ -66,9 +66,9 @@ TEST(ElementwiseAdd, launcher_registry) { auto fancy_allocator = std::unique_ptr<::phi::Allocator>(new FancyAllocator); auto* alloc = fancy_allocator.get(); - ::phi::DenseTensor a(alloc, meta); - ::phi::DenseTensor b(alloc, meta); - ::phi::DenseTensor c(alloc, meta); + ::Tensor a(alloc, meta); + ::Tensor b(alloc, meta); + ::Tensor c(alloc, meta); auto place = ::phi::CPUPlace(); float* a_data = a.mutable_data(place); diff --git a/paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launcher.cc b/paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launcher.cc index cb9640451f9b2..6ee0bc20f9939 100644 --- a/paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launcher.cc +++ b/paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launcher.cc @@ -25,9 +25,9 @@ void InferShapedKernelLauncher::CreateKernelFrameForInferShape( for (host_context::Value* value : frame->GetValues(1, frame->GetNumElements() - 1)) { // TODO(Superjomn) To extend this. - if (value->is_type<::phi::DenseTensor>()) { - values.emplace_back(new host_context::Value{ - ::phi::MetaTensor{&value->get<::phi::DenseTensor>()}}); + if (value->is_type<::Tensor>()) { + values.emplace_back( + new host_context::Value{::phi::MetaTensor{&value->get<::Tensor>()}}); infershape_kernel_frame_builder.AddArgument(values.back().get()); } else { infershape_kernel_frame_builder.AddArgument(value); diff --git a/paddle/infrt/kernel/phi/infershaped/infershaped_utils.h b/paddle/infrt/kernel/phi/infershaped/infershaped_utils.h index 531d77ba952aa..999369c582654 100644 --- a/paddle/infrt/kernel/phi/infershaped/infershaped_utils.h +++ b/paddle/infrt/kernel/phi/infershaped/infershaped_utils.h @@ -23,7 +23,7 @@ namespace infrt { namespace kernel { namespace infershaped { -using KeyType = const ::phi::DenseTensor&; +using KeyType = const ::Tensor&; using CountType = uint8_t; constexpr CountType value(std::true_type) { return 1; } diff --git a/paddle/infrt/kernel/tensor_kernels.cc b/paddle/infrt/kernel/tensor_kernels.cc index 2e952e77d1f0a..77c2f90b26b8a 100644 --- a/paddle/infrt/kernel/tensor_kernels.cc +++ b/paddle/infrt/kernel/tensor_kernels.cc @@ -68,14 +68,14 @@ int32_t TensorMapGetSize(TensorMap map) { return map.size(); } // TODO(wilber): Maybe we should place TensorList type in dt dialect. 
#ifdef INFRT_WITH_PHI -::phi::DenseTensor TensorListGetTensor(std::vector<::phi::DenseTensor *> list, - Attribute idx) { +::Tensor TensorListGetTensor(std::vector<::Tensor *> list, + Attribute idx) { CHECK_LT(idx.get(), static_cast(list.size())) << "idx should less than list size"; return *list[idx.get()]; } -int32_t TensorListGetSize(const std::vector<::phi::DenseTensor *> &list) { +int32_t TensorListGetSize(const std::vector<::Tensor *> &list) { return list.size(); } #endif diff --git a/paddle/infrt/kernel/tensorrt/trt_helper.h b/paddle/infrt/kernel/tensorrt/trt_helper.h index 4f1f1dde38cbe..6f7455b848d58 100644 --- a/paddle/infrt/kernel/tensorrt/trt_helper.h +++ b/paddle/infrt/kernel/tensorrt/trt_helper.h @@ -64,7 +64,7 @@ static std::vector ArrayAttrToVec(const mlir::ArrayAttr& int_array_attr) { return ret; } -static nvinfer1::Weights TensorToWeights(::phi::DenseTensor* tensor) { +static nvinfer1::Weights TensorToWeights(::Tensor* tensor) { CHECK_NOTNULL(tensor); nvinfer1::Weights ret; ret.type = TensorTypeToWeightType(tensor->dtype()); diff --git a/paddle/infrt/kernel/tensorrt/trt_kernels.cc b/paddle/infrt/kernel/tensorrt/trt_kernels.cc index 931fe21b2c710..3d30b0264c2d4 100644 --- a/paddle/infrt/kernel/tensorrt/trt_kernels.cc +++ b/paddle/infrt/kernel/tensorrt/trt_kernels.cc @@ -69,7 +69,7 @@ ::infrt::backends::tensorrt::TrtEngine CreateTrtEngine( auto& region = operation.getRegion(0); auto& block = region.getBlocks().front(); - std::unordered_map trt_bind_inputs; + std::unordered_map trt_bind_inputs; ValueToITensorMap value_to_trt_tensor_map; ValueToTensorMap value_to_tensor_map; @@ -80,7 +80,7 @@ ::infrt::backends::tensorrt::TrtEngine CreateTrtEngine( const std::string input_name = "input_" + std::to_string(idx); auto* v = symbol_table->GetValue(std::to_string(idx)); CHECK_NOTNULL(v); - auto* t = &v->get<::phi::DenseTensor>(); + auto* t = &v->get<::Tensor>(); value_to_tensor_map[operand] = t; // TODO(wilber): get input info from mlir. 
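// --- Illustrative sketch (not part of this patch) ---------------------------
// The TensorToWeights helper in trt_helper.h above wraps a ::Tensor
// (phi::DenseTensor) buffer as nvinfer1::Weights without copying. A minimal
// float32-only version could look roughly like the following; ToFloatWeights
// is a hypothetical name and the float32 assumption is ours -- the real helper
// dispatches on tensor->dtype() via TensorTypeToWeightType.
#include <NvInfer.h>                          // nvinfer1::Weights, DataType
#include "paddle/phi/core/dense_tensor.h"     // phi::DenseTensor

static nvinfer1::Weights ToFloatWeights(::phi::DenseTensor* tensor) {
  nvinfer1::Weights w;
  w.type = nvinfer1::DataType::kFLOAT;  // assumes the tensor holds float32
  w.values = tensor->data();            // borrow the existing buffer, no copy
  w.count = tensor->numel();            // element count, not byte count
  return w;
}
// -----------------------------------------------------------------------------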
@@ -186,10 +186,10 @@ void PrintTrtLayer(backends::tensorrt::TrtEngine* engine) { engine->GetEngineInfo(); } -std::vector<::phi::DenseTensor*> TrtEngineCompute( - backends::tensorrt::TrtEngine* engine, const ::phi::GPUContext& context) { +std::vector<::Tensor*> TrtEngineCompute(backends::tensorrt::TrtEngine* engine, + const ::phi::GPUContext& context) { engine->Run(context); - std::vector<::phi::DenseTensor*> res; + std::vector<::Tensor*> res; for (size_t i = 0; i < engine->GetOutputNum(); ++i) { res.push_back(engine->GetOutput("output_" + std::to_string(i))); } diff --git a/paddle/infrt/kernel/tensorrt/trt_kernels.h b/paddle/infrt/kernel/tensorrt/trt_kernels.h index bf41c124a299b..254b8ed14d7d9 100644 --- a/paddle/infrt/kernel/tensorrt/trt_kernels.h +++ b/paddle/infrt/kernel/tensorrt/trt_kernels.h @@ -40,8 +40,8 @@ ::infrt::backends::tensorrt::TrtEngine CreateTrtEngine( void PrintTrtLayer(backends::tensorrt::TrtEngine* engine); -std::vector<::phi::DenseTensor*> TrtEngineCompute( - backends::tensorrt::TrtEngine* engine, const ::phi::GPUContext& context); +std::vector<::Tensor*> TrtEngineCompute(backends::tensorrt::TrtEngine* engine, + const ::phi::GPUContext& context); } // namespace tensorrt } // namespace kernel diff --git a/paddle/infrt/paddle/model_parser.cc b/paddle/infrt/paddle/model_parser.cc index da4f8b6420b22..6fc358a4c043d 100644 --- a/paddle/infrt/paddle/model_parser.cc +++ b/paddle/infrt/paddle/model_parser.cc @@ -207,7 +207,7 @@ inline ::phi::DataType PhiDataType(framework_proto::VarType::Type type) { } inline void TensorFromStream(std::istream &is, - ::phi::DenseTensor *tensor, + ::Tensor *tensor, const ::phi::CPUContext &ctx) { uint32_t version; is.read(reinterpret_cast(&version), sizeof(version)); @@ -237,7 +237,7 @@ inline void TensorFromStream(std::istream &is, } void DeserializeFromStream(std::istream &is, - ::phi::DenseTensor *tensor, + ::Tensor *tensor, const ::phi::CPUContext &dev_ctx) { { // the 1st field, unit32_t version for LoDTensor diff --git a/paddle/infrt/paddle/model_parser.h b/paddle/infrt/paddle/model_parser.h index 5f039ad5d3ad8..39af7a919318b 100644 --- a/paddle/infrt/paddle/model_parser.h +++ b/paddle/infrt/paddle/model_parser.h @@ -60,7 +60,7 @@ void ReadBinaryFile(const std::string& filename, std::string* contents); #ifdef INFRT_WITH_PHI void DeserializeFromStream(std::istream& is, - ::phi::DenseTensor* tensor, + ::Tensor* tensor, const ::phi::CPUContext& dev_ctx); #endif diff --git a/paddle/infrt/tensor/phi/tensor_map.cc b/paddle/infrt/tensor/phi/tensor_map.cc index afac7175caf4f..dd273a175d200 100644 --- a/paddle/infrt/tensor/phi/tensor_map.cc +++ b/paddle/infrt/tensor/phi/tensor_map.cc @@ -20,8 +20,8 @@ namespace infrt { namespace phi { -void DenseTensorMap::SetDenseTensor( - const std::string& name, std::unique_ptr<::phi::DenseTensor>&& tensor) { +void DenseTensorMap::SetDenseTensor(const std::string& name, + std::unique_ptr<::Tensor>&& tensor) { std::lock_guard lock(mu_); auto it = map_.emplace(std::make_pair(name, std::move(tensor))); if (!it.second) { @@ -29,8 +29,7 @@ void DenseTensorMap::SetDenseTensor( } } -::phi::DenseTensor* DenseTensorMap::GetDenseTensor( - const std::string& name) const { +::Tensor* DenseTensorMap::GetDenseTensor(const std::string& name) const { std::lock_guard lock(mu_); auto it = map_.find(name); if (it != map_.end()) { diff --git a/paddle/infrt/tensor/phi/tensor_map.h b/paddle/infrt/tensor/phi/tensor_map.h index 5a754f42fb63c..8b72cd924bf58 100644 --- a/paddle/infrt/tensor/phi/tensor_map.h +++ 
b/paddle/infrt/tensor/phi/tensor_map.h @@ -26,13 +26,13 @@ class DenseTensorMap { DenseTensorMap() = default; DenseTensorMap(DenseTensorMap&& other) : map_(std::move(other.map_)) {} void SetDenseTensor(const std::string& name, - std::unique_ptr<::phi::DenseTensor>&& tensor); - ::phi::DenseTensor* GetDenseTensor(const std::string& name) const; + std::unique_ptr<::Tensor>&& tensor); + ::Tensor* GetDenseTensor(const std::string& name) const; size_t size() const; private: mutable std::mutex mu_; - std::unordered_map> map_; + std::unordered_map> map_; }; } // namespace phi diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h index 67cedaf6710ab..628ea8c10245a 100644 --- a/paddle/phi/api/include/tensor.h +++ b/paddle/phi/api/include/tensor.h @@ -63,7 +63,7 @@ class AbstractAutogradMeta { * computation. * * This is a new Tensor design, which is independent of the original - * framework::Tensor in fluid. The original Tensor will be gradually discarded + * phi::DenseTensor in fluid. The original Tensor will be gradually discarded * in the future. * * Note: Tensor can be NULL state, Tensor is meaningful only when the diff --git a/paddle/phi/api/lib/utils/tensor_utils.cc b/paddle/phi/api/lib/utils/tensor_utils.cc index c9fb2d3734edc..b597b5085479d 100644 --- a/paddle/phi/api/lib/utils/tensor_utils.cc +++ b/paddle/phi/api/lib/utils/tensor_utils.cc @@ -32,7 +32,7 @@ void SetLoD(DstLoD* dst, const SrcLoD& src) { } std::unique_ptr MakePhiDenseTensor( - const paddle::framework::Tensor& src) { + const phi::DenseTensor& src) { return std::make_unique(src); } @@ -62,9 +62,7 @@ phi::Scalar MakePhiScalarFromVar(const framework::Variable& variable) { } } -phi::IntArray MakePhiIntArray(const paddle::framework::Tensor& src) { - return {src}; -} +phi::IntArray MakePhiIntArray(const phi::DenseTensor& src) { return {src}; } phi::IntArray MakePhiIntArrayFromVar(const framework::Variable& variable) { if (variable.IsType()) { diff --git a/paddle/phi/api/lib/utils/tensor_utils.h b/paddle/phi/api/lib/utils/tensor_utils.h index f930f5b11f64f..5b237f433aa6f 100644 --- a/paddle/phi/api/lib/utils/tensor_utils.h +++ b/paddle/phi/api/lib/utils/tensor_utils.h @@ -29,9 +29,9 @@ namespace paddle { namespace experimental { std::unique_ptr MakePhiDenseTensor( - const paddle::framework::Tensor& src); + const phi::DenseTensor& src); -phi::IntArray MakePhiIntArray(const paddle::framework::Tensor& src); +phi::IntArray MakePhiIntArray(const phi::DenseTensor& src); phi::Scalar MakePhiScalarFromVar(const framework::Variable& variable); diff --git a/paddle/phi/backends/custom/custom_device_test.cc b/paddle/phi/backends/custom/custom_device_test.cc index 2458241c3c85d..5b96a9979a596 100644 --- a/paddle/phi/backends/custom/custom_device_test.cc +++ b/paddle/phi/backends/custom/custom_device_test.cc @@ -76,7 +76,7 @@ void TestDeviceInterface(const paddle::platform::Place& place) { void TestTensorMutableData(const paddle::platform::Place& place) { std::cout << "TestTensorInitialization on " << place << std::endl; - paddle::framework::Tensor src_tensor; + phi::DenseTensor src_tensor; float* p1 = nullptr; float* p2 = nullptr; // initialization @@ -101,8 +101,8 @@ void TestTensorMutableData(const paddle::platform::Place& place) { void TestTensorShareDataWith(const paddle::platform::Place& place) { std::cout << "TestTensorShareDataWith on " << place << std::endl; - paddle::framework::Tensor src_tensor; - paddle::framework::Tensor dst_tensor; + phi::DenseTensor src_tensor; + phi::DenseTensor dst_tensor; 
src_tensor.mutable_data(phi::make_ddim({2, 3, 4}), place); dst_tensor.ShareDataWith(src_tensor); ASSERT_EQ(src_tensor.data(), dst_tensor.data()); @@ -113,9 +113,9 @@ void TestTensorUtils(const paddle::platform::Place& place) { if (paddle::platform::is_custom_place(place) == false) { return; } - paddle::framework::Tensor src_tensor; - paddle::framework::Tensor gpu_tensor; - paddle::framework::Tensor dst_tensor; + phi::DenseTensor src_tensor; + phi::DenseTensor gpu_tensor; + phi::DenseTensor dst_tensor; int* src_ptr = src_tensor.mutable_data(phi::make_ddim({3, 3}), paddle::platform::CPUPlace()); @@ -148,7 +148,7 @@ void TestTensorUtils(const paddle::platform::Place& place) { EXPECT_EQ(src_ptr[i], dst_ptr_tmp[i]); } - paddle::framework::Tensor slice_tensor = src_tensor.Slice(1, 2); + phi::DenseTensor slice_tensor = src_tensor.Slice(1, 2); // CPU Slice Tensor to GPU Tensor paddle::framework::TensorCopy(slice_tensor, place, gpu_ctx, &gpu_tensor); diff --git a/paddle/phi/core/dense_tensor.h b/paddle/phi/core/dense_tensor.h index e9a6be66b98ca..abf242acdb22a 100644 --- a/paddle/phi/core/dense_tensor.h +++ b/paddle/phi/core/dense_tensor.h @@ -192,9 +192,9 @@ class DenseTensor : public TensorBase, - Question: In what scenarios will version counters NOT be shared? - Answer: Replacing a `Variable`'s data by calling `Tensor::ShareDataWith(...)` or `Tensor::ShareBufferWith(...)`. Because they - share the same Allocation but not framework::Tensor. + share the same Allocation but not phi::DenseTensor. - - Question: Why put the inplace_version_counter_ in framework::Tensor instead + - Question: Why put the inplace_version_counter_ in phi::DenseTensor instead of Allocation or Variable? - Answer: 1. Tensor can call ResetHolder() to reset the corresponding Allocation so diff --git a/paddle/phi/core/dense_tensor.inl b/paddle/phi/core/dense_tensor.inl index eead55d8a0067..1ed772fd67586 100644 --- a/paddle/phi/core/dense_tensor.inl +++ b/paddle/phi/core/dense_tensor.inl @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ /* --------------------------- */ -/* From framework::Tensor */ +/* From phi::DenseTensor */ /* --------------------------- */ -/* The following members & interfaces were copied from framework::Tensor, +/* The following members & interfaces were copied from phi::DenseTensor, so as to facilitate the unification of different Tensors Will be adjusted/removed/moved in the near future @@ -134,7 +134,7 @@ inline void set_format(const dnnl::memory::format_tag format) { /* ------------------------------ */ /* From framework::LoDTensor */ /* ------------------------------ */ -/* The following members & interfaces were copied from framework::Tensor, +/* The following members & interfaces were copied from phi::DenseTensor, so as to facilitate the unification of different Tensors Will be adjusted/removed/moved in the near future diff --git a/paddle/phi/core/dense_tensor_impl.cc b/paddle/phi/core/dense_tensor_impl.cc index c4600328c4afa..4982f0db3e012 100644 --- a/paddle/phi/core/dense_tensor_impl.cc +++ b/paddle/phi/core/dense_tensor_impl.cc @@ -25,7 +25,7 @@ limitations under the License. 
*/ namespace phi { /* --------------------------- */ -/* From framework::Tensor */ +/* From phi::DenseTensor */ /* --------------------------- */ DenseTensor::DenseTensor() { meta_.dtype = paddle::experimental::DataType::FLOAT32; diff --git a/paddle/phi/kernels/funcs/fc_functor.cc b/paddle/phi/kernels/funcs/fc_functor.cc index 0434483be1326..f428746bc524d 100644 --- a/paddle/phi/kernels/funcs/fc_functor.cc +++ b/paddle/phi/kernels/funcs/fc_functor.cc @@ -33,12 +33,12 @@ void FCFunctor::operator()(const DeviceContext& context, bool relu, bool padding_weights) { auto blas = GetBlas(context); - paddle::framework::Tensor Y1; + phi::DenseTensor Y1; T* Y1_data = nullptr; if (padding_weights) { const int NN = N + 4; const int KK = K + 4; - paddle::framework::Tensor X1; + phi::DenseTensor X1; T* X1_data = X1.mutable_data({M * KK}, paddle::platform::CPUPlace()); Y1_data = Y1.mutable_data({M * (N + 4)}, paddle::platform::CPUPlace()); #ifdef PADDLE_WITH_MKLML diff --git a/paddle/phi/kernels/funcs/math_function.cc b/paddle/phi/kernels/funcs/math_function.cc index 19bbec124f2ca..7102b9cb11ad6 100644 --- a/paddle/phi/kernels/funcs/math_function.cc +++ b/paddle/phi/kernels/funcs/math_function.cc @@ -108,8 +108,8 @@ DEFINE_CPU_TRANS(6); template void TransposeNormal::operator()( const DeviceContext& context, - const paddle::framework::Tensor& in, - paddle::framework::Tensor* out, + const phi::DenseTensor& in, + phi::DenseTensor* out, const std::vector& axis) { const int rank = axis.size(); auto in_stride = phi::stride(in.dims()); @@ -151,7 +151,7 @@ DEFINE_CPU_TRANS_NORMAL(phi::dtype::complex); DEFINE_CPU_TRANS_NORMAL(phi::dtype::complex); struct TensorSetConstantCPU { - TensorSetConstantCPU(paddle::framework::Tensor* tensor, float value) + TensorSetConstantCPU(phi::DenseTensor* tensor, float value) : tensor_(tensor), value_(value) {} template void apply() const { @@ -159,14 +159,14 @@ struct TensorSetConstantCPU { auto* begin = tensor_->mutable_data(cpu); std::fill(begin, begin + tensor_->numel(), static_cast(value_)); } - paddle::framework::Tensor* tensor_; + phi::DenseTensor* tensor_; float value_; }; template <> void set_constant_with_place( const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value) { PADDLE_THROW(phi::errors::Unimplemented("XPUPlace is not supported")); } @@ -174,7 +174,7 @@ void set_constant_with_place( template <> void set_constant_with_place( const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value) { PADDLE_THROW(phi::errors::Unimplemented("NPUPlace is not supported")); } @@ -182,7 +182,7 @@ void set_constant_with_place( template <> void set_constant_with_place( const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value) { PADDLE_THROW(phi::errors::Unimplemented("NPUPinnedPlace is not supported")); } @@ -190,7 +190,7 @@ void set_constant_with_place( template <> void set_constant_with_place( const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value) { PADDLE_THROW(phi::errors::Unimplemented("IPUPlace is not supported")); } @@ -198,7 +198,7 @@ void set_constant_with_place( template <> void set_constant_with_place( const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value) { PADDLE_THROW(phi::errors::Unimplemented("CustomPlace is not supported")); } @@ 
-206,7 +206,7 @@ void set_constant_with_place( template <> void set_constant_with_place( const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value) { phi::VisitDataType(tensor->dtype(), TensorSetConstantCPU(tensor, value)); } @@ -214,7 +214,7 @@ void set_constant_with_place( template <> void set_constant_with_place( const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value) { PADDLE_THROW(phi::errors::Unimplemented("MLUPlace is not supported")); } @@ -222,7 +222,7 @@ void set_constant_with_place( template <> void set_constant_with_place( const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value) { phi::VisitDataType(tensor->dtype(), TensorSetConstantCPU(tensor, value)); } @@ -230,7 +230,7 @@ void set_constant_with_place( struct TensorSetConstantWithPlace : public std::unary_function { TensorSetConstantWithPlace(const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value) : context_(context), tensor_(tensor), value_(value) {} @@ -240,12 +240,12 @@ struct TensorSetConstantWithPlace } const paddle::platform::DeviceContext& context_; - paddle::framework::Tensor* tensor_; + phi::DenseTensor* tensor_; float value_; }; void set_constant(const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value) { TensorSetConstantWithPlace func(context, tensor, value); #ifdef PADDLE_WITH_CUSTOM_DEVICE @@ -273,9 +273,9 @@ template struct RowwiseMean; template struct RowwiseAdd { void operator()(const phi::CPUContext& context, - const paddle::framework::Tensor& input, - const paddle::framework::Tensor& vector, - paddle::framework::Tensor* output) { + const phi::DenseTensor& input, + const phi::DenseTensor& vector, + phi::DenseTensor* output) { auto in_dims = input.dims(); auto out_dims = output->dims(); auto size = input.numel() / in_dims[0]; diff --git a/paddle/phi/kernels/funcs/math_function.cu b/paddle/phi/kernels/funcs/math_function.cu index c829adbc41373..06ea7f573a5a1 100644 --- a/paddle/phi/kernels/funcs/math_function.cu +++ b/paddle/phi/kernels/funcs/math_function.cu @@ -105,8 +105,8 @@ __global__ void TransposeNormalKernel(const T* in_ptr, template void TransposeNormal::operator()( const DeviceContext& context, - const paddle::framework::Tensor& in, - paddle::framework::Tensor* out, + const phi::DenseTensor& in, + phi::DenseTensor* out, const std::vector& axis) { const int rank = axis.size(); auto in_stride = phi::stride(in.dims()); @@ -215,7 +215,7 @@ DEFINE_GPU_TRANS_NORMAL(phi::dtype::complex); struct TensorSetConstantGPU { TensorSetConstantGPU(const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value) : context_(context), tensor_(tensor), value_(value) {} @@ -228,14 +228,14 @@ struct TensorSetConstantGPU { } const paddle::platform::DeviceContext& context_; - paddle::framework::Tensor* tensor_; + phi::DenseTensor* tensor_; float value_; }; template <> void set_constant_with_place( const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value) { phi::VisitDataType(tensor->dtype(), TensorSetConstantGPU(context, tensor, value)); @@ -255,9 +255,9 @@ __global__ void RowwiseAddKernel( template struct RowwiseAdd { void operator()(const phi::GPUContext& 
context, - const paddle::framework::Tensor& input, - const paddle::framework::Tensor& vector, - paddle::framework::Tensor* output) { + const phi::DenseTensor& input, + const phi::DenseTensor& vector, + phi::DenseTensor* output) { auto in_dims = input.dims(); auto out_dims = output->dims(); auto size = input.numel() / in_dims[0]; @@ -304,8 +304,8 @@ template struct ColwiseSum; template <> void ColwiseSum::operator()( const phi::GPUContext& context, - const paddle::framework::Tensor& input, - paddle::framework::Tensor* vector) { + const phi::DenseTensor& input, + phi::DenseTensor* vector) { auto in_dims = input.dims(); auto size = input.numel() / in_dims[0]; PADDLE_ENFORCE_EQ(vector->numel(), @@ -316,7 +316,7 @@ void ColwiseSum::operator()( " dimension. Expected vector size=%d, but received %d", size, vector->numel())); - paddle::framework::Tensor one; + phi::DenseTensor one; one.mutable_data({in_dims[0]}, context.GetPlace()); SetConstant set; set(context, &one, static_cast(1.0)); @@ -340,8 +340,8 @@ template struct RowwiseSum; template <> void RowwiseSum::operator()( const phi::GPUContext& context, - const paddle::framework::Tensor& input, - paddle::framework::Tensor* vector) { + const phi::DenseTensor& input, + phi::DenseTensor* vector) { auto in_dims = input.dims(); auto size = input.numel() / in_dims[0]; PADDLE_ENFORCE_EQ(vector->numel(), @@ -352,7 +352,7 @@ void RowwiseSum::operator()( " dimension. Expected vector size=%d, but received %d", in_dims[0], vector->numel())); - paddle::framework::Tensor one; + phi::DenseTensor one; one.mutable_data({size}, context.GetPlace()); SetConstant set; set(context, &one, static_cast(1.0)); diff --git a/paddle/phi/kernels/funcs/math_function.h b/paddle/phi/kernels/funcs/math_function.h index d894ef2b41d82..3a95c998b1f9a 100644 --- a/paddle/phi/kernels/funcs/math_function.h +++ b/paddle/phi/kernels/funcs/math_function.h @@ -34,84 +34,82 @@ template struct TransposeNormal { // for dims >= 7 situation void operator()(const DeviceContext& context, - const paddle::framework::Tensor& in, - paddle::framework::Tensor* out, + const phi::DenseTensor& in, + phi::DenseTensor* out, const std::vector& axis); }; template struct Transpose { void operator()(const DeviceContext& context, - const paddle::framework::Tensor& in, - paddle::framework::Tensor* out, + const phi::DenseTensor& in, + phi::DenseTensor* out, const std::vector& axis); }; template struct SetConstant { void operator()(const DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, T num); }; #ifdef PADDLE_WITH_XPU template struct SetConstant { - void operator()(const XPUContext& context, - paddle::framework::Tensor* tensor, - T num); + void operator()(const XPUContext& context, phi::DenseTensor* tensor, T num); }; template struct SetConstant { void operator()(const paddle::platform::XPUDeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, T num); }; #endif template void set_constant_with_place(const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value); void set_constant(const paddle::platform::DeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, float value); template struct RowwiseAdd { void operator()(const DeviceContext& context, - const paddle::framework::Tensor& input, - const paddle::framework::Tensor& vec, - paddle::framework::Tensor* output); + const phi::DenseTensor& input, + const phi::DenseTensor& vec, + phi::DenseTensor* 
output); }; template struct ColwiseSum { void operator()(const DeviceContext& context, - const paddle::framework::Tensor& input, - paddle::framework::Tensor* vec); + const phi::DenseTensor& input, + phi::DenseTensor* vec); }; template struct RowwiseSum { void operator()(const DeviceContext& context, - const paddle::framework::Tensor& input, - paddle::framework::Tensor* vec); + const phi::DenseTensor& input, + phi::DenseTensor* vec); }; template struct RowwiseMean { void operator()(const DeviceContext& context, - const paddle::framework::Tensor& input, - paddle::framework::Tensor* vec); + const phi::DenseTensor& input, + phi::DenseTensor* vec); }; #ifdef PADDLE_WITH_XPU template struct TensorSetConstantXPU { - TensorSetConstantXPU(paddle::framework::Tensor* tensor, + TensorSetConstantXPU(phi::DenseTensor* tensor, U value, paddle::platform::Place place) : tensor_(tensor), value_(value), place_(place) {} @@ -127,7 +125,7 @@ struct TensorSetConstantXPU { static_cast(data_cpu.get()), numel * sizeof(T)); } - paddle::framework::Tensor* tensor_; + phi::DenseTensor* tensor_; U value_; paddle::platform::Place place_; }; diff --git a/paddle/phi/kernels/funcs/math_function_impl.h b/paddle/phi/kernels/funcs/math_function_impl.h index a6aeeb4f63c0d..512f21e82091c 100644 --- a/paddle/phi/kernels/funcs/math_function_impl.h +++ b/paddle/phi/kernels/funcs/math_function_impl.h @@ -25,8 +25,9 @@ namespace funcs { using paddle::framework::To32BitIndex; template -void SetConstant::operator()( - const DeviceContext& context, paddle::framework::Tensor* tensor, T num) { +void SetConstant::operator()(const DeviceContext& context, + phi::DenseTensor* tensor, + T num) { auto t = paddle::framework::EigenVector::Flatten(*tensor); t.device(*context.eigen_device()) = t.constant(static_cast(num)); } @@ -34,7 +35,7 @@ void SetConstant::operator()( #ifdef PADDLE_WITH_XPU template void SetConstant::operator()(const XPUContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, T num) { phi::VisitDataType(tensor->dtype(), TensorSetConstantXPU(tensor, num, context.GetPlace())); @@ -42,7 +43,7 @@ void SetConstant::operator()(const XPUContext& context, template void SetConstant::operator()( const paddle::platform::XPUDeviceContext& context, - paddle::framework::Tensor* tensor, + phi::DenseTensor* tensor, T num) { phi::VisitDataType(tensor->dtype(), TensorSetConstantXPU(tensor, num, context.GetPlace())); @@ -52,8 +53,8 @@ void SetConstant::operator()( template void Transpose::operator()( const DeviceContext& context, - const paddle::framework::Tensor& in, - paddle::framework::Tensor* out, + const phi::DenseTensor& in, + phi::DenseTensor* out, const std::vector& axis) { Eigen::array permute; for (int i = 0; i < Rank; i++) { @@ -74,10 +75,9 @@ void Transpose::operator()( } template -void ColwiseSum::operator()( - const DeviceContext& context, - const paddle::framework::Tensor& input, - paddle::framework::Tensor* out) { +void ColwiseSum::operator()(const DeviceContext& context, + const phi::DenseTensor& input, + phi::DenseTensor* out) { auto in_dims = input.dims(); auto size = input.numel() / in_dims[0]; PADDLE_ENFORCE_EQ(out->numel(), @@ -102,8 +102,8 @@ template class ColwiseSum { public: void operator()(const phi::CPUContext& context, - const paddle::framework::Tensor& input, - paddle::framework::Tensor* out) { + const phi::DenseTensor& input, + phi::DenseTensor* out) { auto& in_dims = input.dims(); auto height = in_dims[0]; auto size = in_dims[1]; @@ -133,10 +133,9 @@ class ColwiseSum { }; template -void 
RowwiseMean::operator()( - const DeviceContext& context, - const paddle::framework::Tensor& input, - paddle::framework::Tensor* out) { +void RowwiseMean::operator()(const DeviceContext& context, + const phi::DenseTensor& input, + phi::DenseTensor* out) { auto in_dims = input.dims(); PADDLE_ENFORCE_EQ(in_dims.size(), 2U, @@ -165,8 +164,8 @@ template class RowwiseMean { public: void operator()(const phi::CPUContext& context, - const paddle::framework::Tensor& input, - paddle::framework::Tensor* out) { + const phi::DenseTensor& input, + phi::DenseTensor* out) { auto& in_dims = input.dims(); PADDLE_ENFORCE_EQ( in_dims.size(), @@ -200,10 +199,9 @@ class RowwiseMean { }; template -void RowwiseSum::operator()( - const DeviceContext& context, - const paddle::framework::Tensor& input, - paddle::framework::Tensor* out) { +void RowwiseSum::operator()(const DeviceContext& context, + const phi::DenseTensor& input, + phi::DenseTensor* out) { auto in_dims = input.dims(); PADDLE_ENFORCE_EQ(in_dims.size(), 2U, @@ -232,8 +230,8 @@ template class RowwiseSum { public: void operator()(const phi::CPUContext& context, - const paddle::framework::Tensor& input, - paddle::framework::Tensor* out) { + const phi::DenseTensor& input, + phi::DenseTensor* out) { auto& in_dims = input.dims(); PADDLE_ENFORCE_EQ( in_dims.size(), diff --git a/paddle/phi/kernels/funcs/sequence2batch.cc b/paddle/phi/kernels/funcs/sequence2batch.cc index 7cad5b6c0b929..302dd6ec6ac62 100644 --- a/paddle/phi/kernels/funcs/sequence2batch.cc +++ b/paddle/phi/kernels/funcs/sequence2batch.cc @@ -21,9 +21,9 @@ template class CopyMatrixRowsFunctor { public: void operator()(const phi::CPUContext& context, - const paddle::framework::Tensor& src, + const phi::DenseTensor& src, paddle::framework::Vector index_lod, - paddle::framework::Tensor* dst, + phi::DenseTensor* dst, bool is_src_index) { size_t* index = index_lod.data(); auto src_dims = src.dims(); diff --git a/paddle/phi/kernels/funcs/sequence2batch.cu b/paddle/phi/kernels/funcs/sequence2batch.cu index 196ca7a2ef96e..6c8ec9bca017a 100644 --- a/paddle/phi/kernels/funcs/sequence2batch.cu +++ b/paddle/phi/kernels/funcs/sequence2batch.cu @@ -42,9 +42,9 @@ template class CopyMatrixRowsFunctor { public: void operator()(const phi::GPUContext& context, - const paddle::framework::Tensor& src, + const phi::DenseTensor& src, paddle::framework::Vector index_lod, - paddle::framework::Tensor* dst, + phi::DenseTensor* dst, bool is_src_index) { auto src_dims = src.dims(); auto dst_dims = dst->dims(); diff --git a/paddle/phi/kernels/funcs/sequence2batch.h b/paddle/phi/kernels/funcs/sequence2batch.h index ed3a50d883dc0..e73004303d576 100644 --- a/paddle/phi/kernels/funcs/sequence2batch.h +++ b/paddle/phi/kernels/funcs/sequence2batch.h @@ -38,9 +38,9 @@ class CopyMatrixRowsFunctor { // copy the input src to the indexed rows of output dst. // The indexed rows are based on the input index. void operator()(const DeviceContext& context, - const paddle::framework::Tensor& src, + const phi::DenseTensor& src, paddle::framework::Vector index_lod, - paddle::framework::Tensor* dst, + phi::DenseTensor* dst, bool is_src_index); }; diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu index 5c6fd04c15e68..4b63f6758aa29 100644 --- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu @@ -1105,7 +1105,7 @@ void BatchNormKernel(const Context &ctx, // Create reserve space and workspace for batch norm. 
// Create tensor for each batchnorm op, it will be used in the // backward. Thus this tensor shouldn't be temp. - // auto *reserve_space = ctx.Output("ReserveSpace"); + // auto *reserve_space = ctx.Output("ReserveSpace"); if (reserve_space == nullptr) { reserve_space = &reserve_space_tensor; } diff --git a/paddle/phi/kernels/gpu/depthwise_conv.h b/paddle/phi/kernels/gpu/depthwise_conv.h index eae7b77519911..9acd67390face 100644 --- a/paddle/phi/kernels/gpu/depthwise_conv.h +++ b/paddle/phi/kernels/gpu/depthwise_conv.h @@ -47,12 +47,12 @@ template & strides, const std::vector& paddings, const std::vector& dilations, - framework::Tensor* output, + phi::DenseTensor* output, const DataLayout data_layout = DataLayout::kNCHW); }; @@ -62,13 +62,13 @@ template & strides, const std::vector& paddings, const std::vector& dilations, - framework::Tensor* input_grad, + phi::DenseTensor* input_grad, const DataLayout data_layout = DataLayout::kNCHW); }; @@ -78,12 +78,12 @@ template & strides, const std::vector& paddings, const std::vector& dilations, - framework::Tensor* filter_grad, + phi::DenseTensor* filter_grad, const DataLayout data_layout = DataLayout::kNCHW); }; @@ -176,7 +176,8 @@ __device__ __inline__ void KernelDepthwiseConvNCHW( int offset = in_offset + h_in * input_width + w_in; T in_data = input_data[offset]; if (fuse_relu_before_conv) { - value += weight[weight_offset] * T(max(0.0f, double(in_data))); + value += + weight[weight_offset] * static(max(0.0f, double(in_data))); } else { value += weight[weight_offset] * in_data; } @@ -228,7 +229,7 @@ __device__ __inline__ void KernelDepthwiseConvNHWC( T in_data = input_data[offset]; const T* weight = filter_data + weight_offset * output_channels + c_out; if (fuse_relu_before_conv) { - value += weight[0] * T(max(0.0f, double(in_data))); + value += weight[0] * static_cast(max(0.0f, double(in_data))); } else { value += weight[0] * in_data; } @@ -281,7 +282,7 @@ __device__ __inline__ void KernelDepthwiseConvCFilterNCHW( int offset = in_offset + h_in * input_width + w_in; if (fuse_relu_before_conv) { value += r_weight[h_f * c_filter + w_f] * - T(max(0.0f, double(input_data[offset]))); + static_cast(max(0.0f, double(input_data[offset]))); } else { value += r_weight[h_f * c_filter + w_f] * input_data[offset]; } @@ -337,7 +338,7 @@ __device__ __inline__ void KernelDepthwiseConvCFilterNHWC( in_offset + (h_in * input_width + w_in) * input_channels + c_in; if (fuse_relu_before_conv) { value += r_weight[h_f * c_filter + w_f] * - T(max(0.0, double(input_data[offset]))); + static_cast(max(0.0, double(input_data[offset]))); } else { value += r_weight[h_f * c_filter + w_f] * input_data[offset]; } @@ -880,7 +881,7 @@ __device__ __inline__ void KernelDepthwiseConvFilterGradNCHW( image_wk; if (fuse_relu_before_conv) { s += output_grad_data[gaid(bid, kernel_id, image_h, image_w)] * - T(max(0.0f, double(input_data[input_id]))); + static_cast(max(0.0f, double(input_data[input_id]))); } else { s += output_grad_data[gaid(bid, kernel_id, image_h, image_w)] * input_data[input_id]; @@ -941,7 +942,7 @@ __device__ __inline__ void KernelDepthwiseConvFilterGradNHWC( kernel_id / filter_multiplier; if (fuse_relu_before_conv) { s += output_grad_data[gaid(bid, image_h, image_w, kernel_id)] * - T(max(0.0f, double(input_data[input_id]))); + static_cast(max(0.0f, double(input_data[input_id]))); } else { s += output_grad_data[gaid(bid, image_h, image_w, kernel_id)] * input_data[input_id]; @@ -1013,7 +1014,7 @@ __device__ __inline__ void KernelDepthwiseConvFilterGradCFilterNHWC( T s(0); 
if (fuse_relu_before_conv) { s = output_grad_data[output_id] * - T(max(0.0f, double(input_data[input_id]))); + static_cast(max(0.0f, double(input_data[input_id]))); } else { s = output_grad_data[output_id] * input_data[input_id]; } @@ -1163,12 +1164,12 @@ template class DepthwiseConvFunctor { public: void operator()(const phi::GPUContext& context, - const framework::Tensor& input, - const framework::Tensor& filter, + const phi::DenseTensor& input, + const phi::DenseTensor& filter, const std::vector& strides, const std::vector& paddings, const std::vector& dilations, - framework::Tensor* output, + phi::DenseTensor* output, const DataLayout data_layout = DataLayout::kNCHW) { const int batch_size = input.dims()[0]; const int input_channels = @@ -1199,7 +1200,7 @@ class DepthwiseConvFunctor { const T* filter_data = filter.data(); T* output_data = output->mutable_data(context.GetPlace()); - framework::Tensor filter_hwc; + phi::DenseTensor filter_hwc; if (data_layout == DataLayout::kNHWC) { framework::DDim filter_hwc_dims({filter.dims()[2], filter.dims()[3], @@ -1340,13 +1341,13 @@ template class DepthwiseConvInputGradFunctor { public: void operator()(const phi::GPUContext& context, - const framework::Tensor& input, - const framework::Tensor& filter, - const framework::Tensor& output_grad, + const phi::DenseTensor& input, + const phi::DenseTensor& filter, + const phi::DenseTensor& output_grad, const std::vector& strides, const std::vector& paddings, const std::vector& dilations, - framework::Tensor* input_grad, + phi::DenseTensor* input_grad, const DataLayout data_layout = DataLayout::kNCHW) { const int batch_size = input.dims()[0]; const int input_channels = @@ -1378,7 +1379,7 @@ class DepthwiseConvInputGradFunctor { const T* output_grad_data = output_grad.data(); T* input_grad_data = input_grad->mutable_data(context.GetPlace()); - framework::Tensor filter_hwc; + phi::DenseTensor filter_hwc; if (data_layout == DataLayout::kNHWC) { framework::DDim filter_hwc_dims({filter.dims()[2], filter.dims()[3], @@ -1505,12 +1506,12 @@ class DepthwiseConvFilterGradFunctor { public: void operator()(const phi::GPUContext& context, - const framework::Tensor& input, - const framework::Tensor& output_grad, + const phi::DenseTensor& input, + const phi::DenseTensor& output_grad, const std::vector& strides, const std::vector& paddings, const std::vector& dilations, - framework::Tensor* filter_grad, + phi::DenseTensor* filter_grad, const DataLayout data_layout = DataLayout::kNCHW) { const int batch_size = input.dims()[0]; const int input_channels = @@ -1598,7 +1599,7 @@ class DepthwiseConvFilterGradFunctordims()[2], \ filter_grad->dims()[3], \ diff --git a/paddle/phi/kernels/impl/average_accumulates_kernel_impl.h b/paddle/phi/kernels/impl/average_accumulates_kernel_impl.h index 8731316317d47..bd8e529ff2ee5 100644 --- a/paddle/phi/kernels/impl/average_accumulates_kernel_impl.h +++ b/paddle/phi/kernels/impl/average_accumulates_kernel_impl.h @@ -84,19 +84,19 @@ void AverageAccumulatesKernel(const Context& dev_ctx, max_average_window)); // Get inputs - // auto* param = ctx.Input("param"); - // auto* in_sum_1 = ctx.Input("in_sum_1"); - // auto* in_sum_2 = ctx.Input("in_sum_2"); - // auto* in_sum_3 = ctx.Input("in_sum_3"); + // auto* param = ctx.Input("param"); + // auto* in_sum_1 = ctx.Input("in_sum_1"); + // auto* in_sum_2 = ctx.Input("in_sum_2"); + // auto* in_sum_3 = ctx.Input("in_sum_3"); auto param_tensor = EigenVector::Flatten(param); auto in_sum_1_tensor = EigenVector::Flatten(in_sum_1); auto in_sum_2_tensor = 
EigenVector::Flatten(in_sum_2); auto in_sum_3_tensor = EigenVector::Flatten(in_sum_3); // Get outputs - // auto* out_sum_1 = ctx.Output("out_sum_1"); - // auto* out_sum_2 = ctx.Output("out_sum_2"); - // auto* out_sum_3 = ctx.Output("out_sum_3"); + // auto* out_sum_1 = ctx.Output("out_sum_1"); + // auto* out_sum_2 = ctx.Output("out_sum_2"); + // auto* out_sum_3 = ctx.Output("out_sum_3"); dev_ctx.template Alloc(out_sum_1); dev_ctx.template Alloc(out_sum_2); dev_ctx.template Alloc(out_sum_3); diff --git a/paddle/phi/tests/kernels/test_math_function.cc b/paddle/phi/tests/kernels/test_math_function.cc index b21cf0203febe..bcb0e9d7adc7e 100644 --- a/paddle/phi/tests/kernels/test_math_function.cc +++ b/paddle/phi/tests/kernels/test_math_function.cc @@ -26,9 +26,9 @@ inline phi::funcs::BlasT GetBlas( } TEST(math_function, gemm_notrans_cblas) { - paddle::framework::Tensor input1; - paddle::framework::Tensor input2; - paddle::framework::Tensor input3; + phi::DenseTensor input1; + phi::DenseTensor input2; + phi::DenseTensor input3; int m = 2; int n = 3; @@ -71,10 +71,10 @@ TEST(math_function, gemm_notrans_cblas) { #ifdef PADDLE_WITH_LIBXSMM template void MklSmmCompare(int m, int n, int k) { - paddle::framework::Tensor mat_a; - paddle::framework::Tensor mat_b; - paddle::framework::Tensor mat_c_smm; - paddle::framework::Tensor mat_c_mkl; + phi::DenseTensor mat_a; + phi::DenseTensor mat_b; + phi::DenseTensor mat_c_smm; + phi::DenseTensor mat_c_mkl; auto* cpu_place = new paddle::platform::CPUPlace(); T* A = mat_a.mutable_data({m, k}, *cpu_place); @@ -147,9 +147,9 @@ TEST(math_function, gemm_mkl_vs_smm) { #endif TEST(math_function, gemm_trans_cblas) { - paddle::framework::Tensor input1; - paddle::framework::Tensor input2; - paddle::framework::Tensor input3; + phi::DenseTensor input1; + phi::DenseTensor input2; + phi::DenseTensor input3; int m = 2; int n = 3; @@ -193,7 +193,7 @@ TEST(math_function, gemm_trans_cblas) { } TEST(math_function, zero) { - paddle::framework::Tensor tensor; + phi::DenseTensor tensor; auto* cpu_place = new paddle::platform::CPUPlace(); float* t = tensor.mutable_data({2, 2}, *cpu_place); phi::CPUContext context(*cpu_place); @@ -214,9 +214,9 @@ TEST(math_function, zero) { template void GemvTest(int m, int n, bool trans) { - paddle::framework::Tensor mat_a; - paddle::framework::Tensor vec_b; - paddle::framework::Tensor vec_c; + phi::DenseTensor mat_a; + phi::DenseTensor vec_b; + phi::DenseTensor vec_c; auto* cpu_place = new paddle::platform::CPUPlace(); int b_num = trans ? m : n; int c_num = trans ? 
n : m; @@ -269,7 +269,7 @@ TEST(math_function, gemv) { } TEST(math_funciton, set_constant) { - paddle::framework::Tensor t; + phi::DenseTensor t; t.Resize({10, 10}); t.mutable_data(paddle::platform::CPUPlace()); auto* ctx = new phi::CPUContext(); @@ -287,10 +287,10 @@ TEST(math_funciton, set_constant) { template void GemmWarpTest(int m, int n, int k, T alpha, T beta) { - paddle::framework::Tensor mat_a; - paddle::framework::Tensor mat_b; - paddle::framework::Tensor mat_c_ref; - paddle::framework::Tensor mat_c_mkl; + phi::DenseTensor mat_a; + phi::DenseTensor mat_b; + phi::DenseTensor mat_c_ref; + phi::DenseTensor mat_c_mkl; auto* cpu_place = new paddle::platform::CPUPlace(); T* A = mat_a.mutable_data({m, k}, *cpu_place); diff --git a/paddle/phi/tests/kernels/test_math_function.cu b/paddle/phi/tests/kernels/test_math_function.cu index 479d874626a4e..b227523ce0bc5 100644 --- a/paddle/phi/tests/kernels/test_math_function.cu +++ b/paddle/phi/tests/kernels/test_math_function.cu @@ -43,11 +43,11 @@ inline phi::funcs::BlasT GetBlas( } TEST(math_function, notrans_mul_trans_fp32) { - paddle::framework::Tensor input1; - paddle::framework::Tensor input1_gpu; - paddle::framework::Tensor input2_gpu; - paddle::framework::Tensor out_gpu; - paddle::framework::Tensor out; + phi::DenseTensor input1; + phi::DenseTensor input1_gpu; + phi::DenseTensor input2_gpu; + phi::DenseTensor out_gpu; + phi::DenseTensor out; paddle::platform::CPUPlace cpu_place; paddle::platform::CUDAPlace gpu_place(0); @@ -79,11 +79,11 @@ TEST(math_function, notrans_mul_trans_fp32) { } TEST(math_function, notrans_mul_trans_fp16) { - paddle::framework::Tensor input1; - paddle::framework::Tensor input1_gpu; - paddle::framework::Tensor input2_gpu; - paddle::framework::Tensor out_gpu; - paddle::framework::Tensor out; + phi::DenseTensor input1; + phi::DenseTensor input1_gpu; + phi::DenseTensor input2_gpu; + phi::DenseTensor out_gpu; + phi::DenseTensor out; paddle::platform::CPUPlace cpu_place; paddle::platform::CUDAPlace gpu_place(0); @@ -126,11 +126,11 @@ TEST(math_function, notrans_mul_trans_fp16) { } TEST(math_function, trans_mul_notrans_fp32) { - paddle::framework::Tensor input1; - paddle::framework::Tensor input1_gpu; - paddle::framework::Tensor input2_gpu; - paddle::framework::Tensor out_gpu; - paddle::framework::Tensor out; + phi::DenseTensor input1; + phi::DenseTensor input1_gpu; + phi::DenseTensor input2_gpu; + phi::DenseTensor out_gpu; + phi::DenseTensor out; paddle::platform::CPUPlace cpu_place; paddle::platform::CUDAPlace gpu_place(0); @@ -168,11 +168,11 @@ TEST(math_function, trans_mul_notrans_fp32) { } TEST(math_function, trans_mul_notrans_fp16) { - paddle::framework::Tensor input1; - paddle::framework::Tensor input1_gpu; - paddle::framework::Tensor input2_gpu; - paddle::framework::Tensor out_gpu; - paddle::framework::Tensor out; + phi::DenseTensor input1; + phi::DenseTensor input1_gpu; + phi::DenseTensor input2_gpu; + phi::DenseTensor out_gpu; + phi::DenseTensor out; paddle::platform::CPUPlace cpu_place; paddle::platform::CUDAPlace gpu_place(0); @@ -220,12 +220,12 @@ TEST(math_function, trans_mul_notrans_fp16) { } TEST(math_function, gemm_notrans_cublas_fp32) { - paddle::framework::Tensor input1; - paddle::framework::Tensor input2; - paddle::framework::Tensor input3; - paddle::framework::Tensor input1_gpu; - paddle::framework::Tensor input2_gpu; - paddle::framework::Tensor input3_gpu; + phi::DenseTensor input1; + phi::DenseTensor input2; + phi::DenseTensor input3; + phi::DenseTensor input1_gpu; + phi::DenseTensor input2_gpu; + 
phi::DenseTensor input3_gpu; paddle::platform::CPUPlace cpu_place; paddle::platform::CUDAPlace gpu_place(0); @@ -278,12 +278,12 @@ TEST(math_function, gemm_notrans_cublas_fp32) { } TEST(math_function, gemm_notrans_cublas_fp16) { - paddle::framework::Tensor input1; - paddle::framework::Tensor input2; - paddle::framework::Tensor input3; - paddle::framework::Tensor input1_gpu; - paddle::framework::Tensor input2_gpu; - paddle::framework::Tensor input3_gpu; + phi::DenseTensor input1; + phi::DenseTensor input2; + phi::DenseTensor input3; + phi::DenseTensor input1_gpu; + phi::DenseTensor input2_gpu; + phi::DenseTensor input3_gpu; paddle::platform::CPUPlace cpu_place; paddle::platform::CUDAPlace gpu_place(0); @@ -355,12 +355,12 @@ TEST(math_function, gemm_notrans_cublas_fp16) { } TEST(math_function, gemm_trans_cublas_fp32) { - paddle::framework::Tensor input1; - paddle::framework::Tensor input2; - paddle::framework::Tensor input3; - paddle::framework::Tensor input1_gpu; - paddle::framework::Tensor input2_gpu; - paddle::framework::Tensor input3_gpu; + phi::DenseTensor input1; + phi::DenseTensor input2; + phi::DenseTensor input3; + phi::DenseTensor input1_gpu; + phi::DenseTensor input2_gpu; + phi::DenseTensor input3_gpu; paddle::platform::CPUPlace cpu_place; paddle::platform::CUDAPlace gpu_place(0); @@ -407,12 +407,12 @@ TEST(math_function, gemm_trans_cublas_fp32) { } TEST(math_function, gemm_trans_cublas_fp16) { - paddle::framework::Tensor input1; - paddle::framework::Tensor input2; - paddle::framework::Tensor input3; - paddle::framework::Tensor input1_gpu; - paddle::framework::Tensor input2_gpu; - paddle::framework::Tensor input3_gpu; + phi::DenseTensor input1; + phi::DenseTensor input2; + phi::DenseTensor input3; + phi::DenseTensor input1_gpu; + phi::DenseTensor input2_gpu; + phi::DenseTensor input3_gpu; paddle::platform::CPUPlace cpu_place; paddle::platform::CUDAPlace gpu_place(0); @@ -479,9 +479,9 @@ TEST(math_function, gemm_trans_cublas_fp16) { template void GemvTest(int m, int n, bool trans) { - paddle::framework::Tensor mat_a; - paddle::framework::Tensor vec_b; - paddle::framework::Tensor vec_c; + phi::DenseTensor mat_a; + phi::DenseTensor vec_b; + phi::DenseTensor vec_c; paddle::platform::CPUPlace cpu_place; paddle::platform::CUDAPlace gpu_place(0); @@ -495,9 +495,9 @@ void GemvTest(int m, int n, bool trans) { T* data_b = vec_b.mutable_data({trans ? m : n}, cpu_place); T* data_c = vec_c.mutable_data({trans ? 
n : m}, cpu_place); - paddle::framework::Tensor g_mat_a; - paddle::framework::Tensor g_vec_b; - paddle::framework::Tensor g_vec_c; + phi::DenseTensor g_mat_a; + phi::DenseTensor g_vec_b; + phi::DenseTensor g_vec_c; T* g_data_a = g_mat_a.mutable_data(mat_a.dims(), gpu_place); T* g_data_b = g_vec_b.mutable_data(vec_b.dims(), gpu_place); T* g_data_c = g_vec_c.mutable_data(vec_c.dims(), gpu_place); diff --git a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.cc b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.cc index c9a3f7a9071b5..c3c9f2bd617fe 100644 --- a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.cc +++ b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.cc @@ -16,17 +16,14 @@ #include "paddle/fluid/framework/custom_raw_op_kernel_func.h" #include "paddle/fluid/platform/enforce.h" -void ReluCPUForward(const paddle::framework::Tensor &x, - paddle::framework::Tensor *y) { +void ReluCPUForward(const phi::DenseTensor &x, phi::DenseTensor *y) { custom_raw_op::ReluForward(x, y); } #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -void ReluGPUForward(const paddle::framework::Tensor &x, - paddle::framework::Tensor *y); +void ReluGPUForward(const phi::DenseTensor &x, phi::DenseTensor *y); #else -void ReluGPUForward(const paddle::framework::Tensor &x, - paddle::framework::Tensor *y) { +void ReluGPUForward(const phi::DenseTensor &x, phi::DenseTensor *y) { PADDLE_THROW(paddle::platform::errors::Unimplemented( "ReluGPUForward is not supported when not compiled with GPU.")); } diff --git a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.cu b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.cu index 72cab225d13a5..afdb73a328162 100644 --- a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.cu +++ b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.cu @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include #include "custom_raw_op_kernel_op.h" // NOLINT +#include -void ReluGPUForward(const paddle::framework::Tensor &x, - paddle::framework::Tensor *y) { +void ReluGPUForward(const phi::DenseTensor &x, phi::DenseTensor *y) { custom_raw_op::ReluForward(x, y); } diff --git a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.h b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.h index 70919708e19dd..24cea81b9eb91 100644 --- a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.h +++ b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.h @@ -23,8 +23,7 @@ namespace custom_raw_op { struct ReluFunctor { - explicit ReluFunctor(const paddle::framework::Tensor &x, - paddle::framework::Tensor *y) + explicit ReluFunctor(const phi::DenseTensor &x, phi::DenseTensor *y) : x_(x), y_(y) {} template @@ -72,12 +71,11 @@ struct ReluFunctor { } private: - const paddle::framework::Tensor &x_; - paddle::framework::Tensor *y_; + const phi::DenseTensor &x_; + phi::DenseTensor *y_; }; -inline void ReluForward(const paddle::framework::Tensor &x, - paddle::framework::Tensor *y) { +inline void ReluForward(const phi::DenseTensor &x, phi::DenseTensor *y) { custom_raw_op::ReluFunctor functor(x, y); paddle::framework::VisitDataType( paddle::framework::TransToProtoVarType(x.dtype()), functor); From 0d399f69b131c4171e0f6cc190fee7f41149cfd0 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Fri, 23 Sep 2022 03:32:43 +0000 Subject: [PATCH 02/15] remove needless using tensor --- paddle/fluid/framework/tensor_util_test.cc | 30 ++++++++----------- .../fused/fused_multi_transformer_op.cc | 2 -- .../fluid/operators/gather_scatter_kernel.cc | 4 +-- paddle/phi/kernels/gpu/depthwise_conv.h | 4 +-- 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/paddle/fluid/framework/tensor_util_test.cc b/paddle/fluid/framework/tensor_util_test.cc index 3d3c7de73b729..2e07d3aa5a638 100644 --- a/paddle/fluid/framework/tensor_util_test.cc +++ b/paddle/fluid/framework/tensor_util_test.cc @@ -12,13 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include - -#include - #include "paddle/fluid/framework/tensor_util.h" +#include #include "paddle/fluid/operators/isfinite_op.h" +#include + namespace paddle { namespace framework { @@ -256,22 +255,19 @@ TEST(TensorToVector, Tensor) { #endif } -TEST(TensorToVector, Tensor_bool) { - phi::DenseTensor src; - bool* src_ptr = src.mutable_data({3, 3}, paddle::platform::CPUPlace()); - for (int i = 0; i < 3 * 3; ++i) { - src_ptr[i] = static_cast(i % 2); - } +TEST(TensorToVector, Tensor_bool){{phi::DenseTensor src; +bool* src_ptr = src.mutable_data({3, 3}, paddle::platform::CPUPlace()); +for (int i = 0; i < 3 * 3; ++i) { + src_ptr[i] = static_cast(i % 2); +} - paddle::platform::CPUPlace place; - std::vector dst; - paddle::framework::TensorToVector(src, &dst); +paddle::platform::CPUPlace place; +std::vector dst; +paddle::framework::TensorToVector(src, &dst); - for (int i = 0; i < 3 * 3; ++i) { - EXPECT_EQ(src_ptr[i], dst[i]); - } +for (int i = 0; i < 3 * 3; ++i) { + EXPECT_EQ(src_ptr[i], dst[i]); } - } // namespace framework #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc index ede6300decbe5..cb5d5b17dfeb6 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc @@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/fused/fused_multi_transformer_op.h" - #include #include diff --git a/paddle/fluid/operators/gather_scatter_kernel.cc b/paddle/fluid/operators/gather_scatter_kernel.cc index e05a214dcb4c1..b8c870cd77569 100644 --- a/paddle/fluid/operators/gather_scatter_kernel.cc +++ b/paddle/fluid/operators/gather_scatter_kernel.cc @@ -120,8 +120,8 @@ struct cpu_gather_scatter_functor { self_idx = is_scatter_like ? replace_index : index_idx; src_idx = is_scatter_like ? 
index_idx : replace_index; - reduce_op(static_cast(self_data + self_idx), - static_cast(src_data + src_idx)); + reduce_op((tensor_t*)(self_data + self_idx), + (tensor_t*)(src_data + src_idx)); index_idx++; } } diff --git a/paddle/phi/kernels/gpu/depthwise_conv.h b/paddle/phi/kernels/gpu/depthwise_conv.h index 9acd67390face..1fbc7cf9e4a29 100644 --- a/paddle/phi/kernels/gpu/depthwise_conv.h +++ b/paddle/phi/kernels/gpu/depthwise_conv.h @@ -176,8 +176,8 @@ __device__ __inline__ void KernelDepthwiseConvNCHW( int offset = in_offset + h_in * input_width + w_in; T in_data = input_data[offset]; if (fuse_relu_before_conv) { - value += - weight[weight_offset] * static(max(0.0f, double(in_data))); + value += weight[weight_offset] * + T(max(0.0f, static_cast(in_data))); // NOLINT } else { value += weight[weight_offset] * in_data; } From 6864dfc7c8e51f3ce80f3f36bb3db395a4ee08f5 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Fri, 23 Sep 2022 06:39:20 +0000 Subject: [PATCH 03/15] resolve conflict --- .../operators/collective/c_allreduce_op.h | 10 ++++---- .../fluid/operators/mkldnn/pool_mkldnn_op.cc | 23 ++++++++++--------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/operators/collective/c_allreduce_op.h b/paddle/fluid/operators/collective/c_allreduce_op.h index 9749f446b8ea1..4d90442afbc5a 100644 --- a/paddle/fluid/operators/collective/c_allreduce_op.h +++ b/paddle/fluid/operators/collective/c_allreduce_op.h @@ -79,7 +79,7 @@ class CAllReduceOp : public framework::OperatorWithKernel { framework::OpKernelType GetKernelTypeForVar( const std::string& var_name, - const framework::Tensor& tensor, + const phi::DenseTensor& tensor, const framework::OpKernelType& expected_kernel_type) const { if (var_name == "Cond") { return expected_kernel_type; @@ -193,7 +193,7 @@ class CAllReduceOpASCENDKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_ASCEND_CL) if (ctx.HasInput("Cond")) { - auto cond = ctx.Input("Cond"); + auto cond = ctx.Input("Cond"); auto place = cond->place(); PADDLE_ENFORCE_EQ(platform::is_cpu_place(place), true, @@ -327,7 +327,7 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { #if defined(PADDLE_WITH_XPU_BKCL) if (ctx.HasInput("Cond")) { - auto cond = ctx.Input("Cond"); + auto cond = ctx.Input("Cond"); auto place = cond->place(); PADDLE_ENFORCE_EQ(platform::is_cpu_place(place), true, @@ -412,7 +412,7 @@ class CAllReduceOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { if (ctx.HasInput("Cond")) { - auto cond = ctx.Input("Cond"); + auto cond = ctx.Input("Cond"); auto place = cond->place(); PADDLE_ENFORCE_EQ(platform::is_cpu_place(place), true, @@ -533,7 +533,7 @@ class CAllReduceOpMLUKernel : public framework::OpKernel { auto out = ctx.Output("Out"); if (ctx.HasInput("Cond")) { - auto cond = ctx.Input("Cond"); + auto cond = ctx.Input("Cond"); auto place = cond->place(); PADDLE_ENFORCE_EQ(platform::is_cpu_place(place), true, diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index 00ae785bca95d..a4853131de161 100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -27,7 +27,6 @@ using dnnl::primitive; using dnnl::reorder; using dnnl::stream; using framework::DataLayout; -using framework::Tensor; using 
platform::to_void_cast; template @@ -38,8 +37,8 @@ class PoolingMKLDNNHandler public: PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, const dnnl::engine mkldnn_engine, - const Tensor* input, - Tensor* output) + const phi::DenseTensor* input, + phi::DenseTensor* output) : platform::MKLDNNHandlerNoCachingT( @@ -131,9 +130,9 @@ class PoolingMKLDNNHandler PoolingMKLDNNHandler(const paddle::framework::ExecutionContext& ctx, const dnnl::engine mkldnn_engine, - const Tensor* in_x, - const Tensor* out_grad, - Tensor* in_x_grad) + const phi::DenseTensor* in_x, + const phi::DenseTensor* out_grad, + phi::DenseTensor* in_x_grad) : platform::MKLDNNHandlerNoCachingT { auto& dev_ctx = ctx.template device_context(); - const Tensor* input = ctx.Input("X"); - Tensor* output = ctx.Output("Out"); + const phi::DenseTensor* input = ctx.Input("X"); + phi::DenseTensor* output = ctx.Output("Out"); PoolingMKLDNNHandler handler(ctx, dev_ctx.GetEngine(), input, output); @@ -347,9 +346,11 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel { true, paddle::platform::errors::PreconditionNotMet( "Operator DNNL PoolGrad must use CPUPlace")); - const Tensor* in_x = ctx.Input("X"); - const Tensor* out_grad = ctx.Input(framework::GradVarName("Out")); - Tensor* in_x_grad = ctx.Output(framework::GradVarName("X")); + const phi::DenseTensor* in_x = ctx.Input("X"); + const phi::DenseTensor* out_grad = + ctx.Input(framework::GradVarName("Out")); + phi::DenseTensor* in_x_grad = + ctx.Output(framework::GradVarName("X")); auto& dev_ctx = ctx.template device_context(); From e868f5950fe2c9df8124e63faceac6267558cc39 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Fri, 23 Sep 2022 12:11:08 +0000 Subject: [PATCH 04/15] replace tensor using --- .../paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.cc b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.cc index c3c9f2bd617fe..262a01f1eb044 100644 --- a/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.cc +++ b/python/paddle/fluid/tests/custom_op/custom_raw_op_kernel_op.cc @@ -31,8 +31,8 @@ void ReluGPUForward(const phi::DenseTensor &x, phi::DenseTensor *y) { __PD_DEFINE_RAW_OP_KERNEL_FUNC(custom_raw_relu, ctx) { namespace f = paddle::framework; - const auto *x = ctx.Input("X"); - auto *y = ctx.Output("Y"); + const auto *x = ctx.Input("X"); + auto *y = ctx.Output("Y"); PADDLE_ENFORCE_NOT_NULL(x, paddle::platform::errors::InvalidArgument( "Input(X) should not be nullptr.")); From d47fac1064401df7b0bf46cd4410cf42f9848b7b Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Fri, 23 Sep 2022 12:41:59 +0000 Subject: [PATCH 05/15] fix format error --- paddle/fluid/framework/tensor_util_test.cc | 5 ++--- paddle/fluid/operators/gather_scatter_kernel.cc | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/framework/tensor_util_test.cc b/paddle/fluid/framework/tensor_util_test.cc index 2e07d3aa5a638..6fbfa503da657 100644 --- a/paddle/fluid/framework/tensor_util_test.cc +++ b/paddle/fluid/framework/tensor_util_test.cc @@ -12,12 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/tensor_util.h" #include -#include "paddle/fluid/operators/isfinite_op.h" - #include +#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/operators/isfinite_op.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/operators/gather_scatter_kernel.cc b/paddle/fluid/operators/gather_scatter_kernel.cc index b8c870cd77569..b579b3175d396 100644 --- a/paddle/fluid/operators/gather_scatter_kernel.cc +++ b/paddle/fluid/operators/gather_scatter_kernel.cc @@ -120,8 +120,8 @@ struct cpu_gather_scatter_functor { self_idx = is_scatter_like ? replace_index : index_idx; src_idx = is_scatter_like ? index_idx : replace_index; - reduce_op((tensor_t*)(self_data + self_idx), - (tensor_t*)(src_data + src_idx)); + reduce_op((tensor_t*)(self_data + self_idx), // NOLINT + (tensor_t*)(src_data + src_idx)); // NOLINT index_idx++; } } From 7328bd37b7f69c687380090d1ff90b25088deda4 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Mon, 26 Sep 2022 03:09:23 +0000 Subject: [PATCH 06/15] revert needless changing --- paddle/fluid/framework/tensor_test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/tensor_test.cc b/paddle/fluid/framework/tensor_test.cc index c9d740dcf8fc4..378e56918a320 100644 --- a/paddle/fluid/framework/tensor_test.cc +++ b/paddle/fluid/framework/tensor_test.cc @@ -41,8 +41,8 @@ TEST(DenseTensor, DataAssert) { } catch (platform::EnforceNotMet& err) { caught = true; std::string ex_msg = err.what(); - EXPECT_TRUE(ex_msg.find("phi::DenseTensor holds no memory. Call " - "phi::DenseTensor::mutable_data firstly.") != + EXPECT_TRUE(ex_msg.find("Tensor holds no memory. Call " + "Tensor::mutable_data firstly.") != std::string::npos); } ASSERT_TRUE(caught); @@ -185,8 +185,8 @@ TEST(DenseTensor, ShareDataWith) { } catch (paddle::platform::EnforceNotMet& err) { caught = true; std::string ex_msg = err.what(); - EXPECT_TRUE(ex_msg.find("phi::DenseTensor holds no memory. Call " - "phi::DenseTensor::mutable_data firstly.") != + EXPECT_TRUE(ex_msg.find("Tensor holds no memory. 
Call " + "Tensor::mutable_data firstly.") != std::string::npos); } ASSERT_TRUE(caught); From 759f20364a2a9a591fefff97e8126ac5939048c7 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Mon, 26 Sep 2022 05:25:08 +0000 Subject: [PATCH 07/15] fix rocm and npu compile error --- paddle/fluid/operators/activation_op_npu.cc | 4 ++-- paddle/fluid/platform/device/gpu/rocm/miopen_desc.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/operators/activation_op_npu.cc b/paddle/fluid/operators/activation_op_npu.cc index 52a472a595a92..3c6e207b971bc 100644 --- a/paddle/fluid/operators/activation_op_npu.cc +++ b/paddle/fluid/operators/activation_op_npu.cc @@ -873,8 +873,8 @@ template class ExpNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); + auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); out->mutable_data(ctx.GetPlace()); const auto& runner = NpuOpRunner("Exp", {*x}, {*out}, {}); auto stream = diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h b/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h index 158693f5dad70..8faae285e49e3 100644 --- a/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h +++ b/paddle/fluid/platform/device/gpu/rocm/miopen_desc.h @@ -129,7 +129,7 @@ class TensorDescriptor { T* desc() { return desc_.get(); } T* desc() const { return desc_.get(); } - void set(const Tensor& tensor, const int groups = 1) { + void set(const phi::DenseTensor& tensor, const int groups = 1) { auto dims = phi::vectorize(tensor.dims()); std::vector strides(dims.size()); strides[dims.size() - 1] = 1; @@ -148,7 +148,7 @@ class TensorDescriptor { const_cast(strides.data()))); } - void set(const Tensor& tensor, const miopenTensorFormat_t format) { + void set(const phi::DenseTensor& tensor, const miopenTensorFormat_t format) { const int groups = 1; PADDLE_ENFORCE_EQ(format, MIOPEN_TENSOR_NCHW, @@ -195,7 +195,7 @@ class FilterDescriptor { T* desc() { return desc_.get(); } T* desc() const { return desc_.get(); } - void set(const Tensor& tensor, + void set(const phi::DenseTensor& tensor, const miopenTensorFormat_t format, const int groups = 1) { PADDLE_ENFORCE_EQ(format, From 2efd90d7f77aba40ba87fc7a93324a4de8d8b9da Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Mon, 26 Sep 2022 06:32:26 +0000 Subject: [PATCH 08/15] fix cinn compile error --- .../paddle2cinn/cinn_graph_symbolization.cc | 4 ++-- .../paddle2cinn/cinn_graph_symbolization_test.cc | 2 +- .../fluid/framework/paddle2cinn/cinn_lib_test.cc | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc index 79ba56ab147a3..0e1a75ebe64ee 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc @@ -37,7 +37,7 @@ namespace paddle2cinn { using ir::Graph; using ir::Node; -using CinnTensor = ::cinn::hlir::Tensor; +using CinnTensor = ::cinn::hlir::framework::Tensor; using OpMapperContext = CinnGraphSymbolization::OpMapperContext; using CinnOpDesc = CinnGraphSymbolization::CinnOpDesc; using FeedInfoMap = CinnGraphSymbolization::FeedInfoMap; @@ -45,7 +45,7 @@ using FeedInfoMap = CinnGraphSymbolization::FeedInfoMap; namespace utils { OpMapperContext::FeedInfo GetCinnFeedInfoFromTensor( - const phi::DenseTensor& tensor, bool skip_trans_type = 
false) { + const Tensor& tensor, bool skip_trans_type = false) { OpMapperContext::FeedInfo info; const auto& dim = tensor.dims(); for (int i = 0; i < dim.size(); i++) { diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc index 929f009b2a3a2..12bd9564c1ae3 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc @@ -24,7 +24,7 @@ namespace paddle2cinn { using ::cinn::frontend::NetBuilder; using ir::Graph; using ir::Node; -using CinnTensor = ::cinn::hlir::Tensor; +using CinnTensor = ::cinn::hlir::framework::Tensor; using OpMapperContext = CinnGraphSymbolization::OpMapperContext; using CinnOpDesc = CinnGraphSymbolization::CinnOpDesc; using FeedInfoMap = CinnGraphSymbolization::FeedInfoMap; diff --git a/paddle/fluid/framework/paddle2cinn/cinn_lib_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_lib_test.cc index ee030bb39caa9..2dd09771cc5ea 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_lib_test.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_lib_test.cc @@ -52,7 +52,7 @@ Program CreateAddProgram() { return program; } -void SetRandData(hlir::Tensor tensor, Target target) { +void SetRandData(hlir::framework::Tensor tensor, Target target) { auto* data = tensor->mutable_data(target); std::random_device seed; std::default_random_engine engine(seed()); @@ -96,8 +96,8 @@ TEST(net_build, program_execute_multi_elementwise_add) { hlir::framework::GraphCompiler gc(target, scope, graph); auto runtime_program = gc.Build(); - scope->Var("A"); - scope->Var("B"); + scope->Var("A"); + scope->Var("B"); auto A = scope->GetTensor("A"); auto B = scope->GetTensor("B"); @@ -133,10 +133,10 @@ TEST(net_build, program_execute_fc) { hlir::framework::GraphCompiler gc(target, scope, graph); auto runtime_program = gc.Build(); - scope->Var(std::string(a.id())); - scope->Var(std::string(w.id())); - scope->Var(std::string(b.id())); - scope->Var(std::string(mul_out->id)); + scope->Var(std::string(a.id())); + scope->Var(std::string(w.id())); + scope->Var(std::string(b.id())); + scope->Var(std::string(mul_out->id)); auto a_ten = scope->GetTensor(std::string(a.id())); auto w_ten = scope->GetTensor(std::string(w.id())); From 0fc92e401235575fcf7200afbc04674b19d3ea7b Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Mon, 26 Sep 2022 06:57:01 +0000 Subject: [PATCH 09/15] fix format error --- paddle/fluid/framework/tensor_util_test.cc | 120 +++++++++--------- .../fused/fused_multi_transformer_int8_op.cu | 2 +- .../fused/fused_multi_transformer_op.cu | 2 +- ...r_op.h => fused_multi_transformer_op.cu.h} | 0 .../operators/mkldnn/dequantize_mkldnn_op.cc | 6 +- 5 files changed, 66 insertions(+), 64 deletions(-) rename paddle/fluid/operators/fused/{fused_multi_transformer_op.h => fused_multi_transformer_op.cu.h} (100%) diff --git a/paddle/fluid/framework/tensor_util_test.cc b/paddle/fluid/framework/tensor_util_test.cc index 6fbfa503da657..9097c43023bd2 100644 --- a/paddle/fluid/framework/tensor_util_test.cc +++ b/paddle/fluid/framework/tensor_util_test.cc @@ -254,78 +254,78 @@ TEST(TensorToVector, Tensor) { #endif } -TEST(TensorToVector, Tensor_bool){{phi::DenseTensor src; -bool* src_ptr = src.mutable_data({3, 3}, paddle::platform::CPUPlace()); -for (int i = 0; i < 3 * 3; ++i) { - src_ptr[i] = static_cast(i % 2); -} +TEST(TensorToVector, Tensor_bool) { + phi::DenseTensor src; + bool* src_ptr = src.mutable_data({3, 3}, 
-{
-  std::vector<bool> src_vec = {
-      false,
-      true,
-      false,
-      true,
-      false,
-      true,
-      false,
-      true,
-      false,
-  };
-  phi::DenseTensor gpu_tensor;
-  paddle::platform::CUDAPlace place;
-  phi::GPUContext gpu_ctx(place);
-  gpu_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
-                           .GetAllocator(place, gpu_ctx.stream())
-                           .get());
-  gpu_ctx.PartialInitWithAllocator();
-  paddle::framework::TensorFromVector<bool>(src_vec, gpu_ctx, &gpu_tensor);
-
-  std::vector<bool> dst;
-  paddle::framework::TensorToVector<bool>(gpu_tensor, gpu_ctx, &dst);
-
-  for (int i = 0; i < 3 * 3; ++i) {
-    EXPECT_EQ(src_vec[i], dst[i]);
-  }
-}
+  {
+    std::vector<bool> src_vec = {
+        false,
+        true,
+        false,
+        true,
+        false,
+        true,
+        false,
+        true,
+        false,
+    };
+    phi::DenseTensor gpu_tensor;
+    paddle::platform::CUDAPlace place;
+    phi::GPUContext gpu_ctx(place);
+    gpu_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
+                             .GetAllocator(place, gpu_ctx.stream())
+                             .get());
+    gpu_ctx.PartialInitWithAllocator();
+    paddle::framework::TensorFromVector<bool>(src_vec, gpu_ctx, &gpu_tensor);
+
+    std::vector<bool> dst;
+    paddle::framework::TensorToVector<bool>(gpu_tensor, gpu_ctx, &dst);
+
+    for (int i = 0; i < 3 * 3; ++i) {
+      EXPECT_EQ(src_vec[i], dst[i]);
+    }
+  }
 #endif
 #ifdef PADDLE_WITH_ASCEND_CL
-{
-  std::vector<bool> src_vec = {
-      false,
-      true,
-      false,
-      true,
-      false,
-      true,
-      false,
-      true,
-      false,
-  };
-  phi::DenseTensor npu_tensor;
-  paddle::platform::NPUPlace place(0);
-  paddle::platform::NPUDeviceContext npu_ctx(place);
-  paddle::framework::TensorFromVector<bool>(src_vec, npu_ctx, &npu_tensor);
-
-  std::vector<bool> dst;
-  paddle::framework::TensorToVector<bool>(npu_tensor, npu_ctx, &dst);
-
-  for (int i = 0; i < 3 * 3; ++i) {
-    EXPECT_EQ(src_vec[i], dst[i]);
-  }
-}
+  {
+    std::vector<bool> src_vec = {
+        false,
+        true,
+        false,
+        true,
+        false,
+        true,
+        false,
+        true,
+        false,
+    };
+    phi::DenseTensor npu_tensor;
+    paddle::platform::NPUPlace place(0);
+    paddle::platform::NPUDeviceContext npu_ctx(place);
+    paddle::framework::TensorFromVector<bool>(src_vec, npu_ctx, &npu_tensor);
+
+    std::vector<bool> dst;
+    paddle::framework::TensorToVector<bool>(npu_tensor, npu_ctx, &dst);
+
+    for (int i = 0; i < 3 * 3; ++i) {
+      EXPECT_EQ(src_vec[i], dst[i]);
+    }
+  }
 #endif
-}  // namespace paddle
+}
 
 TEST(TensorFromDLPack, Tensor) {
   {
diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu
index fe1ee3449a102..681748c71c91a 100644
--- a/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu
+++ b/paddle/fluid/operators/fused/fused_multi_transformer_int8_op.cu
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/operators/fused/attn_gemm_int8.h"
-#include "paddle/fluid/operators/fused/fused_multi_transformer_op.h"
+#include "paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu
index b70f0c7ea1965..01464b7241655 100644
--- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu
+++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu
@@ -9,7 +9,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/fused/fused_multi_transformer_op.h"
+#include "paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.h b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h
similarity index 100%
rename from paddle/fluid/operators/fused/fused_multi_transformer_op.h
rename to paddle/fluid/operators/fused/fused_multi_transformer_op.cu.h
diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
index 4ceddf53f9458..29cccfc9fb0d4 100644
--- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
@@ -12,14 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "dnnl.hpp"  // NOLINT
+#include "paddle/fluid/operators/dequantize_op.h"
+
 #include "paddle/fluid/framework/data_layout_transform.h"
 #include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/operators/dequantize_op.h"
 #include "paddle/fluid/platform/errors.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/platform/mkldnn_reuse.h"
+#include "dnnl.hpp"  // NOLINT
+
 namespace paddle {
 namespace operators {

From f3bbc1619a5db69dfc589bf39d228984aefee315 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Mon, 26 Sep 2022 07:13:29 +0000
Subject: [PATCH 10/15] fix mkldnn format error

---
 paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
index 29cccfc9fb0d4..12ac31804a0db 100644
--- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
@@ -20,8 +20,6 @@ limitations under the License. */
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/platform/mkldnn_reuse.h"
-#include "dnnl.hpp"  // NOLINT
-
 namespace paddle {
 namespace operators {

From 3db39803de3b96d2848d0d647f9212d5406be633 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Mon, 26 Sep 2022 09:18:52 +0000
Subject: [PATCH 11/15] fix mkldnn format error

---
 paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
index af8843c74179e..ae94266b4da71 100644
--- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
@@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "dnnl.hpp"
+#include "paddle/fluid/operators/quantize_op.h"
+
 #include "paddle/fluid/framework/data_layout_transform.h"
 #include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/operators/quantize_op.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/platform/mkldnn_reuse.h"

From fd6425b12f08edea7576e266a96dd58646f6cee2 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Mon, 26 Sep 2022 13:20:25 +0000
Subject: [PATCH 12/15] fix cinn compile error

---
 paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc
index 0e1a75ebe64ee..b54a94b5149ca 100644
--- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc
+++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc
@@ -45,7 +45,7 @@ using FeedInfoMap = CinnGraphSymbolization::FeedInfoMap;
 namespace utils {
 
 OpMapperContext::FeedInfo GetCinnFeedInfoFromTensor(
-    const Tensor& tensor, bool skip_trans_type = false) {
+    const CinnTensor& tensor, bool skip_trans_type = false) {
   OpMapperContext::FeedInfo info;
   const auto& dim = tensor.dims();
   for (int i = 0; i < dim.size(); i++) {

From 00db968363e19df42533b556cdbe740d0c73e770 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Mon, 26 Sep 2022 14:09:56 +0000
Subject: [PATCH 13/15] fix cinn compile error

---
 paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc
index b54a94b5149ca..94bc1241895ef 100644
--- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc
+++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc
@@ -45,7 +45,7 @@ using FeedInfoMap = CinnGraphSymbolization::FeedInfoMap;
 namespace utils {
 
 OpMapperContext::FeedInfo GetCinnFeedInfoFromTensor(
-    const CinnTensor& tensor, bool skip_trans_type = false) {
+    const phi::DenseTensor& tensor, bool skip_trans_type = false) {
   OpMapperContext::FeedInfo info;
   const auto& dim = tensor.dims();
   for (int i = 0; i < dim.size(); i++) {

From e1751ac629edc256e14063dfbaad7591026729fa Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Tue, 27 Sep 2022 02:29:08 +0000
Subject: [PATCH 14/15] fix cinn compile error

---
 paddle/fluid/operators/cinn/cinn_launch_context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.h b/paddle/fluid/operators/cinn/cinn_launch_context.h
index a868a182bfc5e..0bbbcc8b03177 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_context.h
+++ b/paddle/fluid/operators/cinn/cinn_launch_context.h
@@ -52,7 +52,7 @@ class CinnCompiledObject;
 
 namespace operators::details {
 
-using CinnTensor = ::cinn::hlir::Tensor;
+using CinnTensor = ::cinn::hlir::framework::Tensor;
 using CinnScope = ::cinn::hlir::framework::Scope;
 using CinnCompiledObject = framework::paddle2cinn::CinnCompiledObject;

From 6cf126d2b0655dfa185600ee9dc1e78c06c58e3f Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Tue, 27 Sep 2022 11:25:20 +0000
Subject: [PATCH 15/15] resolve conflict

---
 paddle/fluid/pybind/tensor.cc | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/pybind/tensor.cc b/paddle/fluid/pybind/tensor.cc
index ca795dbe6e001..6441718e4116f 100644
--- a/paddle/fluid/pybind/tensor.cc
+++ b/paddle/fluid/pybind/tensor.cc
@@ -1115,10 +1115,9 @@ void BindTensor(pybind11::module &m) {  // NOLINT
            [](const phi::SparseCooTensor &self) -> int64_t {
              return self.numel();
            })
-      .def("indices",
-           [](const phi::SparseCooTensor &self) -> framework::Tensor {
-             return self.indices();
-           });
+      .def("indices", [](const phi::SparseCooTensor &self) -> phi::DenseTensor {
+        return self.indices();
+      });
 }
 
 }  // namespace pybind
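
Taken together, the series applies one mechanical substitution and then chases its fallout across the NPU, ROCm, CINN, oneDNN and pybind code paths: the per-file convenience alias for the tensor type is dropped, and phi::DenseTensor is spelled out at every use site (or a fully qualified alias such as CinnTensor = ::cinn::hlir::framework::Tensor is kept where two libraries both define a type named Tensor). The sketch below is illustrative only and is not part of any patch above; the operator, the kernel name CopyExampleKernel, and the use of TensorCopy are assumptions chosen to keep the example self-contained, while the Input/Output calls mirror the idiom the series standardizes on.

    // Illustrative sketch only (not from the series): a hypothetical kernel
    // written in the post-series style, with the tensor type spelled out as
    // phi::DenseTensor instead of relying on a removed "using Tensor" alias.
    #include "paddle/fluid/framework/op_registry.h"
    #include "paddle/fluid/framework/tensor_util.h"

    namespace paddle {
    namespace operators {

    template <typename T>
    class CopyExampleKernel : public framework::OpKernel<T> {
     public:
      void Compute(const framework::ExecutionContext& ctx) const override {
        // Before the series this would typically read ctx.Input<Tensor>("X").
        auto* x = ctx.Input<phi::DenseTensor>("X");
        auto* out = ctx.Output<phi::DenseTensor>("Out");
        out->mutable_data<T>(ctx.GetPlace());
        // Plain copy, enough to show the DenseTensor-based kernel API.
        framework::TensorCopy(*x, ctx.GetPlace(), out);
      }
    };

    }  // namespace operators
    }  // namespace paddle

Spelling the type out removes the dependence on whichever header happened to inject a Tensor name into the translation unit, which is exactly the class of breakage the compile-error fixups in patches 07, 08, 12, 13 and 14 are cleaning up.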