From c1ac58bb7a57e0a5ca4e661ea38f7dfd98ddca96 Mon Sep 17 00:00:00 2001 From: jim19930609 Date: Tue, 30 Nov 2021 10:37:02 +0000 Subject: [PATCH 1/2] Handled dispensable tensors in AutoCodeGen for Eager Dygraph --- .../auto_code_generator/eager_generator.cc | 264 +++++++++++++----- paddle/fluid/pybind/op_function_generator.cc | 104 +------ paddle/fluid/pybind/op_function_generator.h | 121 ++++++++ 3 files changed, 314 insertions(+), 175 deletions(-) create mode 100644 paddle/fluid/pybind/op_function_generator.h diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index c0714775da852..136eaebe2cc4b 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -22,6 +22,7 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/pybind/op_function_generator.h" #include "paddle/fluid/pybind/pybind.h" #include "paddle/fluid/string/string_helper.h" @@ -358,18 +359,149 @@ static bool CheckOpProto(proto::OpProto* op_proto) { return true; } +/* --------------------------------------- */ +/* --------- Preprocess Ins/Outs --------- */ +/* --------------------------------------- */ +static void PurifyOpProto( + const proto::OpProto& op_proto, + std::unordered_map* fwd_inputs_name_pos_map, + std::unordered_map* fwd_outputs_name_pos_map, + std::map* grad_outs_slotname_map, + std::map* grad_ins_fwd_slotname_map, + std::map* grad_ins_grad_slotname_map, + std::vector* in_vars, + std::vector* out_vars, + std::map>>* + grad_ins, + std::map>>* + grad_outs) { + // Op Name + const std::string op_name = op_proto.type(); + + // Handle dispensable inputs + for (const proto::OpProto::Var& input : op_proto.inputs()) { + std::string input_name = input.name(); + + // Delete dispensable tensor unless specified in op_ins_map + if (input.dispensable()) { + if (!op_ins_map.count(op_name) || + !op_ins_map[op_name].count(input_name)) { + VLOG(6) << "Removing Dispensable Input: " << input_name; + + // in_vars + auto iter = in_vars->begin(); + for (iter = in_vars->begin(); iter != in_vars->end(); iter++) { + if (iter->name() == input_name) { + break; + } + } + in_vars->erase(iter); + + // grad_outs_slotname_map + auto grad_outs_slotname_map_purified = *grad_outs_slotname_map; + for (const auto& iter : *grad_outs_slotname_map) { + const std::string& grad_output_name = iter.first; + const std::string& matched_input_name = iter.second; + if (matched_input_name == input_name) { + grad_outs_slotname_map_purified.erase(grad_output_name); + + PADDLE_ENFORCE( + grad_outs->count(grad_output_name) > 0, + paddle::platform::errors::Fatal( + "Unable to find gradient output name in grad_outs.")); + // grad_outs + grad_outs->erase(grad_output_name); + } + } + *grad_outs_slotname_map = grad_outs_slotname_map_purified; + + // grad_ins_fwd_slotname_map: output as tensorwrapper + if (grad_ins_fwd_slotname_map->count(input_name)) + grad_ins_fwd_slotname_map->erase(input_name); + + // grad_ins: output as tensorwrapper + if (grad_ins->count(input_name)) grad_ins->erase(input_name); + } + } + } + + for (const proto::OpProto::Var& output : op_proto.outputs()) { + std::string output_name = output.name(); + + // Delete dispensable tensor unless specified in op_outs_map + if (output.dispensable()) { + if (!op_outs_map.count(op_name) || + !op_outs_map[op_name].count(output_name)) { + VLOG(6) << 
"Removing Dispensable Output: " << output_name; + + // out_vars + auto iter = out_vars->begin(); + for (iter = out_vars->begin(); iter != out_vars->end(); iter++) { + if (iter->name() == output_name) { + break; + } + } + out_vars->erase(iter); + + // grad_ins_grad_slotname_map + auto grad_ins_grad_slotname_map_purified = *grad_ins_grad_slotname_map; + for (const auto& iter : *grad_ins_grad_slotname_map) { + const std::string& grad_input_name = iter.first; + const std::string& matched_output_name = iter.second; + if (matched_output_name == output_name) { + grad_ins_grad_slotname_map_purified.erase(grad_input_name); + + PADDLE_ENFORCE( + grad_ins->count(grad_input_name) > 0, + paddle::platform::errors::Fatal( + "Unable to find gradient input name in grad_ins.")); + // grad_ins + grad_ins->erase(grad_input_name); + } + } + *grad_ins_grad_slotname_map = grad_ins_grad_slotname_map_purified; + + // grad_ins_fwd_slotname_map: output as tensorwrapper + if (grad_ins_fwd_slotname_map->count(output_name)) + grad_ins_fwd_slotname_map->erase(output_name); + + // grad_ins: output as tensorwrapper + if (grad_ins->count(output_name)) grad_ins->erase(output_name); + } + } + } + + /* ------ Maping forward slot name to fwd position ------ */ + size_t in_pos = 0; + for (const auto& var : *in_vars) { + VLOG(6) << "Mapping input tensor: " << var.name() + << " To position: " << in_pos; + (*fwd_inputs_name_pos_map)[var.name()] = in_pos; + in_pos++; + } + + size_t out_pos = 0; + for (const auto& var : *out_vars) { + VLOG(6) << "Mapping output tensor: " << var.name() + << " To position: " << out_pos; + (*fwd_outputs_name_pos_map)[var.name()] = out_pos; + out_pos++; + } +} + /* -------------------------------- */ /* --------- Collect Info --------- */ /* -------------------------------- */ static bool CollectInformationFromOpInfo( const paddle::framework::OpInfo& op_info, - std::vector* grad_node_default_attr_maps, std::vector* grad_op_types, - std::unordered_map* fwd_inputs_name_pos_map, - std::unordered_map* fwd_outputs_name_pos_map, std::map* grad_outs_slotname_map, std::map* grad_ins_fwd_slotname_map, std::map* grad_ins_grad_slotname_map, + std::vector* in_vars, + std::vector* out_vars, std::map>>* grad_ins, @@ -380,6 +512,13 @@ static bool CollectInformationFromOpInfo( const std::string& op_type = op_proto.type(); std::vector dims = {1, 1, 1, 1}; + for (const proto::OpProto::Var& input : op_proto.inputs()) { + in_vars->push_back(input); + } + for (const proto::OpProto::Var& output : op_proto.outputs()) { + out_vars->push_back(output); + } + /* ------ Prepare "ins" ------ */ std::map>> @@ -494,7 +633,6 @@ static bool CollectInformationFromOpInfo( for (auto iter = grad_node->begin(); iter < grad_node->end(); iter++) { // Each OpBase paddle::imperative::OpBase& op_base = *iter; - grad_node_default_attr_maps->push_back(op_base.DefaultAttrsMap()); grad_op_types->push_back(op_base.Type()); } @@ -538,22 +676,6 @@ static bool CollectInformationFromOpInfo( grad_outs_slotname_map); VLOG(6) << "Finished Slotname Matching for Grad_Outs"; - /* ------ Maping forward slot name to fwd position ------ */ - size_t in_pos = 0; - for (const auto& iter : ins) { - VLOG(6) << "Mapping input tensor: " << iter.first - << " To position: " << in_pos; - (*fwd_inputs_name_pos_map)[iter.first] = in_pos; - in_pos++; - } - size_t out_pos = 0; - for (const auto& iter : outs) { - VLOG(6) << "Mapping output tensor: " << iter.first - << " To position: " << out_pos; - (*fwd_outputs_name_pos_map)[iter.first] = out_pos; - out_pos++; - } - return 
true; } @@ -561,16 +683,13 @@ static bool CollectInformationFromOpInfo( /* --------- CodeGen: Forward GradNode Creation ------ */ /* --------------------------------------------------- */ static std::string GenerateGradNodeCreationContent( - const std::vector& - grad_node_default_attr_maps, const std::unordered_map& fwd_inputs_name_pos_map, const std::unordered_map& fwd_outputs_name_pos_map, const std::map& grad_ins_fwd_slotname_map, - const proto::OpProto& op_proto) { + const std::string& op_type, const std::vector& in_vars, + const std::vector& out_vars) { VLOG(6) << "Generating GradNode Creation codes"; - const std::string& op_type = op_proto.type(); - // [Generation] Construct GradOpNode // Run ComputeRequiredGrad @@ -578,7 +697,7 @@ static std::string GenerateGradNodeCreationContent( // then generate: "egr::AutogradMeta* p_autograd_out = // egr::EagerUtils::autograd_meta("op_proto->outputs()[0].name()")" std::string get_autograd_meta_str = " // Prepare Autograd Meta \n"; - for (const proto::OpProto::Var& input : op_proto.inputs()) { + for (const proto::OpProto::Var& input : in_vars) { const std::string& input_name = input.name(); const std::string& input_autograd_name = "p_autograd_" + input_name; @@ -602,7 +721,7 @@ static std::string GenerateGradNodeCreationContent( // If single output slotname and not duplicable, // then generate: "egr::AutogradMeta* p_autograd_out = // egr::EagerUtils::autograd_meta("op_proto.outputs()[0].name()")" - for (const proto::OpProto::Var& output : op_proto.outputs()) { + for (const proto::OpProto::Var& output : out_vars) { const std::string& output_name = output.name(); const std::string& output_autograd_name = "p_autograd_" + output_name; @@ -636,8 +755,8 @@ static std::string GenerateGradNodeCreationContent( // [GradOpNode] Generation std::string grad_node_creation_str = ""; - size_t bwd_in_slot_num = op_proto.outputs().size(); - size_t bwd_out_slot_num = op_proto.inputs().size(); + size_t bwd_in_slot_num = out_vars.size(); + size_t bwd_out_slot_num = in_vars.size(); const char* GRAD_OP_NODE_TEMPLATE = " auto grad_node = std::make_shared(%d, %d);\n"; grad_node_creation_str += " // Create GradOpNode\n"; @@ -669,7 +788,7 @@ static std::string GenerateGradNodeCreationContent( // [GradOpNode] SetGradOutMeta // [GradOpNode] Add Edges std::string compute_require_grad_args = "trace_backward"; - for (const proto::OpProto::Var& input : op_proto.inputs()) { + for (const proto::OpProto::Var& input : in_vars) { const std::string& input_name = input.name(); const std::string& input_autograd_name = "p_autograd_" + input_name; compute_require_grad_args += ", &" + input_autograd_name; @@ -689,7 +808,7 @@ static std::string GenerateGradNodeCreationContent( // [AutogradMeta] SetOutRank // [AutogradMeta] SetHistory std::string pass_stop_gradient_args = "false"; - for (const proto::OpProto::Var& output : op_proto.outputs()) { + for (const proto::OpProto::Var& output : out_vars) { const std::string& output_name = output.name(); const std::string& output_autograd_name = "p_autograd_" + output_name; pass_stop_gradient_args += ", &" + output_autograd_name; @@ -743,8 +862,6 @@ static std::string AppendUseOp(const std::string& op_type) { /* --------- CodeGen: Forward ----- */ /* -------------------------------- */ static std::pair GenerateForwardFunctionContents( - const std::vector& - grad_node_default_attr_maps, const std::unordered_map& fwd_inputs_name_pos_map, const std::unordered_map& fwd_outputs_name_pos_map, const std::map& grad_ins_fwd_slotname_map, @@ -758,7 +875,8 @@ 
static std::pair GenerateForwardFunctionContents( std::string, std::vector>>& grad_outs, - const proto::OpProto& op_proto) { + const std::string& op_type, const std::vector& in_vars, + const std::vector& out_vars) { /* // Forward Function Example: std::tuple, Tensor, vector> @@ -779,6 +897,7 @@ static std::pair GenerateForwardFunctionContents( ,ConstructDuplicableOutput(Out1Num)} }; // According to op_proto->attrs() + egr::legacy::RunOp("op_type", ins, outs, attr_map, Controller.Instance().GetExpectedPlace(), {}); @@ -795,8 +914,6 @@ static std::pair GenerateForwardFunctionContents( */ VLOG(6) << "Generating Dygraph Forward Function"; - const std::string& op_type = op_proto.type(); - std::string generated_function_body = ""; std::string dygraph_function_args_str = ""; @@ -806,8 +923,8 @@ static std::pair GenerateForwardFunctionContents( // [Generation] Get Ins Map std::string ins_contents_str = ""; - std::vector input_args_str_list(op_proto.inputs().size()); - for (const proto::OpProto::Var& input : op_proto.inputs()) { + std::vector input_args_str_list(in_vars.size()); + for (const proto::OpProto::Var& input : in_vars) { const std::string& input_name = input.name(); size_t input_position = fwd_inputs_name_pos_map.at(input_name); if (input.duplicable()) { @@ -848,7 +965,7 @@ static std::pair GenerateForwardFunctionContents( // [Generation] Get Outs Map std::string outs_contents_str = ""; - for (const proto::OpProto::Var& output : op_proto.outputs()) { + for (const proto::OpProto::Var& output : out_vars) { const std::string& output_name = output.name(); std::string outnum = "1"; if (output.duplicable()) { @@ -898,17 +1015,17 @@ static std::pair GenerateForwardFunctionContents( " egr::Controller::Instance().GetExpectedPlace(),\n" " &default_attrs, true, {});\n"; std::string trace_op_str = - paddle::string::Sprintf(FWD_TRACE_OP_TEMPLATE, op_proto.type()); + paddle::string::Sprintf(FWD_TRACE_OP_TEMPLATE, op_type); generated_function_body += trace_op_str; generated_function_body += "\n"; VLOG(6) << "Generated AttrMap & TraceOp"; // [Generation] Convert output VarBase to Vector/Tensor - size_t output_size = op_proto.outputs().size(); + size_t output_size = out_vars.size(); std::vector return_contents(output_size); std::vector return_types(output_size); - for (const proto::OpProto::Var& output : op_proto.outputs()) { + for (const proto::OpProto::Var& output : out_vars) { const std::string& output_name = output.name(); std::string out_tensor_str; size_t return_position = fwd_outputs_name_pos_map.at(output_name); @@ -937,8 +1054,8 @@ static std::pair GenerateForwardFunctionContents( // [Generation] ComputeRequireGrad -> GradNodeCreation std::string grad_node_creation_body_str = GenerateGradNodeCreationContent( - grad_node_default_attr_maps, fwd_inputs_name_pos_map, - fwd_outputs_name_pos_map, grad_ins_fwd_slotname_map, op_proto); + fwd_inputs_name_pos_map, fwd_outputs_name_pos_map, + grad_ins_fwd_slotname_map, op_type, in_vars, out_vars); generated_function_body += grad_node_creation_body_str; generated_function_body += "\n"; VLOG(6) << "Generated GradNode Creation codes"; @@ -1004,8 +1121,6 @@ static std::pair GenerateForwardFunctionContents( /* --------- CodeGen: GradNode::operator() ------ */ /* ---------------------------------------------- */ static std::string GenerateGradNodeCCContents( - const std::vector& - grad_node_default_attr_maps, const std::vector& grad_op_types, const std::unordered_map& fwd_inputs_name_pos_map, const std::unordered_map& fwd_outputs_name_pos_map, @@ -1020,7 +1135,8 
@@ static std::string GenerateGradNodeCCContents( std::string, std::vector>>& grad_outs, - const proto::OpProto& op_proto) { + const std::string& op_type, const std::vector& in_vars, + const std::vector& out_vars) { VLOG(6) << "Generating Grad Node CC"; /* [Outline] @@ -1066,7 +1182,6 @@ static std::string GenerateGradNodeCCContents( } */ - const std::string& op_type = op_proto.type(); std::string generated_grad_function_body = ""; // [Generation] Get Tracer @@ -1122,7 +1237,7 @@ static std::string GenerateGradNodeCCContents( // [Generation] Get Outs Map std::unordered_set duplicable_input_name_set; - for (const auto& in : op_proto.inputs()) { + for (const auto& in : in_vars) { if (in.duplicable()) duplicable_input_name_set.insert(in.name()); } @@ -1173,7 +1288,7 @@ static std::string GenerateGradNodeCCContents( // [Generation] Get Attrs Map std::string trace_opbase_str = ""; - for (size_t i = 0; i < grad_node_default_attr_maps.size(); i++) { + for (size_t i = 0; i < grad_op_types.size(); i++) { const std::string& op_base_type = grad_op_types[i]; const char* TRACE_OP_TEMPLATE = @@ -1230,10 +1345,9 @@ static std::string GenerateGradNodeCCContents( /* --------- CodeGen: GradNode Header ------ */ /* ----------------------------------------- */ static std::string GenerateGradNodeHeaderContents( - const std::vector& - grad_node_default_attr_maps, const std::map& grad_ins_fwd_slotname_map, - const proto::OpProto& op_proto) { + const std::string& op_type, const std::vector& in_vars, + const std::vector& out_vars) { VLOG(6) << "Generating Grad Node Header"; const char* GRAD_NODE_TEMPLATE = @@ -1261,8 +1375,6 @@ static std::string GenerateGradNodeHeaderContents( "%s\n" "};"; - const std::string& op_type = op_proto.type(); - // [Generation] Handle Attributes std::string set_attr_map_str = " void SetAttrMap(paddle::framework::AttributeMap&& attr_map) {\n " @@ -1279,12 +1391,12 @@ static std::string GenerateGradNodeHeaderContents( // [Generation] Handle TensorWrappers std::unordered_set duplicable_tensors; - for (const proto::OpProto::Var& input : op_proto.inputs()) { + for (const proto::OpProto::Var& input : in_vars) { if (input.duplicable()) { duplicable_tensors.insert(input.name()); } } - for (const proto::OpProto::Var& output : op_proto.outputs()) { + for (const proto::OpProto::Var& output : out_vars) { if (output.duplicable()) { duplicable_tensors.insert(output.name()); } @@ -1454,13 +1566,12 @@ static void DygraphCodeGeneration(const std::string& output_dir) { /* ----------------------------- */ /* ---- Collect Information ---- */ /* ----------------------------- */ - std::vector grad_node_default_attr_maps; std::vector grad_op_types; - std::unordered_map fwd_inputs_name_pos_map; - std::unordered_map fwd_outputs_name_pos_map; std::map grad_outs_slotname_map; std::map grad_ins_fwd_slotname_map; std::map grad_ins_grad_slotname_map; + std::vector in_vars; + std::vector out_vars; std::map>> grad_ins; @@ -1470,13 +1581,20 @@ static void DygraphCodeGeneration(const std::string& output_dir) { VLOG(6) << "-------- CollectInformationFromOpInfo -------"; bool is_available = CollectInformationFromOpInfo( - op_info, &grad_node_default_attr_maps, &grad_op_types, - &fwd_inputs_name_pos_map, &fwd_outputs_name_pos_map, - &grad_outs_slotname_map, &grad_ins_fwd_slotname_map, - &grad_ins_grad_slotname_map, &grad_ins, &grad_outs); + op_info, &grad_op_types, &grad_outs_slotname_map, + &grad_ins_fwd_slotname_map, &grad_ins_grad_slotname_map, &in_vars, + &out_vars, &grad_ins, &grad_outs); if (!is_available) continue; 
+    VLOG(6) << "-------- PurifyOpProto -------";
+    std::unordered_map<std::string, size_t> fwd_inputs_name_pos_map;
+    std::unordered_map<std::string, size_t> fwd_outputs_name_pos_map;
+    PurifyOpProto(*op_proto, &fwd_inputs_name_pos_map,
+                  &fwd_outputs_name_pos_map, &grad_outs_slotname_map,
+                  &grad_ins_fwd_slotname_map, &grad_ins_grad_slotname_map,
+                  &in_vars, &out_vars, &grad_ins, &grad_outs);
+
     /* --------------------------- */
     /* --------- CodeGen --------- */
     /* --------------------------- */
@@ -1484,10 +1602,10 @@ static void DygraphCodeGeneration(const std::string& output_dir) {
     VLOG(6) << "-------- GenerateForwardFunctionContents -------";
     std::pair<std::string, std::string> body_and_declaration =
         GenerateForwardFunctionContents(
-            grad_node_default_attr_maps, fwd_inputs_name_pos_map,
-            fwd_outputs_name_pos_map, grad_ins_fwd_slotname_map,
-            grad_ins_grad_slotname_map, grad_outs_slotname_map, grad_ins,
-            grad_outs, *op_proto);
+            fwd_inputs_name_pos_map, fwd_outputs_name_pos_map,
+            grad_ins_fwd_slotname_map, grad_ins_grad_slotname_map,
+            grad_outs_slotname_map, grad_ins, grad_outs, op_type, in_vars,
+            out_vars);
 
     std::string fwd_function_str = body_and_declaration.first;
     GenerateForwardDygraphFile(op_type, output_dir, fwd_function_str);
@@ -1498,16 +1616,16 @@ static void DygraphCodeGeneration(const std::string& output_dir) {
     /* ---- xxx_node.h ---- */
     VLOG(6) << "-------- GenerateGradNodeHeaderContents -------";
     std::string grad_node_h_str = GenerateGradNodeHeaderContents(
-        grad_node_default_attr_maps, grad_ins_fwd_slotname_map, *op_proto);
+        grad_ins_fwd_slotname_map, op_type, in_vars, out_vars);
     GenerateNodeHFile(op_type, output_dir, grad_node_h_str);
 
     /* ---- xxx_node.cc ---- */
     VLOG(6) << "-------- GenerateGradNodeCCContents -------";
     std::string grad_node_cc_str = GenerateGradNodeCCContents(
-        grad_node_default_attr_maps, grad_op_types, fwd_inputs_name_pos_map,
-        fwd_outputs_name_pos_map, grad_ins_fwd_slotname_map,
-        grad_ins_grad_slotname_map, grad_outs_slotname_map, grad_ins, grad_outs,
-        *op_proto);
+        grad_op_types, fwd_inputs_name_pos_map, fwd_outputs_name_pos_map,
+        grad_ins_fwd_slotname_map, grad_ins_grad_slotname_map,
+        grad_outs_slotname_map, grad_ins, grad_outs, op_type, in_vars,
+        out_vars);
     GenerateNodeCCFile(op_type, output_dir, grad_node_cc_str);
 
     VLOG(6) << op_type << ": Finished Generation";
diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc
index 850f208359e05..749782f2413e5 100644
--- a/paddle/fluid/pybind/op_function_generator.cc
+++ b/paddle/fluid/pybind/op_function_generator.cc
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "paddle/fluid/pybind/op_function_generator.h"
+
 #include 
 #include 
 #include 
@@ -30,108 +32,6 @@
 #include "paddle/fluid/framework/fleet/ascend_wrapper.h"
 #endif
 
-// NOTE(zhiqiu): Commonly, the inputs in auto-generated OP function are
-// determined by the OP`s proto automatically, i.e., all the inputs registered
-// in OpMaker.
-// However, some OPs have dispensable inputs, which means the input can
-// be none for some conditions. It is discovered that most dispensable inputs
-// is not used in imperative mode, so we drop those inputs when generating OP
-// functions. While, for very few OPs, the dispensable inputs are used, we
-// need to manually specify them in this map.
-std::map> op_ins_map = { - {"layer_norm", {"X", "Scale", "Bias"}}, - {"bincount", {"X", "Weights"}}, - {"fused_attention", - {"X", "LnScale", "LnBias", "QKVW", "QKVBias", "SrcMask", "OutLinearW", - "OutLinearBias", "Ln2Scale", "Ln2Bias"}}, - {"instance_norm", {"X", "Scale", "Bias"}}, - {"gru_unit", {"Input", "HiddenPrev", "Weight", "Bias"}}, - {"label_smooth", {"X", "PriorDist"}}, - {"assign", {"X"}}, - {"reshape2", {"X", "Shape"}}, - {"expand", {"X", "ExpandTimes"}}, - {"slice", {"Input", "StartsTensor", "EndsTensor"}}, - {"fake_quantize_dequantize_moving_average_abs_max", - {"X", "InScale", "InAccum", "InState"}}, - {"nll_loss", {"X", "Label", "Weight"}}, - {"bilinear_tensor_product", {"X", "Y", "Weight", "Bias"}}, - {"gather", {"X", "Index", "Axis"}}, - {"roi_pool", {"X", "ROIs", "RoisNum"}}, - {"roi_align", {"X", "ROIs", "RoisNum"}}, - {"psroi_pool", {"X", "ROIs", "RoisNum"}}, - {"collect_fpn_proposals", - {"MultiLevelRois", "MultiLevelScores", "MultiLevelRoIsNum"}}, - {"distribute_fpn_proposals", {"FpnRois", "RoisNum"}}, - {"warpctc", {"Logits", "Label", "LogitsLength", "LabelLength"}}, - {"hierarchical_sigmoid", - {"X", "W", "Label", "PathTable", "PathCode", "Bias"}}, - {"moving_average_abs_max_scale", {"X", "InAccum", "InState"}}, - {"multiclass_nms3", {"BBoxes", "Scores", "RoisNum"}}, - {"box_coder", {"PriorBox", "PriorBoxVar", "TargetBox"}}, - {"momentum", {"Param", "Grad", "Velocity", "LearningRate", "MasterParam"}}, - {"sparse_momentum", {"Param", "Grad", "Velocity", "Index", "LearningRate"}}, - {"rnn", {"Input", "PreState", "WeightList", "SequenceLength"}}, - {"run_program", {"X", "Params"}}, - {"fused_feedforward", - {"Dropout1Seed", "Dropout2Seed", "Linear1Bias", "Linear2Bias", "Ln1Scale", - "Ln1Bias", "Ln2Scale", "Ln2Bias"}}, - {"faster_tokenizer", {"Text", "Vocab", "TextPair"}}, - {"matrix_rank", {"X", "TolTensor"}}, - {"adam", - {"Param", "Grad", "LearningRate", "Moment1", "Moment2", "Beta1Pow", - "Beta2Pow", "MasterParam"}}, - {"adamw", - {"Param", "Grad", "LearningRate", "Moment1", "Moment2", "Beta1Pow", - "Beta2Pow", "MasterParam"}}, -}; - -// NOTE(zhiqiu): Like op_ins_map. -// Commonly, the outputs in auto-generated OP function are determined by the -// OP`s proto automatically, i.e., all the outputs registered in OpMaker. -// However, some OPs have dispensable outputs, which means the output can -// be none for some conditions. It is discovered that most dispensable outputs -// is not used in imperative mode, so we drop those outputs when generating OP -// functions. While, for very few OPs, the dispensable outputs are used, we -// need to manually specify them in this map. 
-std::map> op_outs_map = { - {"fake_quantize_dequantize_moving_average_abs_max", - {"Out", "OutScale", "OutAccum", "OutState"}}, - {"batch_norm", - {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance", - "ReserveSpace"}}, - {"fused_attention", - {"LnMean", "LnVariance", "LnOut", "QKVOut", "QKVBiasOut", "TransposeOut2", - "QKOut", "QKTVOut", "SoftmaxOut", "AttnDropoutMaskOut", "AttnDropoutOut", - "SrcMaskOut", "FMHAOut", "OutLinearOut", "DropoutMaskOut", "Ln2Mean", - "Ln2Variance", "BiasDropoutResidualOut", "Y"}}, - {"sync_batch_norm", - {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance", - "ReserveSpace"}}, - {"unique", {"Out", "Index", "Indices", "Counts"}}, - {"unique_consecutive", {"Out", "Index", "Counts"}}, - {"generate_proposals", {"RpnRois", "RpnRoiProbs", "RpnRoisNum"}}, - {"collect_fpn_proposals", {"FpnRois", "RoisNum"}}, - {"matrix_nms", {"Out", "Index", "RoisNum"}}, - {"distribute_fpn_proposals", - {"MultiFpnRois", "RestoreIndex", "MultiLevelRoIsNum"}}, - {"moving_average_abs_max_scale", - {"Out", "OutScale", "OutAccum", "OutState"}}, - {"multiclass_nms3", {"Out", "NmsRoisNum"}}, - {"generate_proposals_v2", {"RpnRois", "RpnRoiProbs", "RpnRoisNum"}}, - {"momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}}, - {"sparse_momentum", {"ParamOut", "VelocityOut"}}, - {"rnn", {"DropoutState", "Reserve", "Out", "State"}}, - {"lamb", - {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut"}}, - {"run_program", {"DOut"}}, - {"adam", - {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut", - "MasterParamOut"}}, - {"adamw", - {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut", - "MasterParamOut"}}, -}; - // NOTE(zhiqiu): Commonly, the outputs in auto-generated OP function are // generated in C++ automatically. // However, some OPs need to pass the outputs from Python instead of generating diff --git a/paddle/fluid/pybind/op_function_generator.h b/paddle/fluid/pybind/op_function_generator.h new file mode 100644 index 0000000000000..ad7fa780976d7 --- /dev/null +++ b/paddle/fluid/pybind/op_function_generator.h @@ -0,0 +1,121 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +// NOTE(zhiqiu): Commonly, the inputs in auto-generated OP function are +// determined by the OP`s proto automatically, i.e., all the inputs registered +// in OpMaker. +// However, some OPs have dispensable inputs, which means the input can +// be none for some conditions. It is discovered that most dispensable inputs +// is not used in imperative mode, so we drop those inputs when generating OP +// functions. While, for very few OPs, the dispensable inputs are used, we +// need to manually specify them in this map. 
+std::map> op_ins_map = { + {"layer_norm", {"X", "Scale", "Bias"}}, + {"bincount", {"X", "Weights"}}, + {"fused_attention", + {"X", "LnScale", "LnBias", "QKVW", "QKVBias", "SrcMask", "OutLinearW", + "OutLinearBias", "Ln2Scale", "Ln2Bias"}}, + {"instance_norm", {"X", "Scale", "Bias"}}, + {"gru_unit", {"Input", "HiddenPrev", "Weight", "Bias"}}, + {"label_smooth", {"X", "PriorDist"}}, + {"assign", {"X"}}, + {"reshape2", {"X", "Shape"}}, + {"expand", {"X", "ExpandTimes"}}, + {"slice", {"Input", "StartsTensor", "EndsTensor"}}, + {"fake_quantize_dequantize_moving_average_abs_max", + {"X", "InScale", "InAccum", "InState"}}, + {"nll_loss", {"X", "Label", "Weight"}}, + {"bilinear_tensor_product", {"X", "Y", "Weight", "Bias"}}, + {"gather", {"X", "Index", "Axis"}}, + {"roi_pool", {"X", "ROIs", "RoisNum"}}, + {"roi_align", {"X", "ROIs", "RoisNum"}}, + {"psroi_pool", {"X", "ROIs", "RoisNum"}}, + {"collect_fpn_proposals", + {"MultiLevelRois", "MultiLevelScores", "MultiLevelRoIsNum"}}, + {"distribute_fpn_proposals", {"FpnRois", "RoisNum"}}, + {"warpctc", {"Logits", "Label", "LogitsLength", "LabelLength"}}, + {"hierarchical_sigmoid", + {"X", "W", "Label", "PathTable", "PathCode", "Bias"}}, + {"moving_average_abs_max_scale", {"X", "InAccum", "InState"}}, + {"multiclass_nms3", {"BBoxes", "Scores", "RoisNum"}}, + {"box_coder", {"PriorBox", "PriorBoxVar", "TargetBox"}}, + {"momentum", {"Param", "Grad", "Velocity", "LearningRate", "MasterParam"}}, + {"sparse_momentum", {"Param", "Grad", "Velocity", "Index", "LearningRate"}}, + {"rnn", {"Input", "PreState", "WeightList", "SequenceLength"}}, + {"run_program", {"X", "Params"}}, + {"fused_feedforward", + {"Dropout1Seed", "Dropout2Seed", "Linear1Bias", "Linear2Bias", "Ln1Scale", + "Ln1Bias", "Ln2Scale", "Ln2Bias"}}, + {"faster_tokenizer", {"Text", "Vocab", "TextPair"}}, + {"matrix_rank", {"X", "TolTensor"}}, + {"adam", + {"Param", "Grad", "LearningRate", "Moment1", "Moment2", "Beta1Pow", + "Beta2Pow", "MasterParam"}}, + {"adamw", + {"Param", "Grad", "LearningRate", "Moment1", "Moment2", "Beta1Pow", + "Beta2Pow", "MasterParam"}}, +}; + +// NOTE(zhiqiu): Like op_ins_map. +// Commonly, the outputs in auto-generated OP function are determined by the +// OP`s proto automatically, i.e., all the outputs registered in OpMaker. +// However, some OPs have dispensable outputs, which means the output can +// be none for some conditions. It is discovered that most dispensable outputs +// is not used in imperative mode, so we drop those outputs when generating OP +// functions. While, for very few OPs, the dispensable outputs are used, we +// need to manually specify them in this map. 
+std::map> op_outs_map = { + {"fake_quantize_dequantize_moving_average_abs_max", + {"Out", "OutScale", "OutAccum", "OutState"}}, + {"batch_norm", + {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance", + "ReserveSpace"}}, + {"fused_attention", + {"LnMean", "LnVariance", "LnOut", "QKVOut", "QKVBiasOut", "TransposeOut2", + "QKOut", "QKTVOut", "SoftmaxOut", "AttnDropoutMaskOut", "AttnDropoutOut", + "SrcMaskOut", "FMHAOut", "OutLinearOut", "DropoutMaskOut", "Ln2Mean", + "Ln2Variance", "BiasDropoutResidualOut", "Y"}}, + {"sync_batch_norm", + {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance", + "ReserveSpace"}}, + {"unique", {"Out", "Index", "Indices", "Counts"}}, + {"unique_consecutive", {"Out", "Index", "Counts"}}, + {"generate_proposals", {"RpnRois", "RpnRoiProbs", "RpnRoisNum"}}, + {"collect_fpn_proposals", {"FpnRois", "RoisNum"}}, + {"matrix_nms", {"Out", "Index", "RoisNum"}}, + {"distribute_fpn_proposals", + {"MultiFpnRois", "RestoreIndex", "MultiLevelRoIsNum"}}, + {"moving_average_abs_max_scale", + {"Out", "OutScale", "OutAccum", "OutState"}}, + {"multiclass_nms3", {"Out", "NmsRoisNum"}}, + {"generate_proposals_v2", {"RpnRois", "RpnRoiProbs", "RpnRoisNum"}}, + {"momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}}, + {"sparse_momentum", {"ParamOut", "VelocityOut"}}, + {"rnn", {"DropoutState", "Reserve", "Out", "State"}}, + {"lamb", + {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut"}}, + {"run_program", {"DOut"}}, + {"adam", + {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut", + "MasterParamOut"}}, + {"adamw", + {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut", + "MasterParamOut"}}, +}; From 361742ce51941e742c2f62c9ebf36796edac4b26 Mon Sep 17 00:00:00 2001 From: jim19930609 Date: Wed, 1 Dec 2021 07:27:39 +0000 Subject: [PATCH 2/2] Enabled Eager Dygraph AutoCodeGen for 500+ existing ops --- .../eager/auto_code_generator/CMakeLists.txt | 4 +- .../auto_code_generator/eager_generator.cc | 40 +- .../eager/auto_code_generator/op_list.txt | 501 ++++++++++++++++++ .../tests/task_tests/eager_utils_test.cc | 2 +- paddle/fluid/eager/utils.cc | 4 +- paddle/fluid/eager/utils.h | 2 +- 6 files changed, 538 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt index 03cec80b682b1..187c3db445222 100644 --- a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt +++ b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt @@ -47,12 +47,12 @@ if(WIN32) endif() add_custom_target(eager_codegen - COMMAND "${eager_generator_path}/eager_generator.exe" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated" + COMMAND "${eager_generator_path}/eager_generator.exe" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/op_list.txt" DEPENDS ${EAGER_CODEGEN_DEPS} VERBATIM) else() add_custom_target(eager_codegen - COMMAND "${CMAKE_CURRENT_BINARY_DIR}/eager_generator" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated" + COMMAND "${CMAKE_CURRENT_BINARY_DIR}/eager_generator" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/op_list.txt" DEPENDS eager_generator VERBATIM) endif() diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index 136eaebe2cc4b..283153585866a 100644 
--- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc
+++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <fstream>
 #include 
 #include 
 #include 
@@ -26,6 +27,9 @@
 #include "paddle/fluid/pybind/pybind.h"
 #include "paddle/fluid/string/string_helper.h"
 
+DEFINE_bool(generate_all, false,
+            "Generate all operators currently registered in Paddle");
+
 static std::unordered_set<std::string> operators_to_skip = {
     "fused_elemwise_add_activation",  // No Default Attr
     "fused_elemwise_activation",      // No Default Attr
@@ -40,12 +44,10 @@ static std::unordered_set<std::string> operators_to_skip = {
     "pull_box_sparse",
     "fused_attention",
     "diag_v2",
-};
-
-static std::unordered_set<std::string> operators_to_codegen = {
-    "sigmoid",      "matmul_v2",   "reduce_sum", "elementwise_add",
-    "share_buffer", "var_conv_2d", "split"};
+    "transfer_dtype",
+    "c_split"};
 
+static std::unordered_set<std::string> operators_to_codegen = {};
 static std::unordered_set<std::string> skipped_operators = {};
 
 namespace paddle {
@@ -353,7 +355,10 @@ static bool CheckOpProto(proto::OpProto* op_proto) {
   // Only handle matmul_v2 for now
   VLOG(1) << "------ Analyzing Op ------: " << op_type;
 
-  if (!operators_to_codegen.count(op_type)) return false;
+  if (!FLAGS_generate_all) {
+    if (!operators_to_codegen.count(op_type)) return false;
+  }
+
   if (operators_to_skip.count(op_type)) return false;
 
   return true;
@@ -976,7 +981,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
           paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, outnum);
       dygraph_function_args_str += arg_str;
       const char* FWD_OUTS_CONTENT_TEMPLATE =
-          "{ \"%s\", egr::ConstructDuplicableOutput(%s) },";
+          "{ \"%s\", egr::EagerUtils::ConstructDuplicableOutput(%s) },";
       outs_contents_str += paddle::string::Sprintf(FWD_OUTS_CONTENT_TEMPLATE,
                                                    output_name, outnum);
     } else {
@@ -1253,7 +1258,7 @@ static std::string GenerateGradNodeCCContents(
 
     if (duplicable_input_name_set.count(fwd_input_name)) {
       const char* GRAD_OUTS_CONTENT_TEMPLATE =
-          "{ \"%s\", egr::ConstructDuplicableOutput( "
+          "{ \"%s\", egr::EagerUtils::ConstructDuplicableOutput( "
          "this->OutputMeta()[%d].Size() ) },";
       outs_contents_str += paddle::string::Sprintf(
           GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, fwd_input_position);
@@ -1639,13 +1644,30 @@ static void DygraphCodeGeneration(const std::string& output_dir) {
 }  // namespace framework
 }  // namespace paddle
 
+static void CollectOperatorsToCodeGen(const std::string& op_list_path) {
+  std::string line;
+  std::ifstream op_list_file(op_list_path);
+  if (op_list_file.is_open()) {
+    while (getline(op_list_file, line)) {
+      operators_to_codegen.insert(line);
+    }
+    op_list_file.close();
+  } else {
+    PADDLE_THROW(
+        paddle::platform::errors::Fatal("Unable to open op_list.txt file"));
+  }
+}
+
 int main(int argc, char* argv[]) {
-  if (argc != 2) {
+  if (argc != 3) {
     std::cerr << "argc must be 2" << std::endl;
     return -1;
   }
 
   std::string eager_root = argv[1];
+  std::string op_list_path = argv[2];
+
+  CollectOperatorsToCodeGen(op_list_path);
 
   paddle::framework::DygraphCodeGeneration(eager_root);
   return 0;
diff --git a/paddle/fluid/eager/auto_code_generator/op_list.txt b/paddle/fluid/eager/auto_code_generator/op_list.txt
index 00a9abde156fb..6bfba753633f3 100644
--- a/paddle/fluid/eager/auto_code_generator/op_list.txt
+++ b/paddle/fluid/eager/auto_code_generator/op_list.txt
@@ -2,3 +2,504 @@ sigmoid
 matmul_v2
 reduce_sum
 elementwise_add
+rsqrt
+multihead_matmul
+addmm
+gru
+round
+rank_attention
+fused_embedding_fc_lstm
+where_index +bicubic_interp +arg_min +tile +bilinear_tensor_product +ctc_align +pow2_decay_with_linear_warmup +split +fc +clear_float_status +load +elementwise_max +adadelta +chunk_eval +check_finite_and_unscale +sparse_momentum +tan +adam +fsp +where +logical_xor +multiclass_nms3 +one_hot_v2 +sequence_softmax +affine_channel +triangular_solve +sequence_topk_avg_pooling +space_to_depth +reverse +fused_embedding_eltwise_layernorm +expand_v2 +lgamma +solve +deformable_psroi_pooling +instance_norm +decode_jpeg +gather_nd +reduce_prod +matrix_rank +asin +lstmp +iou_similarity +huber_loss +one_hot +sequence_slice +lookup_table +softplus +depthwise_conv2d +fused_fc_elementwise_layernorm +sigmoid_cross_entropy_with_logits +exp +scatter +equal_all +searchsorted +fusion_squared_mat_sub +unique +log +conv_shift +smooth_l1_loss +linear_interp_v2 +momentum +temporal_shift +nce +mv +proximal_gd +memcpy_h2d +add_position_encoding +cosh +hash +grad_add +sign +prelu +linspace +fill_diagonal +logsigmoid +load_combine +fetch_v2 +randperm +sequence_scatter +partial_sum +relu6 +conv3d +lstm_unit +not_equal +transpose2 +uniform_random_batch_size_like +unfold +lrn +softmax_with_cross_entropy +isfinite_v2 +bernoulli +max_pool3d_with_index +gaussian_random +flatten2 +matmul +cvm +adamax +masked_select +range +bitwise_not +trace +multinomial +modified_huber_loss +roll +squared_l2_distance +conv3d_transpose +share_data +fake_quantize_abs_max +unique_with_counts +fill +concat +fill_zeros_like +hierarchical_sigmoid +isinf_v2 +squeeze +multiclass_nms2 +bpr_loss +fft_c2c +bicubic_interp_v2 +reshape +coalesce_tensor +roi_align +reshape2 +reduce_any +unstack +scatter_nd_add +sequence_reshape +bilateral_slice +fill_any_like +empty +pad_constant_like +pool2d +size +imag +eigh +stack +dgc_momentum +lamb +generate_proposals_v2 +bitwise_or +gru_unit +fake_channel_wise_quantize_dequantize_abs_max +sampling_id +unsqueeze2 +average_accumulates +sequence_enumerate +fusion_seqconv_eltadd_relu +bce_loss +generate_proposal_labels +im2sequence +isinf +adagrad +linear_chain_crf +retinanet_target_assign +fusion_group +teacher_student_sigmoid_loss +random_crop +lookup_table_v2 +detection_map +l1_norm +sqrt +fused_elemwise_activation +slogdeterminant +share_buffer +bitwise_and +diag_embed +unbind +dropout +moving_average_abs_max_scale +beam_search +log_loss +greater_than +kron +sigmoid_focal_loss +rmsprop +conv2d +uniform_random_inplace +maxout +linear_interp +auc +logical_or +batch_norm +acos +unpool +cumprod +sample_logits +pull_box_extended_sparse +crop_tensor +fill_constant +deformable_conv +generate_mask_labels +locality_aware_nms +expand_as +matrix_power +greater_equal +generate_proposals +bilinear_interp +inplace_abn +softshrink +mul +data_norm +get_tensor_from_selected_rows +spp +floor +gelu +retinanet_detection_output +minus +push_dense +silu +sequence_erase +real +nearest_interp_v2 +dgc_clip_by_norm +squeeze2 +strided_slice +conj +precision_recall +save +fusion_seqexpand_concat_fc +fake_quantize_range_abs_max +depthwise_conv2d_transpose +positive_negative_pair +square +var_conv_2d +log1p +fused_softmax_mask_upper_triangle +clip_by_norm +atan2 +box_decoder_and_assign +fft_r2c +roi_pool +overlap_add +fill_constant_batch_size_like +fill_any +dequantize_log +max_pool2d_with_index +pad3d +norm +viterbi_decode +mish +box_coder +flatten +elementwise_mod +margin_cross_entropy +pull_sparse +logical_and +pow +stanh +label_smooth +merged_momentum +ascend_trigger +fused_feedforward +rpn_target_assign +roi_perspective_transform +expand 
+prroi_pool +pool3d +memcpy +distribute_fpn_proposals +frame +bincount +shape +group_norm +resnet_unit +sequence_expand_as +cos_sim +eigvals +save_combine +class_center_sample +read_file +isfinite +arg_max +equal +fake_dequantize_max_abs +qr +anchor_generator +layer_norm +merge_selected_rows +less_equal +rnn +fusion_lstm +lars_momentum +hard_sigmoid +isnan +elementwise_floordiv +correlation +histogram +gather_tree +segment_pool +sync_batch_norm +fusion_repeated_fc_relu +nop +fused_attention +expand_as_v2 +filter_by_instag +diag_v2 +pull_box_sparse +nll_loss +dot +scale +ncclBcast +shuffle_batch +ncclReduce +diag +multiplex +leaky_relu +allclose +adamw +elementwise_pow +prior_box +p_norm +unique_consecutive +lod_reset +pad +sequence_conv +log10 +set_value +bitwise_xor +center_loss +randint +attention_lstm +uniform_random +slice +meshgrid +hard_swish +sin +mean_iou +pad2d +inverse +spectral_norm +shuffle_channel +psroi_pool +seed +ceil +eig +reduce_min +cos +ncclAllReduce +cudnn_lstm +digamma +assign_value +increment +tdm_sampler +fused_softmax_mask +sequence_reverse +eigvalsh +diagonal +trunc +log2 +marker +tanh +yolov3_loss +graph_send_recv +accuracy +atan +less_than +unsqueeze +crf_decoding +log_softmax +ftrl +matrix_nms +top_k_v2 +cast +tanh_shrink +hard_shrink +multiclass_nms +fusion_transpose_flatten_concat +sequence_unpad +fused_elemwise_add_activation +pull_sparse_v2 +frobenius_norm +crop +cross_entropy2 +skip_layernorm +tdm_child +fused_embedding_seq_pool +erf +conv2d_inception_fusion +trilinear_interp +logsumexp +fusion_seqpool_concat +alloc_float_status +sequence_concat +fusion_seqpool_cvm_concat +similarity_focus +argsort +sequence_expand +sgd +fused_bn_add_activation +bilinear_interp_v2 +clip +deformable_conv_v1 +hinge_loss +determinant +conv2d_transpose +memcpy_d2h +softsign +fake_quantize_dequantize_abs_max +broadcast_tensors +grid_sampler +fft_c2r +pyramid_hash +fake_quantize_dequantize_moving_average_abs_max +multi_dot +sequence_pool +transpose +top_k +dist +affine_grid +gaussian_random_batch_size_like +fake_channel_wise_dequantize_max_abs +reciprocal +sequence_mask +fill_diagonal_tensor +abs +partial_concat +elu +index_select +row_conv +cross +elementwise_mul +decayed_adagrad +bipartite_match +run_program +fake_quantize_moving_average_abs_max +mine_hard_examples +target_assign +lstm +truncated_gaussian_random +match_matrix_tensor +elementwise_div +kldiv_loss +cumsum +sum +proximal_adagrad +update_loss_scaling +shard_index +selu +mean +gumbel_softmax +sequence_pad +tree_conv +assign +flatten_contiguous_range +tril_triu +brelu +celu +reduce_mean +sinh +rank_loss +reduce_max +fusion_gru +fill_zeros_like2 +expm1 +squared_l2_norm +elementwise_sub +margin_rank_loss +faster_tokenizer +relu +is_empty +reduce_all +edit_distance +bmm +yolo_box +soft_relu +density_prior_box +eye +swish +cross_entropy +dpsgd +cholesky +batch_fc +nearest_interp +gather +trilinear_interp_v2 +box_clip +isnan_v2 +softmax +conv2d_fusion +fused_batch_norm_act +get_float_status +index_sample +elementwise_min +logical_not +collect_fpn_proposals +pixel_shuffle +thresholded_relu +polygon_box_transform +lookup_table_dequant +warpctc +fake_channel_wise_quantize_abs_max +dequantize_abs_max +svd +flip diff --git a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc index c7c27dcc1d150..ea9aae83ff189 100644 --- a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc +++ b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc @@ -60,7 +60,7 @@ 
TEST(EagerUtils, AutoGradMeta) {
   std::vector<AutogradMeta*> autograd_metas =
       EagerUtils::multi_autograd_meta(&ets);
   std::vector<AutogradMeta*> unsafe_autograd_metas =
-      EagerUtils::unsafe_autograd_meta(&ets);
+      EagerUtils::unsafe_autograd_meta(ets);
   CHECK_NOTNULL(unsafe_autograd_metas[0]);
   CHECK_NOTNULL(unsafe_autograd_metas[1]);
 
diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc
index 28eefd62c5aa0..be06bf9eb344b 100644
--- a/paddle/fluid/eager/utils.cc
+++ b/paddle/fluid/eager/utils.cc
@@ -48,9 +48,9 @@ AutogradMeta* EagerUtils::unsafe_autograd_meta(const egr::EagerTensor& target) {
 }
 
 std::vector<AutogradMeta*> EagerUtils::unsafe_autograd_meta(
-    std::vector<egr::EagerTensor>* targets) {
+    const std::vector<egr::EagerTensor>& targets) {
   std::vector<AutogradMeta*> metas;
-  for (const egr::EagerTensor& t : *targets) {
+  for (const egr::EagerTensor& t : targets) {
     metas.push_back(unsafe_autograd_meta(t));
   }
   return metas;
diff --git a/paddle/fluid/eager/utils.h b/paddle/fluid/eager/utils.h
index f7e226a2aba36..03f922e5bf9ba 100644
--- a/paddle/fluid/eager/utils.h
+++ b/paddle/fluid/eager/utils.h
@@ -114,7 +114,7 @@ class EagerUtils {
   // This method will return an AutogradMeta pointer unsafely.
   static AutogradMeta* unsafe_autograd_meta(const egr::EagerTensor& target);
   static std::vector<AutogradMeta*> unsafe_autograd_meta(
-      std::vector<egr::EagerTensor>* targets);
+      const std::vector<egr::EagerTensor>& targets);
 
   template <typename T, typename... Args>
   static bool ComputeRequireGrad(T trace_backward, Args&&... args) {