diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt index 08c2d2e05558b..8810b7845dbe8 100644 --- a/paddle/fluid/CMakeLists.txt +++ b/paddle/fluid/CMakeLists.txt @@ -5,7 +5,7 @@ add_subdirectory(framework) add_subdirectory(imperative) add_subdirectory(operators) add_subdirectory(string) -add_subdirectory(pybind) add_subdirectory(eager) +add_subdirectory(pybind) # NOTE: please add subdirectory inference at last. add_subdirectory(inference) diff --git a/paddle/fluid/eager/autocodegen/eager_generator.cc b/paddle/fluid/eager/autocodegen/eager_generator.cc index 6ec92854b8d32..3de1989ccb065 100644 --- a/paddle/fluid/eager/autocodegen/eager_generator.cc +++ b/paddle/fluid/eager/autocodegen/eager_generator.cc @@ -36,8 +36,8 @@ static std::unordered_set operators_to_skip = { }; static std::unordered_set operators_to_codegen = { - "sigmoid", "matmul_v2", -}; + "sigmoid", "matmul_v2", "reduce_sum", "elementwise_add", + "share_buffer", "var_conv_2d", "split"}; static std::unordered_set skipped_operators = {}; @@ -615,6 +615,12 @@ static std::string GenerateGradNodeCreationContent( prepare_autograd_meta_str += get_autograd_meta_str; prepare_autograd_meta_str += "\n"; + // [GradOpNode] GetTraceBackward + std::string trace_backward_str = + " bool trace_backward = egr::Controller::Instance().HasGrad();\n"; + prepare_autograd_meta_str += trace_backward_str; + prepare_autograd_meta_str += "\n"; + // [GradOpNode] Generation std::string grad_node_creation_str = ""; @@ -629,14 +635,9 @@ static std::string GenerateGradNodeCreationContent( // [GradOpNode] Set Attrs grad_node_creation_str += " // Set Attributes\n"; - for (const auto& default_attr_map : grad_node_default_attr_maps) { - for (const auto& kv : default_attr_map) { - const std::string& attr_name = kv.first; - const char* SET_ATTR_TEMPLATE = " grad_node->SetAttr%s(%s);\n"; - grad_node_creation_str += - paddle::string::Sprintf(SET_ATTR_TEMPLATE, attr_name, attr_name); - } - } + grad_node_creation_str += " grad_node->SetAttrMap(std::move(attrs));\n"; + grad_node_creation_str += + " grad_node->SetDefaultAttrMap(std::move(default_attrs));\n"; grad_node_creation_str += "\n"; // [GradOpNode] Set TensorWrappers @@ -710,6 +711,18 @@ static std::string GenerateGradNodeCreationContent( return grad_node_creation_body_str; } +static std::string AppendUseOp(const std::string& op_type) { + // [Generation] Append USE_OP + const char* USE_OP_TEMPLATE = "USE_OP(%s);\n"; + std::string return_str = paddle::string::Sprintf(USE_OP_TEMPLATE, op_type); + + // Special Ops + if (op_type == "reduce_sum") + return_str += paddle::string::Sprintf(USE_OP_TEMPLATE, "reduce_sum_grad"); + + return return_str; +} + /* -------------------------------- */ /* --------- CodeGen: Forward ----- */ /* -------------------------------- */ @@ -733,7 +746,8 @@ static std::pair GenerateForwardFunctionContents( /* // Forward Function Example: std::tuple, Tensor, vector> - kernel_function(vector& X, Tensor& Y, float attr0, int attr1, size_t + kernel_function(vector& X, Tensor& Y, const paddle::AttributeMap& + attr_map, size_t Out0Num, size_t Out1Num) { // Forward Function Body @@ -749,8 +763,7 @@ static std::pair GenerateForwardFunctionContents( ,ConstructDuplicableOutput(Out1Num)} }; // According to op_proto->attrs() - framework::AttributeMap attrs = { {"attr0", attr0}, ... 
}; - egr::RunOp("op_type", ins, outs, attrs, + egr::RunOp("op_type", ins, outs, attr_map, Controller.Instance().GetExpectedPlace(), {}); // According to fwd_outputs_names @@ -814,32 +827,6 @@ static std::pair GenerateForwardFunctionContents( generated_function_body += ins_map_str; generated_function_body += "\n"; - // [Generation] Get Attrs - std::string attr_contents_str = ""; - for (const proto::OpProto::Attr& attr : op_proto.attrs()) { - const std::string& attr_name = attr.name(); - - proto::AttrType attr_type = attr.type(); - const std::string attr_type_str = AttrTypeToString(attr_type); - - const char* FWD_KERNEL_ARG_TEMPLATE = ", const %s %s"; - std::string arg_str = paddle::string::Sprintf(FWD_KERNEL_ARG_TEMPLATE, - attr_type_str, attr_name); - dygraph_function_args_str += arg_str; - - const char* FWD_ATTR_CONTENT_TEMPLATE = "{ \"%s\", %s }, "; - std::string attr_content_str = paddle::string::Sprintf( - FWD_ATTR_CONTENT_TEMPLATE, attr_name, attr_name); - attr_contents_str += attr_content_str; - } - - const char* FWD_ATTR_MAP_TEMPLATE = - " paddle::framework::AttributeMap attrs = { %s };\n"; - std::string attr_map_str = - paddle::string::Sprintf(FWD_ATTR_MAP_TEMPLATE, attr_contents_str); - generated_function_body += attr_map_str; - generated_function_body += "\n"; - // [Generation] Get Outs Map std::string outs_contents_str = ""; for (const proto::OpProto::Var& output : op_proto.outputs()) { @@ -877,10 +864,18 @@ static std::pair GenerateForwardFunctionContents( generated_function_body += outs_map_str; generated_function_body += "\n"; + // [Generation] Get Attrs + dygraph_function_args_str += + ", const paddle::framework::AttributeMap& attr_map"; + generated_function_body += "\n"; + // [Generation] Get TraceOp const char* FWD_TRACE_OP_TEMPLATE = - " egr::RunOp(\"%s\", ins, outs, attrs, " - "egr::Controller::Instance().GetExpectedPlace(), {});\n"; + " paddle::framework::AttributeMap attrs = attr_map;\n" + " paddle::framework::AttributeMap default_attrs;\n" + " egr::RunOp(\"%s\", ins, outs, attrs, \n" + " egr::Controller::Instance().GetExpectedPlace(),\n" + " &default_attrs, true, {});\n"; std::string trace_op_str = paddle::string::Sprintf(FWD_TRACE_OP_TEMPLATE, op_proto.type()); generated_function_body += trace_op_str; @@ -945,7 +940,7 @@ static std::pair GenerateForwardFunctionContents( return_str = paddle::string::Sprintf(FWD_TUPLE_RETURN_TEMPLATE, return_type_str, return_content_str); - const char* FWD_FUNCTION_PROTO_RETURN_TEMPLATE = "std::tuple<%s>;"; + const char* FWD_FUNCTION_PROTO_RETURN_TEMPLATE = "std::tuple<%s>"; function_proto_return_type_str = paddle::string::Sprintf( FWD_FUNCTION_PROTO_RETURN_TEMPLATE, return_type_str); } else { @@ -962,16 +957,13 @@ static std::pair GenerateForwardFunctionContents( // [Generation] Get Full Function std::string function_name = op_type + "_dygraph_function"; - // Add trace_backward - dygraph_function_args_str += ", bool trace_backward"; const char* FWD_FUNCTION_TEMPLATE = "%s %s(%s) {\n\n%s\n}\n\n"; std::string fwd_function_str = paddle::string::Sprintf( FWD_FUNCTION_TEMPLATE, function_proto_return_type_str, function_name, dygraph_function_args_str, generated_function_body); // [Generation] Append USE_OP - const char* USE_OP_TEMPLATE = "USE_OP(%s);\n"; - fwd_function_str += paddle::string::Sprintf(USE_OP_TEMPLATE, op_type); + fwd_function_str += AppendUseOp(op_type); // [Generation] Generate forward functions header const char* FWD_HEADER_TEMPLATE = "%s %s(%s);\n"; @@ -1031,15 +1023,9 @@ static std::string GenerateGradNodeCCContents( // 
Visit each OpBase for(auto iter = "grad_node->begin()"; iter < "grad_node->end()"; iter++) { - framework::AttributeMap attrs; - for("auto& kv : iter->Attrs()") { - attrs[kv.first] = this->"kv.first"; - } - for(auto& kv : "iter->DefaultAttrsMap()") { - attrs[kv.first] = this->"kv.first"; - } - egr::RunOp("iter->Type()", ins, outs, attrs, - egr::Controller::Instance().ExpectedPlace(), false, {}); + // Simply pass entire attribute map to kernels + egr::RunOp("iter->Type()", ins, outs, this->attr_map_, + egr::Controller::Instance().ExpectedPlace(), false, {}); } vector> outputs(outs.size()); @@ -1100,19 +1086,35 @@ static std::string GenerateGradNodeCCContents( generated_grad_function_body += ins_map_str; // [Generation] Get Outs Map + std::unordered_set duplicable_input_name_set; + for (const auto& out : op_proto.outputs()) { + if (out.duplicable()) duplicable_input_name_set.insert(out.name()); + } + std::string outs_contents_str = ""; for (auto iter : grad_outs) { const std::string& grad_output_name = iter.first; if (grad_outs_slotname_map.count(grad_output_name)) { // Fwd Tensor - size_t fwd_input_position = fwd_inputs_name_pos_map.at( - grad_outs_slotname_map.at(grad_output_name)); - const char* GRAD_OUTS_CONTENT_TEMPLATE = - "{ \"%s\", egr::ConstructDuplicableOutput( " - "this->OutputMeta()[%d].Size() ) },"; - outs_contents_str += paddle::string::Sprintf( - GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, fwd_input_position); + const std::string& fwd_input_name = + grad_outs_slotname_map.at(grad_output_name); + size_t fwd_input_position = fwd_inputs_name_pos_map.at(fwd_input_name); + + if (duplicable_input_name_set.count(fwd_input_name)) { + const char* GRAD_OUTS_CONTENT_TEMPLATE = + "{ \"%s\", egr::ConstructDuplicableOutput( " + "this->OutputMeta()[%d].Size() ) },"; + outs_contents_str += paddle::string::Sprintf( + GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, fwd_input_position); + } else { + const char* GRAD_OUTS_CONTENT_TEMPLATE = + "{ \"%s\", " + "{std::make_shared(egr::Controller::Instance()." 
+ "GenerateUniqueName())}},"; + outs_contents_str += paddle::string::Sprintf(GRAD_OUTS_CONTENT_TEMPLATE, + grad_output_name); + } } else { PADDLE_THROW(platform::errors::Fatal( "Unable to find forward slot name that matches %s", @@ -1134,32 +1136,16 @@ static std::string GenerateGradNodeCCContents( // [Generation] Get Attrs Map std::string trace_opbase_str = ""; for (size_t i = 0; i < grad_node_default_attr_maps.size(); i++) { - const auto& default_attr_map = grad_node_default_attr_maps[i]; const std::string& op_base_type = grad_op_types[i]; - std::string attr_contents_str = ""; - for (const auto& kv : default_attr_map) { - const std::string& attr_name = kv.first; - const std::string& struct_attr_name = kv.first + "_"; - const char* ATTR_CONTENT_TEMPLATE = "{ \"%s\", this->%s},"; - attr_contents_str += paddle::string::Sprintf(ATTR_CONTENT_TEMPLATE, - attr_name, struct_attr_name); - } - if (attr_contents_str.size() > 0) attr_contents_str.pop_back(); - - const char* ATTRS_MAP_TEMPLATE = - " paddle::framework::AttributeMap attrs = { %s };\n"; - std::string attrs_map_str = - paddle::string::Sprintf(ATTRS_MAP_TEMPLATE, attr_contents_str); const char* TRACE_OP_TEMPLATE = - " egr::RunOp(\"%s\", ins, outs, attrs, " - "egr::Controller::Instance().GetExpectedPlace(), {});\n"; - std::string trace_op_str = - paddle::string::Sprintf(TRACE_OP_TEMPLATE, op_base_type); - - trace_opbase_str += attrs_map_str; - trace_opbase_str += "\n"; - trace_opbase_str += trace_op_str; + " // Pass the entire attribute map to TraceOp\n" + " // The underlying kernel will pickup whatever attribute they need " + "at runtime\n" + " egr::RunOp(\"%s\", ins, outs, this->attr_map_,\n" + " egr::Controller::Instance().GetExpectedPlace(),\n" + " &this->default_attr_map_, false, {});\n"; + trace_opbase_str = paddle::string::Sprintf(TRACE_OP_TEMPLATE, op_base_type); } generated_grad_function_body += trace_opbase_str; @@ -1221,48 +1207,29 @@ static std::string GenerateGradNodeHeaderContents( "\n" " // SetX, SetY, ...\n" "%s\n" - " // SetAttr0, SetAttr1, ...\n" + " // SetAttrMap\n" "%s\n" "\n" " private:\n" " // TensorWrappers\n" "%s\n" - " // Attribute Members\n" + " // Attribute Map\n" "%s\n" "};"; const std::string& op_type = op_proto.type(); // [Generation] Handle Attributes - std::string set_attrs_str = ""; - std::string attr_members_str = ""; - for (const auto& default_attr_map : grad_node_default_attr_maps) { - for (const auto& kv : default_attr_map) { - const std::string& attr_name = kv.first; - const std::string& struct_attr_name = kv.first + "_"; - framework::Attribute attr = kv.second; - - std::string attr_arg_type = GetAttrType(attr, true).first; - const char* SET_ATTR_TEMPLATE = - " void SetAttr%s(%s) {\n %s\n }\n"; - const char* SET_ATTR_BODY_TEMPLATE = "%s = %s;"; - const char* ATTR_ARGS_TEMPLATE = "const %s %s"; - - std::string attr_args_str = - paddle::string::Sprintf(ATTR_ARGS_TEMPLATE, attr_arg_type, attr_name); - std::string set_attr_body_str = paddle::string::Sprintf( - SET_ATTR_BODY_TEMPLATE, struct_attr_name, attr_name); - set_attrs_str += paddle::string::Sprintf( - SET_ATTR_TEMPLATE, attr_name, attr_args_str, set_attr_body_str); - - std::string attr_member_type = GetAttrType(attr, false).first; - std::string attr_value = GetAttrType(attr, false).second; - - const char* ATTR_MEMBER_TEMPLATE = " %s %s = %s;\n"; - attr_members_str += paddle::string::Sprintf( - ATTR_MEMBER_TEMPLATE, attr_member_type, struct_attr_name, attr_value); - } - } + std::string set_attr_map_str = + " void 
SetAttrMap(paddle::framework::AttributeMap&& attr_map) {\n " "attr_map_ = std::move(attr_map);\n }\n"; + set_attr_map_str += + " void SetDefaultAttrMap(paddle::framework::AttributeMap&& " + "default_attr_map) {\n default_attr_map_ = " + "std::move(default_attr_map);\n }\n"; + std::string attr_members_str = + " paddle::framework::AttributeMap attr_map_;\n"; + attr_members_str += " paddle::framework::AttributeMap default_attr_map_;"; // [Generation] Handle TensorWrappers std::unordered_set duplicable_inputs; @@ -1314,10 +1281,10 @@ static std::string GenerateGradNodeHeaderContents( tensor_wrapper_arg_str, tensor_wrapper_body_str); } - std::string grad_node_str = - paddle::string::Sprintf(GRAD_NODE_TEMPLATE, op_type, op_type, op_type, - op_type, set_tensor_wrappers_str, set_attrs_str, - tensor_wrapper_members_str, attr_members_str); + std::string grad_node_str = paddle::string::Sprintf( + GRAD_NODE_TEMPLATE, op_type, op_type, op_type, op_type, + set_tensor_wrappers_str, set_attr_map_str, tensor_wrapper_members_str, + attr_members_str); return grad_node_str; } @@ -1360,6 +1327,7 @@ static void GenerateNodeHFile(const std::string& op_type, std::string node_h_filename = op_type + "_node.h"; std::string node_h_path = nodes_dir + node_h_filename; std::string node_h_include_str = + "#pragma once\n" "#include \"paddle/fluid/eager/tensor_wrapper.h\"\n" "#include \"paddle/fluid/eager/function_api.h\"\n" "#include \"paddle/fluid/eager/legacy/op_runner.h\"\n" @@ -1395,6 +1363,7 @@ static void GenerateNodeCCFile(const std::string& op_type, static std::string GenerateDygraphHFileIncludes() { std::string dygraph_forward_api_includes_str = + "#pragma once\n" "#include \"glog/logging.h\"\n" "#include \"paddle/fluid/eager/autograd_meta.h\"\n" "#include \"paddle/pten/api/all.h\"\n" @@ -1432,6 +1401,6 @@ static void DygraphCodeGeneration(const std::string& output_dir) { std::vector>> grad_outs; bool is_available = CollectInformationFromOpInfo( op_info, &grad_node_default_attr_maps, &grad_op_types, &fwd_inputs_name_pos_map, &fwd_outputs_name_pos_map, diff --git a/paddle/fluid/eager/autocodegen/op_list.txt b/paddle/fluid/eager/autocodegen/op_list.txt index 20d2977a2afae..00a9abde156fb 100644 --- a/paddle/fluid/eager/autocodegen/op_list.txt +++ b/paddle/fluid/eager/autocodegen/op_list.txt @@ -1,2 +1,4 @@ sigmoid matmul_v2 +reduce_sum +elementwise_add diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h index 44ab50b9975c1..9560bfd995719 100644 --- a/paddle/fluid/eager/eager_tensor.h +++ b/paddle/fluid/eager/eager_tensor.h @@ -194,6 +194,10 @@ class EagerTensor final { tensor_ = tensor; } + const std::shared_ptr& Tensor() const { + return tensor_; + } + /** Part 9: Get framework::Variable from EagerTensor **/ const paddle::framework::Variable& Var() const { return var_; } diff --git a/paddle/fluid/eager/function_api.h b/paddle/fluid/eager/function_api.h index 9bfc5884bc566..71bae802a6e1e 100644 --- a/paddle/fluid/eager/function_api.h +++ b/paddle/fluid/eager/function_api.h @@ -56,6 +56,7 @@ class Controller { } void SetAMPLevel(int level) { amp_level_ = level; } const int GetAMPLevel() const { return amp_level_; } + bool HasGrad() const { return has_grad_; } std::string GenerateUniqueName(std::string key = "eager_tmp") { return generator_->Generate(key); } @@ -65,6 +66,7 @@ class Controller { static Controller* controller_; std::shared_ptr expected_place_ = nullptr; int amp_level_ = 0; + bool has_grad_ = true; std::unique_ptr 
generator_{new UniqueNameGenerator()}; DISABLE_COPY_AND_ASSIGN(Controller); }; diff --git a/paddle/fluid/eager/legacy/amp_auto_cast.cc b/paddle/fluid/eager/legacy/amp_auto_cast.cc index 0d39c68ccd1c9..b86cb7a48f616 100644 --- a/paddle/fluid/eager/legacy/amp_auto_cast.cc +++ b/paddle/fluid/eager/legacy/amp_auto_cast.cc @@ -117,7 +117,8 @@ static inline std::shared_ptr CastToType( { AutoCastGuard guard(0); - RunOp("cast", ins, outs, std::move(attrs), {}); + paddle::framework::AttributeMap default_attrs; + RunOp("cast", ins, outs, std::move(attrs), {}, &default_attrs, true); } return out; diff --git a/paddle/fluid/eager/legacy/op_runner.cc b/paddle/fluid/eager/legacy/op_runner.cc index 950e0094c79c8..c8858cbceca2d 100644 --- a/paddle/fluid/eager/legacy/op_runner.cc +++ b/paddle/fluid/eager/legacy/op_runner.cc @@ -92,6 +92,8 @@ void OpRunImpl(const paddle::framework::OperatorBase& op, void RunOp(const std::string& type, const NameTensorMap& ins, const NameTensorMap& outs, paddle::framework::AttributeMap attrs, const paddle::platform::Place& place, + paddle::framework::AttributeMap* default_attrs, + bool override_default_attr_map, const std::map& inplace_map) { VLOG(1) << "Run Op: " << type; if (FLAGS_use_mkldnn) { @@ -108,16 +110,24 @@ void RunOp(const std::string& type, const NameTensorMap& ins, } } auto op = paddle::framework::OpRegistry::CreateOp(type, {}, {}, {}, false); - const auto& op_info = op->Info(); - auto* attr_checker = op_info.Checker(); - if (attr_checker) { - attr_checker->Check(&attrs, true, /*only_check_exist_value=*/true); + + PADDLE_ENFORCE_NOT_NULL(default_attrs, + paddle::platform::errors::PermissionDenied( + "Detected default_attrs = nullptr.")); + + if (override_default_attr_map) { + const auto& op_info = op->Info(); + auto* attr_checker = op_info.Checker(); + if (attr_checker) { + attr_checker->Check(&attrs, true, /*only_check_exist_value=*/true); + } + + static paddle::framework::AttributeMap empty_attrs_map = {}; + *default_attrs = attr_checker == nullptr + ? empty_attrs_map + : attr_checker->GetDefaultAttrMap(); } - static paddle::framework::AttributeMap empty_attrs_map = {}; - const paddle::framework::AttributeMap& default_attrs = - attr_checker == nullptr ? 
empty_attrs_map - : attr_checker->GetDefaultAttrMap(); auto amp_level = egr::Controller::Instance().GetAMPLevel(); NameTensorMap new_ins = ins; if (amp_level == 1) { @@ -155,7 +165,7 @@ void RunOp(const std::string& type, const NameTensorMap& ins, #endif } - OpRunImpl(*op, new_ins, outs, attrs, default_attrs, place); + OpRunImpl(*op, new_ins, outs, attrs, *default_attrs, place); } catch (paddle::platform::EnforceNotMet& exception) { paddle::framework::AppendErrorOpHint(type, &exception); throw std::move(exception); diff --git a/paddle/fluid/eager/legacy/op_runner.h b/paddle/fluid/eager/legacy/op_runner.h index 8c20f76f089ab..84745dfe6d737 100644 --- a/paddle/fluid/eager/legacy/op_runner.h +++ b/paddle/fluid/eager/legacy/op_runner.h @@ -23,5 +23,7 @@ namespace egr { void RunOp(const std::string& type, const NameTensorMap& ins, const NameTensorMap& outs, paddle::framework::AttributeMap attrs, const paddle::platform::Place& place, + paddle::framework::AttributeMap* default_attrs, + bool override_default_attr_map, const std::map& inplace_map = {}); } diff --git a/paddle/fluid/eager/legacy/prepared_operator.cc b/paddle/fluid/eager/legacy/prepared_operator.cc index 8757d950a391d..3f165b3a97024 100644 --- a/paddle/fluid/eager/legacy/prepared_operator.cc +++ b/paddle/fluid/eager/legacy/prepared_operator.cc @@ -23,7 +23,7 @@ #include "paddle/fluid/platform/xpu/xpu_op_list.h" #endif DECLARE_bool(check_nan_inf); -DECLARE_bool(run_pt_kernel); +DECLARE_bool(run_pten_kernel); namespace egr { diff --git a/paddle/fluid/eager/tests/benchmark/benchmark_eager_cpu.cc b/paddle/fluid/eager/tests/benchmark/benchmark_eager_cpu.cc index 5d2d92f1d85e8..a85b1e9f49a1b 100644 --- a/paddle/fluid/eager/tests/benchmark/benchmark_eager_cpu.cc +++ b/paddle/fluid/eager/tests/benchmark/benchmark_eager_cpu.cc @@ -17,6 +17,7 @@ #include #include "gtest/gtest.h" +#include "paddle/fluid/platform/flags.h" #include "paddle/fluid/eager/api/api.h" #include "paddle/fluid/eager/autograd_meta.h" @@ -30,10 +31,16 @@ #ifdef WITH_GPERFTOOLS #include "gperftools/profiler.h" #endif + // TODO(jiabin): remove nolint here!!! 
using namespace egr; // NOLINT -TEST(Benchmark, EagerScalePerformance) { +// Disable pten path +DECLARE_bool(run_pten_kernel); + +TEST(Benchmark, Init) { FLAGS_run_pten_kernel = false; } + +TEST(Benchmark, EagerScaleCPU) { // Prepare Device Contexts egr::InitEnv(paddle::platform::CPUPlace()); @@ -45,7 +52,7 @@ TEST(Benchmark, EagerScalePerformance) { RetainGradForTensor(tensor); if (mode == "Accuracy") { - benchmark_eager_scale_accuracy_check(tensor); + benchmark_eager_scale(tensor, true /* accuracy_check*/); } else if (mode == "Performance") { auto t_start = std::chrono::high_resolution_clock::now(); @@ -53,6 +60,7 @@ TEST(Benchmark, EagerScalePerformance) { ProfilerStart("eager_scale_cpu.out"); #endif benchmark_eager_scale(tensor); + #ifdef WITH_GPERFTOOLS ProfilerStop(); #endif @@ -68,7 +76,7 @@ TEST(Benchmark, EagerScalePerformance) { } } -TEST(Benchmark, EagerIntermediateMatmulPerformance) { +TEST(Benchmark, EagerIntermediateMatmulCPU) { // Prepare Device Contexts InitEnv(paddle::platform::CPUPlace()); @@ -89,7 +97,7 @@ TEST(Benchmark, EagerIntermediateMatmulPerformance) { RetainGradForTensor(Y); if (mode == "Accuracy") { - benchmark_eager_intermediate_matmul_accuracy_check(X, Y); + benchmark_eager_intermediate_matmul(X, Y, true /* accuracy_check */); } else if (mode == "Performance") { auto t_start = std::chrono::high_resolution_clock::now(); @@ -97,6 +105,66 @@ TEST(Benchmark, EagerIntermediateMatmulPerformance) { ProfilerStart("eager_intermediate_matmul_cpu.out"); #endif benchmark_eager_intermediate_matmul(X, Y); + +#ifdef WITH_GPERFTOOLS + ProfilerStop(); +#endif + auto t_end = std::chrono::high_resolution_clock::now(); + double elapsed_time_ms = + std::chrono::duration(t_end - t_start).count(); + std::cout << "Duration: " << elapsed_time_ms << " ms" << std::endl; + + } else { + PADDLE_THROW(paddle::platform::errors::Fatal("Unknown benchmark mode")); + } + } +} + +TEST(Benchmark, EagerIntermediateMLPCPU) { + // Prepare Device Contexts + InitEnv(paddle::platform::CPUPlace()); + + auto tracer = std::make_shared(); + paddle::imperative::SetCurrentTracer(tracer); + + for (const std::string& mode : {"Accuracy", "Performance"}) { + paddle::framework::DDim ddimX = + paddle::framework::make_ddim({MLP_M, MLP_N}); + egr::EagerTensor X = EagerUtils::CreateTensorWithValue( + ddimX, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, MLP_X_VAL, true); + RetainGradForTensor(X); + + std::vector Ws; + std::vector Bs; + for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { + paddle::framework::DDim ddimW = + paddle::framework::make_ddim({MLP_N, MLP_K}); + egr::EagerTensor W = EagerUtils::CreateTensorWithValue( + ddimW, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, MLP_W_VAL, true); + RetainGradForTensor(W); + + paddle::framework::DDim ddimB = paddle::framework::make_ddim({MLP_K}); + egr::EagerTensor B = EagerUtils::CreateTensorWithValue( + ddimB, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, MLP_B_VAL, true); + RetainGradForTensor(B); + + Ws.emplace_back(std::move(W)); + Bs.emplace_back(std::move(B)); + } + + if (mode == "Accuracy") { + benchmark_eager_intermediate_mlp(X, Ws, Bs, true /* accuracy_check */); + + } else if (mode == "Performance") { + auto t_start = std::chrono::high_resolution_clock::now(); +#ifdef WITH_GPERFTOOLS + ProfilerStart("eager_intermediate_mlp_cpu.out"); +#endif + benchmark_eager_intermediate_mlp(X, Ws, Bs); + #ifdef WITH_GPERFTOOLS ProfilerStop(); #endif diff --git 
a/paddle/fluid/eager/tests/benchmark/benchmark_eager_cuda.cc b/paddle/fluid/eager/tests/benchmark/benchmark_eager_cuda.cc index 7cacaea8c8467..8f7eaf99d5dc6 100644 --- a/paddle/fluid/eager/tests/benchmark/benchmark_eager_cuda.cc +++ b/paddle/fluid/eager/tests/benchmark/benchmark_eager_cuda.cc @@ -16,6 +16,7 @@ #include #include "gtest/gtest.h" +#include "paddle/fluid/platform/flags.h" #include "paddle/fluid/eager/api/api.h" #include "paddle/fluid/eager/autograd_meta.h" @@ -25,13 +26,19 @@ #include "paddle/fluid/eager/tests/benchmark/benchmark_utils.h" #include "paddle/fluid/eager/tests/test_utils.h" + #ifdef WITH_GPERFTOOLS #include "gperftools/profiler.h" #endif + // TODO(jiabin): remove nolint here!!! using namespace egr; // NOLINT -TEST(Benchmark, EagerScalePerformance) { +DECLARE_bool(run_pten_kernel); + +TEST(Benchmark, Init) { FLAGS_run_pten_kernel = false; } + +TEST(Benchmark, EagerScaleCUDA) { egr::InitEnv(paddle::platform::CUDAPlace()); for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) { @@ -42,7 +49,7 @@ TEST(Benchmark, EagerScalePerformance) { RetainGradForTensor(tensor); if (mode == "Accuracy") { - benchmark_eager_scale_accuracy_check(tensor); + benchmark_eager_scale(tensor, true /* accuracy_check */); } else if (mode == "WarmUp") { benchmark_eager_scale(tensor); @@ -53,6 +60,7 @@ TEST(Benchmark, EagerScalePerformance) { ProfilerStart("eager_scale_cuda.out"); #endif benchmark_eager_scale(tensor); + #ifdef WITH_GPERFTOOLS ProfilerStop(); #endif @@ -67,7 +75,7 @@ TEST(Benchmark, EagerScalePerformance) { } } -TEST(Benchmark, EagerIntermediateMatmulPerformance) { +TEST(Benchmark, EagerIntermediateMatmulCUDA) { paddle::platform::CUDAPlace place; egr::InitEnv(place); @@ -89,7 +97,7 @@ TEST(Benchmark, EagerIntermediateMatmulPerformance) { RetainGradForTensor(Y); if (mode == "Accuracy") { - benchmark_eager_intermediate_matmul_accuracy_check(X, Y); + benchmark_eager_intermediate_matmul(X, Y, true /* accuracy_check */); } else if (mode == "WarmUp") { benchmark_eager_intermediate_matmul(X, Y); @@ -100,6 +108,70 @@ TEST(Benchmark, EagerIntermediateMatmulPerformance) { ProfilerStart("eager_intermediate_matmul_cuda.out"); #endif benchmark_eager_intermediate_matmul(X, Y); + +#ifdef WITH_GPERFTOOLS + ProfilerStop(); +#endif + auto t_end = std::chrono::high_resolution_clock::now(); + double elapsed_time_ms = + std::chrono::duration(t_end - t_start).count(); + std::cout << "Duration: " << elapsed_time_ms << " ms" << std::endl; + + } else { + PADDLE_THROW(paddle::platform::errors::Fatal("Unknown benchmark mode")); + } + } +} + +TEST(Benchmark, EagerIntermediateMLPCUDA) { + paddle::platform::CUDAPlace place; + egr::InitEnv(place); + + auto tracer = std::make_shared(); + tracer->SetExpectedPlace(place); + paddle::imperative::SetCurrentTracer(tracer); + + for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) { + paddle::framework::DDim ddimX = + paddle::framework::make_ddim({MLP_M, MLP_N}); + egr::EagerTensor X = EagerUtils::CreateTensorWithValue( + ddimX, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, MLP_X_VAL, true); + RetainGradForTensor(X); + + std::vector Ws; + std::vector Bs; + for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { + paddle::framework::DDim ddimW = + paddle::framework::make_ddim({MLP_N, MLP_K}); + egr::EagerTensor W = EagerUtils::CreateTensorWithValue( + ddimW, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, MLP_W_VAL, true); + RetainGradForTensor(W); + + paddle::framework::DDim 
ddimB = paddle::framework::make_ddim({MLP_K}); + egr::EagerTensor B = EagerUtils::CreateTensorWithValue( + ddimB, paddle::platform::CUDAPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, MLP_B_VAL, true); + RetainGradForTensor(B); + + Ws.emplace_back(std::move(W)); + Bs.emplace_back(std::move(B)); + } + + if (mode == "Accuracy") { + benchmark_eager_intermediate_mlp(X, Ws, Bs, true /* accuracy_check */); + + } else if (mode == "WarmUp") { + benchmark_eager_intermediate_mlp(X, Ws, Bs); + + } else if (mode == "Performance") { + auto t_start = std::chrono::high_resolution_clock::now(); +#ifdef WITH_GPERFTOOLS + ProfilerStart("eager_intermediate_mlp_cuda.out"); +#endif + benchmark_eager_intermediate_mlp(X, Ws, Bs); + #ifdef WITH_GPERFTOOLS ProfilerStop(); #endif diff --git a/paddle/fluid/eager/tests/benchmark/benchmark_fluid_cpu.cc b/paddle/fluid/eager/tests/benchmark/benchmark_fluid_cpu.cc index 1b1abe080a851..20844055e300d 100644 --- a/paddle/fluid/eager/tests/benchmark/benchmark_fluid_cpu.cc +++ b/paddle/fluid/eager/tests/benchmark/benchmark_fluid_cpu.cc @@ -33,10 +33,16 @@ #ifdef WITH_GPERFTOOLS #include "gperftools/profiler.h" #endif + +// Disable pten path +DECLARE_bool(run_pten_kernel); + +TEST(Benchmark, Init) { FLAGS_run_pten_kernel = false; } + namespace paddle { namespace imperative { -TEST(Benchmark, FluidScalePerformance) { +TEST(Benchmark, FluidScaleCPU) { // Prepare Device Contexts platform::CPUPlace place; egr::InitEnv(place); @@ -45,8 +51,6 @@ TEST(Benchmark, FluidScalePerformance) { std::shared_ptr X(new imperative::VarBase(true, "X")); X->SetOverridedStopGradient(false); - std::shared_ptr Out( - new imperative::VarBase(true, "Out")); std::vector src_data(128, 5.0); std::vector dims = {2, 4, 4, 4}; @@ -57,14 +61,16 @@ TEST(Benchmark, FluidScalePerformance) { sizeof(float) * src_data.size()); if (mode == "Accuracy") { - benchmark_fluid_scale_accuracy_check(X, Out, platform::Place(place)); + benchmark_fluid_scale(X, platform::Place(place), + true /* accuracy_check */); } else if (mode == "Performance") { auto t_start = std::chrono::high_resolution_clock::now(); #ifdef WITH_GPERFTOOLS ProfilerStart("fluid_scale_cpu.out"); #endif - benchmark_fluid_scale(X, Out, platform::Place(place)); + benchmark_fluid_scale(X, platform::Place(place)); + #ifdef WITH_GPERFTOOLS ProfilerStop(); #endif @@ -79,7 +85,7 @@ TEST(Benchmark, FluidScalePerformance) { } } -TEST(Benchmark, FluidMatmulAccuracy) { +TEST(Benchmark, FluidMatmulCPU) { // Prepare Device Contexts platform::CPUPlace place; egr::InitEnv(place); @@ -90,8 +96,6 @@ TEST(Benchmark, FluidMatmulAccuracy) { std::shared_ptr Y(new imperative::VarBase(true, "Y")); Y->SetOverridedStopGradient(false); - std::shared_ptr Out( - new imperative::VarBase(true, "Out")); std::vector x_src_data(4, 1.0); std::vector y_src_data(4, 2.0); std::vector dims = {2, 2}; @@ -109,14 +113,91 @@ TEST(Benchmark, FluidMatmulAccuracy) { sizeof(float) * y_src_data.size()); if (mode == "Accuracy") { - benchmark_fluid_matmul_accuracy_check(X, Y, Out, platform::Place(place)); + benchmark_fluid_matmul(X, Y, platform::Place(place), + true /* accuracy_check */); } else if (mode == "Performance") { auto t_start = std::chrono::high_resolution_clock::now(); #ifdef WITH_GPERFTOOLS ProfilerStart("fluid_matmul_cpu.out"); #endif - benchmark_fluid_matmul(X, Y, Out, platform::Place(place)); + benchmark_fluid_matmul(X, Y, platform::Place(place)); + +#ifdef WITH_GPERFTOOLS + ProfilerStop(); +#endif + auto t_end = std::chrono::high_resolution_clock::now(); + double 
elapsed_time_ms = + std::chrono::duration(t_end - t_start).count(); + + std::cout << "Duration: " << elapsed_time_ms << " ms" << std::endl; + + } else { + PADDLE_THROW(paddle::platform::errors::Fatal("Unknown benchmark mode")); + } + } +} + +TEST(Benchmark, FluidMLPCPU) { + // Prepare Device Contexts + platform::CPUPlace place; + egr::InitEnv(place); + + for (const std::string& mode : {"Accuracy", "Performance"}) { + std::vector x_src_data(MLP_M * MLP_N, MLP_X_VAL); + std::vector w_src_data(MLP_N * MLP_K, MLP_W_VAL); + std::vector b_src_data(MLP_K, MLP_B_VAL); + + std::vector x_dims = {MLP_M, MLP_N}; + std::vector w_dims = {MLP_N, MLP_K}; + std::vector b_dims = {MLP_K}; + + std::shared_ptr X(new imperative::VarBase(true, "X")); + X->SetOverridedStopGradient(false); + + auto* x_tensor = X->MutableVar()->GetMutable(); + x_tensor->Resize(framework::make_ddim(x_dims)); + auto* mutable_x = x_tensor->mutable_data(place); + paddle::memory::Copy(place, mutable_x, place, x_src_data.data(), + sizeof(float) * x_src_data.size()); + + std::vector> Ws; + std::vector> Bs; + for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { + std::shared_ptr W( + new imperative::VarBase(true, "W")); + W->SetOverridedStopGradient(false); + std::shared_ptr B( + new imperative::VarBase(true, "B")); + B->SetOverridedStopGradient(false); + + auto* w_tensor = W->MutableVar()->GetMutable(); + w_tensor->Resize(framework::make_ddim(w_dims)); + auto* mutable_w = w_tensor->mutable_data(place); + paddle::memory::Copy(place, mutable_w, place, w_src_data.data(), + sizeof(float) * w_src_data.size()); + + auto* b_tensor = B->MutableVar()->GetMutable(); + b_tensor->Resize(framework::make_ddim(b_dims)); + auto* mutable_b = b_tensor->mutable_data(place); + paddle::memory::Copy(place, mutable_b, place, b_src_data.data(), + sizeof(float) * b_src_data.size()); + + Ws.emplace_back(std::move(W)); + Bs.emplace_back(std::move(B)); + } + + if (mode == "Accuracy") { + benchmark_fluid_mlp(X, Ws, Bs, platform::Place(place), + true /* accuracy_check */); + + } else if (mode == "Performance") { + auto t_start = std::chrono::high_resolution_clock::now(); +#ifdef WITH_GPERFTOOLS + ProfilerStart("fluid_mlp_cpu.out"); +#endif + benchmark_fluid_mlp(X, Ws, Bs, platform::Place(place)); + #ifdef WITH_GPERFTOOLS ProfilerStop(); #endif @@ -137,3 +218,4 @@ TEST(Benchmark, FluidMatmulAccuracy) { USE_OP(scale); USE_OP(matmul_v2); +USE_OP(reduce_sum); diff --git a/paddle/fluid/eager/tests/benchmark/benchmark_fluid_cuda.cc b/paddle/fluid/eager/tests/benchmark/benchmark_fluid_cuda.cc index dd75064ebbf4b..620a4d1cd128d 100644 --- a/paddle/fluid/eager/tests/benchmark/benchmark_fluid_cuda.cc +++ b/paddle/fluid/eager/tests/benchmark/benchmark_fluid_cuda.cc @@ -34,10 +34,15 @@ #include "gperftools/profiler.h" #endif +// Disable pten path +DECLARE_bool(run_pten_kernel); + +TEST(Benchmark, Init) { FLAGS_run_pten_kernel = false; } + namespace paddle { namespace imperative { -TEST(Benchmark, FluidScalePerformance) { +TEST(Benchmark, FluidScaleCUDA) { // Prepare Device Contexts platform::CUDAPlace place; egr::InitEnv(place); @@ -46,8 +51,6 @@ TEST(Benchmark, FluidScalePerformance) { std::shared_ptr X(new imperative::VarBase(true, "X")); X->SetOverridedStopGradient(false); - std::shared_ptr Out( - new imperative::VarBase(true, "Out")); std::vector src_data(128, 5.0); std::vector dims = {2, 4, 4, 4}; @@ -65,17 +68,19 @@ TEST(Benchmark, FluidScalePerformance) { stream); if (mode == "Accuracy") { - benchmark_fluid_scale_accuracy_check(X, Out, platform::Place(place)); + 
benchmark_fluid_scale(X, platform::Place(place), + true /* accuracy_check */); } else if (mode == "WarmUp") { - benchmark_fluid_scale(X, Out, platform::Place(place)); + benchmark_fluid_scale(X, platform::Place(place)); } else if (mode == "Performance") { auto t_start = std::chrono::high_resolution_clock::now(); #ifdef WITH_GPERFTOOLS ProfilerStart("fluid_scale_cuda.out"); #endif - benchmark_fluid_scale(X, Out, platform::Place(place)); + benchmark_fluid_scale(X, platform::Place(place)); + #ifdef WITH_GPERFTOOLS ProfilerStop(); #endif @@ -90,7 +95,7 @@ TEST(Benchmark, FluidScalePerformance) { } } -TEST(Benchmark, FluidMatmulPerformance) { +TEST(Benchmark, FluidMatmulCUDA) { // Prepare Device Contexts platform::CUDAPlace place; egr::InitEnv(place); @@ -101,8 +106,6 @@ TEST(Benchmark, FluidMatmulPerformance) { std::shared_ptr Y(new imperative::VarBase(true, "Y")); Y->SetOverridedStopGradient(false); - std::shared_ptr Out( - new imperative::VarBase(true, "Out")); std::vector x_src_data(4, 1.0); std::vector y_src_data(4, 2.0); std::vector dims = {2, 2}; @@ -128,23 +131,112 @@ TEST(Benchmark, FluidMatmulPerformance) { stream); if (mode == "Accuracy") { - benchmark_fluid_matmul_accuracy_check(X, Y, Out, platform::Place(place)); + benchmark_fluid_matmul(X, Y, platform::Place(place), + true /* accuracy_check */); } else if (mode == "WarmUp") { - benchmark_fluid_matmul(X, Y, Out, platform::Place(place)); + benchmark_fluid_matmul(X, Y, platform::Place(place)); } else if (mode == "Performance") { auto t_start = std::chrono::high_resolution_clock::now(); #ifdef WITH_GPERFTOOLS ProfilerStart("fluid_matmul_cuda.out"); #endif - benchmark_fluid_matmul(X, Y, Out, platform::Place(place)); + benchmark_fluid_matmul(X, Y, platform::Place(place)); + +#ifdef WITH_GPERFTOOLS + ProfilerStop(); +#endif + auto t_end = std::chrono::high_resolution_clock::now(); + double elapsed_time_ms = + std::chrono::duration(t_end - t_start).count(); + std::cout << "Duration: " << elapsed_time_ms << " ms" << std::endl; + + } else { + PADDLE_THROW(paddle::platform::errors::Fatal("Unknown benchmark mode")); + } + } +} + +TEST(Benchmark, FluidMLPCUDA) { + // Prepare Device Contexts + platform::CUDAPlace place; + egr::InitEnv(place); + + for (const std::string& mode : {"Accuracy", "WarmUp", "Performance"}) { + paddle::platform::DeviceContextPool& pool = + paddle::platform::DeviceContextPool::Instance(); + auto* dev_ctx = + dynamic_cast(pool.Get(place)); + auto stream = dev_ctx->stream(); + + std::vector x_src_data(MLP_M * MLP_N, MLP_X_VAL); + std::vector w_src_data(MLP_N * MLP_K, MLP_W_VAL); + std::vector b_src_data(MLP_K, MLP_B_VAL); + + std::vector x_dims = {MLP_M, MLP_N}; + std::vector w_dims = {MLP_N, MLP_K}; + std::vector b_dims = {MLP_K}; + + std::shared_ptr X(new imperative::VarBase(true, "X")); + X->SetOverridedStopGradient(false); + + auto* x_tensor = X->MutableVar()->GetMutable(); + x_tensor->Resize(framework::make_ddim(x_dims)); + auto* mutable_x = x_tensor->mutable_data(place); + paddle::memory::Copy(place, mutable_x, platform::CPUPlace(), + x_src_data.data(), sizeof(float) * x_src_data.size(), + stream); + + std::vector> Ws; + std::vector> Bs; + for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { + std::shared_ptr W( + new imperative::VarBase(true, "W")); + W->SetOverridedStopGradient(false); + std::shared_ptr B( + new imperative::VarBase(true, "B")); + B->SetOverridedStopGradient(false); + + auto* w_tensor = W->MutableVar()->GetMutable(); + w_tensor->Resize(framework::make_ddim(w_dims)); + auto* mutable_w = 
w_tensor->mutable_data(place); + paddle::memory::Copy(place, mutable_w, platform::CPUPlace(), + w_src_data.data(), sizeof(float) * w_src_data.size(), + stream); + + auto* b_tensor = B->MutableVar()->GetMutable(); + b_tensor->Resize(framework::make_ddim(b_dims)); + auto* mutable_b = b_tensor->mutable_data(place); + paddle::memory::Copy(place, mutable_b, platform::CPUPlace(), + b_src_data.data(), sizeof(float) * b_src_data.size(), + stream); + + Ws.emplace_back(std::move(W)); + Bs.emplace_back(std::move(B)); + } + + if (mode == "Accuracy") { + benchmark_fluid_mlp(X, Ws, Bs, platform::Place(place), + true /* accuracy_check */); + + } else if (mode == "WarmUp") { + benchmark_fluid_mlp(X, Ws, Bs, platform::Place(place)); + + } else if (mode == "Performance") { + auto t_start = std::chrono::high_resolution_clock::now(); +#ifdef WITH_GPERFTOOLS + ProfilerStart("fluid_mlp_cuda.out"); +#endif + benchmark_fluid_mlp(X, Ws, Bs, platform::Place(place)); + #ifdef WITH_GPERFTOOLS ProfilerStop(); #endif auto t_end = std::chrono::high_resolution_clock::now(); double elapsed_time_ms = std::chrono::duration(t_end - t_start).count(); + std::cout << "Duration: " << elapsed_time_ms << " ms" << std::endl; } else { @@ -158,3 +250,5 @@ TEST(Benchmark, FluidMatmulPerformance) { USE_OP(scale); USE_OP(matmul_v2); +USE_OP(reduce_sum); +USE_OP(reduce_sum_grad); diff --git a/paddle/fluid/eager/tests/benchmark/benchmark_utils.cc b/paddle/fluid/eager/tests/benchmark/benchmark_utils.cc index c753e42c4e199..280e08fbdab33 100644 --- a/paddle/fluid/eager/tests/benchmark/benchmark_utils.cc +++ b/paddle/fluid/eager/tests/benchmark/benchmark_utils.cc @@ -42,140 +42,108 @@ namespace egr { /* --------------------- */ /* ---- Eager Scale ---- */ /* --------------------- */ -void benchmark_eager_scale_accuracy_check(const egr::EagerTensor& tensor) { - egr::EagerTensor input_tensor = tensor; +void benchmark_eager_scale(const EagerTensor& tensor, bool accuracy_check) { + EagerTensor input_tensor = tensor; float scale = 2.0; float bias = 3.0; - size_t max_num_runs = 10; + size_t max_num_runs = accuracy_check ? 
10 : max_num_benchmark_runs; for (size_t i = 0; i < max_num_runs; i++) { input_tensor = egr::scale(input_tensor, scale, bias, true /*bias_after_scale*/, true /*trace_backward*/); } - std::vector target_tensors = {input_tensor}; + std::vector target_tensors = {input_tensor}; RunBackward(target_tensors, {}); - // Examine Forward Grad (w.r.t max_num_runs = 10) - PADDLE_ENFORCE( - CompareTensorWithValue(input_tensor, 8189.0) == true, - paddle::platform::errors::Fatal("Numerical Error, Expected %f", 8189.0)); - // Examine Backward Grad (w.r.t max_num_runs = 10) - PADDLE_ENFORCE( - CompareGradTensorWithValue(tensor, 1024.0) == true, - paddle::platform::errors::Fatal("Numerical Error, Expected %f", 1024.0)); -} - -void benchmark_eager_scale(const egr::EagerTensor& tensor) { - egr::EagerTensor input_tensor = tensor; - float scale = 2.0; - float bias = 3.0; - - for (size_t i = 0; i < max_num_benchmark_runs; i++) { - input_tensor = - egr::scale(input_tensor, scale, bias, true /*bias_after_scale*/, - true /*trace_backward*/); + if (accuracy_check) { + // Examine Forward Grad (w.r.t max_num_runs = 10) + PADDLE_ENFORCE(CompareTensorWithValue(input_tensor, 8189.0) == true, + paddle::platform::errors::Fatal( + "Numerical Error, Expected %f", 8189.0)); + // Examine Backward Grad (w.r.t max_num_runs = 10) + PADDLE_ENFORCE(CompareGradTensorWithValue(tensor, 1024.0) == true, + paddle::platform::errors::Fatal( + "Numerical Error, Expected %f", 1024.0)); } - - std::vector target_tensors = {input_tensor}; - RunBackward(target_tensors, {}); } /* ----------------------------------- */ /* ---- Eager Intermediate Matmul ---- */ /* ----------------------------------- */ -void benchmark_eager_intermediate_matmul_accuracy_check( - const egr::EagerTensor& X, const egr::EagerTensor& Y) { - egr::EagerTensor input_tensor0 = X; +void benchmark_eager_intermediate_matmul(const EagerTensor& X, + const EagerTensor& Y, + bool accuracy_check) { + EagerTensor input_tensor0 = X; - size_t max_num_runs = 2; + size_t max_num_runs = accuracy_check ? 
2 : max_num_benchmark_runs; for (size_t i = 0; i < max_num_runs; i++) { input_tensor0 = matmul_v2_dygraph_function( - input_tensor0, Y, false /*trans_x*/, false /*trans_y*/, - {} /**fused_reshape_Out**/, {} /**fused_transpose_Out**/, - false /*use_mkldnn*/, "float32" /*mkldnn_data_type*/, 0 /*op_role*/, - {} /*op_role_var*/, "" /*op_namescope*/, {} /*op_callstack*/, - "" /*op_device*/, false /*with_quant_attr*/, true /*trace_backward*/); + input_tensor0, Y, {{"trans_x", false}, {"trans_y", false}}); } - std::vector target_tensors = {input_tensor0}; + std::vector target_tensors = {input_tensor0}; RunBackward(target_tensors, {}); - // Examine Forward Grad (w.r.t max_num_runs = 2) - PADDLE_ENFORCE( - CompareVariableWithValue(input_tensor0, 16) == true, - paddle::platform::errors::Fatal("Numerical Error, Expected %f", 16.0)); - // Examine Backward Grad (w.r.t max_num_runs = 2) - PADDLE_ENFORCE( - CompareGradVariableWithValue(X, 16) == true, - paddle::platform::errors::Fatal("Numerical Error, Expected %f", 16.0)); - PADDLE_ENFORCE( - CompareGradVariableWithValue(Y, 16) == true, - paddle::platform::errors::Fatal("Numerical Error, Expected %f", 16.0)); + if (accuracy_check) { + // Examine Forward Grad (w.r.t max_num_runs = 2) + PADDLE_ENFORCE( + CompareVariableWithValue(input_tensor0, 16) == true, + paddle::platform::errors::Fatal("Numerical Error, Expected %f", 16.0)); + // Examine Backward Grad (w.r.t max_num_runs = 2) + PADDLE_ENFORCE( + CompareGradVariableWithValue(X, 16) == true, + paddle::platform::errors::Fatal("Numerical Error, Expected %f", 16.0)); + PADDLE_ENFORCE( + CompareGradVariableWithValue(Y, 16) == true, + paddle::platform::errors::Fatal("Numerical Error, Expected %f", 16.0)); + } } -void benchmark_eager_intermediate_matmul(const egr::EagerTensor& X, - const egr::EagerTensor& Y) { - egr::EagerTensor input_tensor0 = X; - for (size_t i = 0; i < max_num_benchmark_runs; i++) { - input_tensor0 = matmul_v2_dygraph_function( - input_tensor0, Y, false /*trans_x*/, false /*trans_y*/, - {} /**fused_reshape_Out**/, {} /**fused_transpose_Out**/, - false /*use_mkldnn*/, "float32" /*mkldnn_data_type*/, 0 /*op_role*/, - {} /*op_role_var*/, "" /*op_namescope*/, {} /*op_callstack*/, - "" /*op_device*/, false /*with_quant_attr*/, true /*trace_backward*/); - } +/* -------------------------------- */ +/* ---- Eager Intermediate MLP ---- */ +/* -------------------------------- */ +void benchmark_eager_intermediate_mlp(const EagerTensor& X, + const std::vector& Ws, + const std::vector& Bs, + bool accuracy_check) { + EagerTensor input0 = X; - std::vector target_tensors = {input_tensor0}; - RunBackward(target_tensors, {}); -} + for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { + EagerTensor Out = matmul_v2_dygraph_function( + input0, Ws[i], {{"trans_x", false}, {"trans_y", false}}); -} // namespace egr + input0 = elementwise_add_dygraph_function(Out, Bs[i], {}); + } -namespace paddle { -namespace imperative { + EagerTensor Out = reduce_sum_dygraph_function(input0, {{"reduce_all", true}}); -/* --------------------- */ -/* ---- Fluid Scale ---- */ -/* --------------------- */ -// TODO(jiabin): Change this and remove nolint -void benchmark_fluid_scale_accuracy_check( - const std::shared_ptr& X, - const std::shared_ptr& Out, - const paddle::platform::Place& place) { - imperative::Tracer tracer; - framework::AttributeMap attrs; + std::vector target_tensors = {Out}; + RunBackward(target_tensors, {}); - attrs["use_mkldnn"] = false; - attrs["scale"] = 2; - attrs["bias"] = 3; - attrs["bias_after_scale"] = true; + if 
(accuracy_check) { + std::unordered_map result = + compute_mlp_expected_results(); - // NameVarBaseMap = std::map>> - imperative::NameVarBaseMap outs = {{"Out", {Out}}}; - imperative::NameVarBaseMap ins = {{"X", {X}}}; + // Examine Forward Grad (w.r.t max_num_runs = 2) + CompareVariableWithValue(Out, result["Out"]); - size_t max_num_runs = 10; - for (size_t i = 0; i < max_num_runs; i++) { - tracer.TraceOp("scale", ins, outs, attrs, place, true); - if (i != max_num_runs - 1) { - ins = {{"X", outs["Out"]}}; - outs = {{"Out", - {std::shared_ptr( - new imperative::VarBase(true, "Out"))}}}; - } + // Examine Backward Grad (w.r.t max_num_runs = 2) + CompareGradVariableWithValue(X, result["GradX"]); + CompareGradVariableWithValue(Ws[0], result["GradW"]); } +} - auto* engine = tracer.GetEngine(); - std::vector> grad_tensors{nullptr}; - engine->Init(outs["Out"], grad_tensors, false /*retain_graph*/); - engine->Execute(); +} // namespace egr - // Fwd Check: Expects 8189 with max_num_runs = 10 - auto* tensor = - outs["Out"][0]->MutableVar()->GetMutable(); +namespace paddle { +namespace imperative { + +static void FluidCheckTensorValue(const std::shared_ptr& X, + const paddle::platform::Place& place, + float value) { + auto* tensor = X->MutableVar()->GetMutable(); float* t_ptr = tensor->mutable_data(place); std::vector host_data(tensor->numel()); if (place == paddle::platform::CUDAPlace()) { @@ -190,11 +158,14 @@ void benchmark_fluid_scale_accuracy_check( sizeof(float) * tensor->numel(), stream); t_ptr = host_data.data(); } - PADDLE_ENFORCE( - t_ptr[0] == 8189.0, - paddle::platform::errors::Fatal("Numerical Error, Expected %f", 8189.0)); + VLOG(6) << "Tensor Value: " << t_ptr[0] << ", Expected Value: " << value; + PADDLE_ENFORCE(t_ptr[0] == value, paddle::platform::errors::Fatal( + "Numerical Error, Expected %f", value)); +} - // Grad Check: Expects 1024.0 with max_num_runs = 10 +static void FluidCheckGradTensorValue( + const std::shared_ptr& X, + const paddle::platform::Place& place, float value) { auto* grad_tensor = X->MutableGradVar()->GetMutable(); float* g_ptr = grad_tensor->mutable_data(place); std::vector g_host_data(grad_tensor->numel()); @@ -210,15 +181,18 @@ void benchmark_fluid_scale_accuracy_check( sizeof(float) * grad_tensor->numel(), stream); g_ptr = g_host_data.data(); } - PADDLE_ENFORCE( - g_ptr[0] == 1024.0, - paddle::platform::errors::Fatal("Numerical Error, Expected %f", 1024.0)); + VLOG(6) << "Tensor Value: " << g_ptr[0] << ", Expected Value: " << value; + PADDLE_ENFORCE(g_ptr[0] == value, paddle::platform::errors::Fatal( + "Numerical Error, Expected %f", value)); } +/* --------------------- */ +/* ---- Fluid Scale ---- */ +/* --------------------- */ // TODO(jiabin): Change this and remove nolint void benchmark_fluid_scale(const std::shared_ptr& X, - const std::shared_ptr& Out, - const paddle::platform::Place& place) { + const paddle::platform::Place& place, + bool accuracy_check) { imperative::Tracer tracer; framework::AttributeMap attrs; @@ -227,144 +201,124 @@ void benchmark_fluid_scale(const std::shared_ptr& X, attrs["bias"] = 3; attrs["bias_after_scale"] = true; - // NameVarBaseMap = std::map>> - imperative::NameVarBaseMap outs = {{"Out", {Out}}}; - imperative::NameVarBaseMap ins = {{"X", {X}}}; + std::shared_ptr tmp_out = X; + + size_t max_num_runs = accuracy_check ? 
10 : max_num_benchmark_runs; + for (size_t i = 0; i < max_num_runs; i++) { + imperative::NameVarBaseMap ins = {{"X", {tmp_out}}}; + imperative::NameVarBaseMap outs = { + {"Out", + {std::shared_ptr( + new imperative::VarBase(true, "Out"))}}}; - for (size_t i = 0; i < max_num_benchmark_runs; i++) { tracer.TraceOp("scale", ins, outs, attrs, place, true); - if (i != max_num_benchmark_runs - 1) { - ins = {{"X", outs["Out"]}}; - outs = {{"Out", - {std::shared_ptr( - new imperative::VarBase(true, "Out"))}}}; - } + + tmp_out = outs["Out"][0]; } auto* engine = tracer.GetEngine(); std::vector> grad_tensors{nullptr}; - engine->Init(outs["Out"], grad_tensors, false /*retain_graph*/); + engine->Init({tmp_out}, grad_tensors, false /*retain_graph*/); engine->Execute(); + + if (accuracy_check) { + FluidCheckTensorValue(tmp_out, place, 8189.0); + FluidCheckGradTensorValue(X, place, 1024.0); + } } /* ---------------------- */ /* ---- Fluid Matmul ---- */ /* ---------------------- */ -void benchmark_fluid_matmul_accuracy_check( - const std::shared_ptr& X, - const std::shared_ptr& Y, - const std::shared_ptr& Out, - const paddle::platform::Place& place) { +void benchmark_fluid_matmul(const std::shared_ptr& X, + const std::shared_ptr& Y, + const paddle::platform::Place& place, + bool accuracy_check) { imperative::Tracer tracer; - framework::AttributeMap attrs; - // NameVarBaseMap = std::map>> - imperative::NameVarBaseMap outs = {{"Out", {Out}}}; - imperative::NameVarBaseMap ins = {{"X", {X}}, {"Y", {Y}}}; + std::shared_ptr tmp_out = X; - size_t max_num_runs = 2; + size_t max_num_runs = accuracy_check ? 2 : max_num_benchmark_runs; for (size_t i = 0; i < max_num_runs; i++) { + framework::AttributeMap attrs; + imperative::NameVarBaseMap ins = {{"X", {tmp_out}}, {"Y", {Y}}}; + imperative::NameVarBaseMap outs = { + {"Out", + {std::shared_ptr( + new imperative::VarBase(true, "Out"))}}}; + tracer.TraceOp("matmul_v2", ins, outs, attrs, place, true); - if (i != max_num_runs - 1) { - ins = {{"X", outs["Out"]}, {"Y", {Y}}}; - outs = {{"Out", - {std::shared_ptr( - new imperative::VarBase(true, "Out"))}}}; - } + + tmp_out = outs["Out"][0]; } auto* engine = tracer.GetEngine(); std::vector> grad_tensors{nullptr}; - engine->Init(outs["Out"], grad_tensors, false /*retain_graph*/); + engine->Init({tmp_out}, grad_tensors, false /*retain_graph*/); engine->Execute(); - auto* tensor = - outs["Out"][0]->MutableVar()->GetMutable(); - float* t_ptr = tensor->mutable_data(place); - std::vector host_data(tensor->numel()); - if (place == paddle::platform::CUDAPlace()) { - paddle::platform::DeviceContextPool& pool = - paddle::platform::DeviceContextPool::Instance(); - auto* dev_ctx = - dynamic_cast(pool.Get(place)); - auto stream = dev_ctx->stream(); - - paddle::memory::Copy(paddle::platform::CPUPlace(), host_data.data(), - paddle::platform::CUDAPlace(), t_ptr, - sizeof(float) * tensor->numel(), stream); - t_ptr = host_data.data(); - } - PADDLE_ENFORCE(t_ptr[0] == 16, paddle::platform::errors::Fatal( - "Numerical Error, Expected %f", 16)); - - { - auto* grad_tensor = X->MutableGradVar()->GetMutable(); - float* g_ptr = grad_tensor->mutable_data(place); - std::vector g_host_data(grad_tensor->numel()); - if (place == paddle::platform::CUDAPlace()) { - paddle::platform::DeviceContextPool& pool = - paddle::platform::DeviceContextPool::Instance(); - auto* dev_ctx = - dynamic_cast(pool.Get(place)); - auto stream = dev_ctx->stream(); - - paddle::memory::Copy(paddle::platform::CPUPlace(), g_host_data.data(), - paddle::platform::CUDAPlace(), g_ptr, - 
sizeof(float) * grad_tensor->numel(), stream); - g_ptr = g_host_data.data(); - } - PADDLE_ENFORCE(g_ptr[0] == 16, paddle::platform::errors::Fatal( - "Numerical Error, Expected %f", 16)); - } - - { - auto* grad_tensor = Y->MutableGradVar()->GetMutable(); - float* g_ptr = grad_tensor->mutable_data(place); - std::vector g_host_data(grad_tensor->numel()); - if (place == paddle::platform::CUDAPlace()) { - paddle::platform::DeviceContextPool& pool = - paddle::platform::DeviceContextPool::Instance(); - auto* dev_ctx = - dynamic_cast(pool.Get(place)); - auto stream = dev_ctx->stream(); - - paddle::memory::Copy(paddle::platform::CPUPlace(), g_host_data.data(), - paddle::platform::CUDAPlace(), g_ptr, - sizeof(float) * grad_tensor->numel(), stream); - g_ptr = g_host_data.data(); - } - PADDLE_ENFORCE(g_ptr[0] == 16, paddle::platform::errors::Fatal( - "Numerical Error, Expected %f", 16)); + if (accuracy_check) { + FluidCheckTensorValue(tmp_out, place, 16); + FluidCheckGradTensorValue(X, place, 16); + FluidCheckGradTensorValue(Y, place, 16); } } -void benchmark_fluid_matmul(const std::shared_ptr& X, - const std::shared_ptr& Y, - const std::shared_ptr& Out, - const paddle::platform::Place& place) { +/* ------------------- */ +/* ---- Fluid MLP ---- */ +/* ------------------- */ +void benchmark_fluid_mlp( + const std::shared_ptr& X, + const std::vector>& Ws, + const std::vector>& Bs, + const paddle::platform::Place& place, bool accuracy_check) { imperative::Tracer tracer; - std::shared_ptr tmp_out = X; - - for (size_t i = 0; i < max_num_benchmark_runs; i++) { - framework::AttributeMap attrs; - imperative::NameVarBaseMap ins = {{"X", {tmp_out}}, {"Y", {Y}}}; - imperative::NameVarBaseMap outs = { - {"Out", - {std::shared_ptr( - new imperative::VarBase(true, "Out"))}}}; + imperative::NameVarBaseMap ins; + imperative::NameVarBaseMap outs; + framework::AttributeMap attrs; + std::shared_ptr input0 = X; + for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { + // Matmul0 + ins = {{"X", {input0}}, {"Y", {Ws[0]}}}; + outs = {{"Out", + {std::shared_ptr( + new imperative::VarBase(true, "Out"))}}}; tracer.TraceOp("matmul_v2", ins, outs, attrs, place, true); - tmp_out = outs["Out"][0]; + // EW-Add0 + ins = {{"X", outs["Out"]}, {"Y", {Bs[i]}}}; + outs = {{"Out", + {std::shared_ptr( + new imperative::VarBase(true, "Out"))}}}; + + tracer.TraceOp("elementwise_add", ins, outs, attrs, place, true); + input0 = outs["Out"][0]; } + // ReduceSum + ins = {{"X", {input0}}}; + outs = {{"Out", + {std::shared_ptr( + new imperative::VarBase(true, "Out"))}}}; + attrs = {{"reduce_all", true}}; + + tracer.TraceOp("reduce_sum", ins, outs, attrs, place, true); + auto* engine = tracer.GetEngine(); std::vector> grad_tensors{nullptr}; - engine->Init({tmp_out}, grad_tensors, false /*retain_graph*/); + engine->Init(outs["Out"], grad_tensors, false /*retain_graph*/); engine->Execute(); + + if (accuracy_check) { + std::unordered_map result = + egr::compute_mlp_expected_results(); + + FluidCheckTensorValue(outs["Out"][0], place, result["Out"]); + FluidCheckGradTensorValue(X, place, result["GradX"]); + FluidCheckGradTensorValue(Ws[0], place, result["GradW"]); + } } } // namespace imperative diff --git a/paddle/fluid/eager/tests/benchmark/benchmark_utils.h b/paddle/fluid/eager/tests/benchmark/benchmark_utils.h index afb04336c9ed4..70ecf2af8e4c3 100644 --- a/paddle/fluid/eager/tests/benchmark/benchmark_utils.h +++ b/paddle/fluid/eager/tests/benchmark/benchmark_utils.h @@ -14,51 +14,82 @@ #pragma once +#include #include "paddle/fluid/eager/eager_tensor.h" 
#include "paddle/fluid/imperative/layer.h" #include "paddle/pten/api/all.h" -#include "paddle/pten/include/core.h" + +/* MLP Configurations */ +// Out1 = X[M, N] x W[N, K] + B[K] +// ... x MLP_NUM_LINEAR +// Out = ReduceSum(OutN) +#define MLP_M 4 +#define MLP_N 16 +#define MLP_K MLP_N +#define MLP_X_VAL 1.0 +#define MLP_W_VAL 2.0 +#define MLP_B_VAL 3.0 +#define MLP_NUM_LINEAR 1000 + namespace egr { +inline std::unordered_map compute_mlp_expected_results() { + float Out = MLP_X_VAL; + for (size_t i = 0; i < MLP_NUM_LINEAR; i++) { + Out = Out * MLP_W_VAL * MLP_N + MLP_B_VAL; + } + Out = Out * MLP_M * MLP_N; + + float GradX = 1.0 * pow((MLP_W_VAL * MLP_N), MLP_NUM_LINEAR); + float GradW0 = + 1.0 * pow((MLP_W_VAL * MLP_N), (MLP_NUM_LINEAR - 1)) * MLP_X_VAL * MLP_M; + return {{"Out", Out}, {"GradX", GradX}, {"GradW", GradW0}}; +} + /* ---- Eager Scale ---- */ -void benchmark_eager_scale_accuracy_check(const egr::EagerTensor& tensor); -void benchmark_eager_scale(const egr::EagerTensor& tensor); +void benchmark_eager_scale(const EagerTensor& tensor, + bool accuracy_check = false); /* ---- Eager MatMul ---- */ -void benchmark_eager_intermediate_matmul_accuracy_check( - const egr::EagerTensor& X, const egr::EagerTensor& Y); -void benchmark_eager_intermediate_matmul(const egr::EagerTensor& X, - const egr::EagerTensor& Y); +/* +void benchmark_eager_matmul(const EagerTensor& X, const EagerTensor& Y, + bool accuracy_check = false); +void benchmark_eager_mlp(const EagerTensor& X, + const std::vector& Ws, + const std::vector& Bs, + bool accuracy_check = false); +*/ +void benchmark_eager_intermediate_matmul(const EagerTensor& X, + const EagerTensor& Y, + bool accuracy_check = false); + +void benchmark_eager_intermediate_mlp(const EagerTensor& X, + const std::vector& Ws, + const std::vector& Bs, + bool accuracy_check = false); } // namespace egr namespace paddle { namespace imperative { /* ---- Fluid Scale ---- */ -// TODO(jiabin): Change this and remove nolint -void benchmark_fluid_scale_accuracy_check( - const std::shared_ptr& X, // NOLINT - const std::shared_ptr& Out, // NOLINT - const paddle::platform::Place& place); - // TODO(jiabin): Change this and remove nolint void benchmark_fluid_scale( - const std::shared_ptr& X, // NOLINT - const std::shared_ptr& Out, // NOLINT - const paddle::platform::Place& place); + const std::shared_ptr& X, // NOLINT + const paddle::platform::Place& place, bool accuracy_check = false); /* ---- Fluid MatMul ---- */ -void benchmark_fluid_matmul_accuracy_check( - const std::shared_ptr& X, // NOLINT - const std::shared_ptr& Y, // NOLINT - const std::shared_ptr& Out, // NOLINT - const paddle::platform::Place& place); - void benchmark_fluid_matmul( const std::shared_ptr& X, - const std::shared_ptr& Y, // NOLINT - const std::shared_ptr& Out, // NOLINT - const paddle::platform::Place& place); + const std::shared_ptr& Y, // NOLINT + const paddle::platform::Place& place, bool accuracy_check = false); + +/* ---- Fluid MLP ---- */ +void benchmark_fluid_mlp( + const std::shared_ptr& X, + const std::vector>& Ws, + const std::vector>& Bs, + const paddle::platform::Place& place, bool accuracy_check = false); } // namespace imperative } // namespace paddle diff --git a/paddle/fluid/eager/tests/generated_test.cc b/paddle/fluid/eager/tests/generated_test.cc index 07322af6316ab..38fc9c0e765cb 100644 --- a/paddle/fluid/eager/tests/generated_test.cc +++ b/paddle/fluid/eager/tests/generated_test.cc @@ -45,10 +45,7 @@ TEST(Generated, Sigmoid) { VLOG(6) << "Make EagerTensor"; 
RetainGradForTensor(tensor); VLOG(6) << "Retain Grad for Tensor"; - auto output_tensor = sigmoid_dygraph_function( - tensor, false /*use_mkldnn*/, false /*use_cudnn*/, 0 /*op_role*/, - {} /*op_role_var*/, "" /*op_namescope*/, {} /*op_callstack*/, - "" /*op_device*/, false /*with_quant_attr*/, true /*trace_backward*/); + auto output_tensor = sigmoid_dygraph_function(tensor, {}); VLOG(6) << "Run Backward"; PADDLE_ENFORCE( CompareVariableWithValue(output_tensor, 0.5) == true, @@ -85,11 +82,7 @@ TEST(Generated, Matmul_v2) { RetainGradForTensor(Y); auto output_tensor = matmul_v2_dygraph_function( - X, Y, false /*trans_x*/, false /*trans_y*/, {} /**fused_reshape_Out**/, - {} /**fused_transpose_Out**/, false /*use_mkldnn*/, - "float32" /*mkldnn_data_type*/, 0 /*op_role*/, {} /*op_role_var*/, - "" /*op_namescope*/, {} /*op_callstack*/, "" /*op_device*/, - false /*with_quant_attr*/, true /*trace_backward*/); + X, Y, {{"trans_x", false}, {"trans_y", false}}); PADDLE_ENFORCE( CompareVariableWithValue(output_tensor, 96) == true, diff --git a/paddle/fluid/eager/tests/test_utils.h b/paddle/fluid/eager/tests/test_utils.h index 2b8f1df7465eb..7e9a84ca76a7b 100644 --- a/paddle/fluid/eager/tests/test_utils.h +++ b/paddle/fluid/eager/tests/test_utils.h @@ -49,9 +49,9 @@ bool CompareGradTensorWithValue(const egr::EagerTensor& target, T value) { } for (int i = 0; i < grad_dense->numel(); i++) { - if (ptr[i] != value) { - return false; - } + PADDLE_ENFORCE(value == ptr[i], + paddle::platform::errors::Fatal( + "Numerical Error, Expected %f, got %f", value, ptr[i])); } return true; } @@ -77,7 +77,9 @@ bool CompareTensorWithValue(const egr::EagerTensor& target, T value) { } for (int i = 0; i < dense_t->numel(); i++) { - if (ptr[i] != value) return false; + PADDLE_ENFORCE(value == ptr[i], + paddle::platform::errors::Fatal( + "Numerical Error, Expected %f, got %f", value, ptr[i])); } return true; } @@ -103,9 +105,9 @@ bool CompareVariableWithValue(const egr::EagerTensor& target, T value) { } for (int i = 0; i < lod_tensor.numel(); i++) { - if (ptr[i] != value) { - return false; - } + PADDLE_ENFORCE(value == ptr[i], + paddle::platform::errors::Fatal( + "Numerical Error, Expected %f, got %f", value, ptr[i])); } return true; } @@ -132,11 +134,9 @@ bool CompareGradVariableWithValue(const egr::EagerTensor& target, T value) { } for (int i = 0; i < lod_tensor.numel(); i++) { - if (ptr[i] != value) { - std::cout << " current value is: " << ptr[i] << " i is: " << i - << std::endl; - return false; - } + PADDLE_ENFORCE(value == ptr[i], + paddle::platform::errors::Fatal( + "Numerical Error, Expected %f, got %f", value, ptr[i])); } return true; }
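The closed forms in compute_mlp_expected_results (benchmark_utils.h above) follow from a constant-tensor recurrence, spelled out here for the record. Write x, w, b for MLP_X_VAL, MLP_W_VAL, MLP_B_VAL, let the input be M x N with K = N, and let L = MLP_NUM_LINEAR. Every entry of the activation after layer l then holds the same value v_l:

$$ v_0 = x, \qquad v_{l+1} = v_l\,wN + b, \qquad \mathrm{Out} = MN\,v_L $$

and, since the bias additions drop out of the multiplicative chain of the backward pass,

$$ \frac{\partial\,\mathrm{Out}}{\partial x_{ij}} = (wN)^L, \qquad \frac{\partial\,\mathrm{Out}}{\partial w^{(0)}_{jk}} = (wN)^{L-1}\,x\,M, $$

which is exactly what the helper computes with its pow((MLP_W_VAL * MLP_N), ...) terms.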