[Inference] conv2d_fusion op support codegen #58650

Merged: 8 commits, Nov 28, 2023
3 changes: 1 addition & 2 deletions cmake/operators.cmake
@@ -439,8 +439,7 @@ function(op_library TARGET)
       "bitwise_op"
       "nccl_op"
       "tensor_array_read_write_op"
-      "tensorrt_engine_op"
-      "conv_fusion_op")
+      "tensorrt_engine_op")
 
   if("${TARGET}" STREQUAL "${manual_pybind_op}")
     set(pybind_flag 1)
15 changes: 7 additions & 8 deletions paddle/fluid/framework/ir/conv2d_fusion_layout_transfer_pass.cc
@@ -111,7 +111,7 @@ void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
       static_cast<phi::DataType>(Get<int>("model_precision")) ==
           phi::DataType::FLOAT16 ||
       Get<bool>("enable_gpu_mixed");
-  bool cutlass_enable = Get<bool>("use_cutlass");
+
   if (!is_fp16_precision) return;
 
   PADDLE_ENFORCE_EQ(graph->IsMainGraph(),
@@ -165,12 +165,12 @@ void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
 
   auto CutlassIsValid = [&](ir::Node *op_node) -> bool {
     auto op_desc = op_node->Op();
-    bool use_cutlass = false;
-    if (op_desc->HasAttr("use_cutlass")) {
-      use_cutlass = op_desc->GetAttrIfExists<bool>("use_cutlass");
+    bool use_cudnn = true;
+    if (op_desc->HasAttr("use_cudnn")) {
+      use_cudnn = op_desc->GetAttrIfExists<bool>("use_cudnn");
     }
 
-    return use_cutlass && cutlass_enable;
+    bool cutlass_enable = Get<bool>("use_cutlass");
+    return !use_cudnn && cutlass_enable;
   };
 
   for (auto *op_node : op_nodes) {
@@ -195,8 +195,7 @@ void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
     auto *op_desc = op_node->Op();
 
     if (CutlassIsValid(op_node)) {
-      op_desc->SetType("conv2d_fusion_cutlass");
-      // conv2d_fusion_cutlass must have this attribute because of signature.
+      // conv2d_fusion must have this attribute because of signature.
       if (!op_desc->HasAttr("fuse_alpha")) {
         op_desc->SetAttr("fuse_alpha", 0.f);
       }
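The attribute contract these hunks establish can be read directly off the diff: the fuse passes mark an op for CUTLASS only by setting use_cudnn = false, and the layout-transfer pass keeps the op type as conv2d_fusion (no more conv2d_fusion_cutlass) and routes it to CUTLASS only when that attribute is false and the pass-level use_cutlass switch is on. Below is a standalone sketch of that decision, using a plain std::map in place of Paddle's OpDesc attribute storage (illustrative only, not the framework API):

#include <iostream>
#include <map>
#include <string>

// Stand-in for the boolean attributes carried by a conv2d_fusion op desc.
using AttrMap = std::map<std::string, bool>;

// Mirrors CutlassIsValid after this PR: an op stays on the cuDNN path unless
// a fuse pass explicitly set use_cudnn = false, and even then the global
// use_cutlass switch must be enabled.
bool CutlassIsValid(const AttrMap &attrs, bool pass_use_cutlass) {
  bool use_cudnn = true;  // missing attribute means "keep using cuDNN"
  auto it = attrs.find("use_cudnn");
  if (it != attrs.end()) {
    use_cudnn = it->second;
  }
  return !use_cudnn && pass_use_cutlass;
}

int main() {
  AttrMap cutlass_candidate{{"use_cudnn", false}};
  AttrMap cudnn_op{{"use_cudnn", true}};
  AttrMap unmarked_op;  // attribute never set

  std::cout << CutlassIsValid(cutlass_candidate, true)   // 1: goes to CUTLASS
            << CutlassIsValid(cudnn_op, true)            // 0: stays on cuDNN
            << CutlassIsValid(unmarked_op, true)         // 0: default is cuDNN
            << CutlassIsValid(cutlass_candidate, false)  // 0: switch disabled
            << '\n';
  return 0;
}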
paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc

@@ -50,7 +50,7 @@ framework::proto::OpDesc PrepareOpDesc(
   desc.SetAttr("activation", activation);
   desc.SetOutput("Output", {output});
   desc.SetAttr("is_test", true);
-  desc.SetAttr("use_cudnn", false);
+  desc.SetAttr("use_cudnn", true);
   desc.Flush();
   return *desc.Proto();
 }
@@ -195,7 +195,7 @@ void ConvElementwiseAdd2ActFusePass::ApplyImpl(ir::Graph* graph) const {
       base_op_desc, bias_name, bias1_name, act_op_type, act_op_out);
   framework::OpDesc new_op_desc(new_op_proto, nullptr);
   if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) {
-    new_op_desc.SetAttr("use_cutlass", true);
+    new_op_desc.SetAttr("use_cudnn", false);
   }
 
   // Create a new node for the fused op.
paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc

@@ -46,7 +46,7 @@ framework::proto::OpDesc PrepareOpDesc(
   desc.SetAttr("activation", activation);
   desc.SetOutput("Output", {output});
   desc.SetAttr("is_test", true);
-  desc.SetAttr("use_cudnn", false);
+  desc.SetAttr("use_cudnn", true);
   // for leaky_relu use
   desc.SetAttr("fuse_alpha", alpha);
   desc.Flush();
@@ -208,7 +208,8 @@ void ConvElementwiseAddActFusePass::ApplyImpl(ir::Graph* graph) const {
       PrepareOpDesc(base_op_desc, bias_name, act_op_type, act_op_out, alpha);
   framework::OpDesc new_op_desc(new_op_proto, nullptr);
   if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) {
-    new_op_desc.SetAttr("use_cutlass", true);
+    new_op_desc.SetAttr("use_cudnn", false);
+    new_op_desc.Flush();
   }
   // Create a new node for the fused op.
   auto* new_conv_op = graph->CreateOpNode(&new_op_desc);
4 changes: 2 additions & 2 deletions paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc
@@ -115,7 +115,7 @@ void ConvElementwiseAddFusePass::ApplyImpl(ir::Graph* graph) const {
   new_op_desc.SetAttr("activation", act_type);
   new_op_desc.SetOutput("Output", {output_name});
   new_op_desc.SetAttr("is_test", true);
-  new_op_desc.SetAttr("use_cudnn", false);
+  new_op_desc.SetAttr("use_cudnn", true);
 
   bool is_fp16_precision =
       static_cast<phi::DataType>(Get<int>("model_precision")) ==
@@ -126,7 +126,7 @@ void ConvElementwiseAddFusePass::ApplyImpl(ir::Graph* graph) const {
   bool cutlass_can_fuse = CutlassTeller::Instance()->CbaCanSupport(
       conv_op->Op(), scope, act_type, Get<int>("gpu_device_id"));
   if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) {
-    new_op_desc.SetAttr("use_cutlass", true);
+    new_op_desc.SetAttr("use_cudnn", false);
   }
 
   auto* elementwise_add_op_desc = elementwise_add_op->Op();
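On the producer side, all three fuse passes above follow the same pattern when they assemble the fused conv2d_fusion desc: use_cudnn starts out true, and it is flipped to false only when cutlass_can_fuse, cutlass_enable, and is_fp16_precision all hold. A minimal stand-alone illustration of that default-plus-override flow, with a mock struct instead of framework::OpDesc (the real passes also set inputs, outputs, and the activation attribute):

#include <iostream>
#include <map>
#include <string>

// Mock of the attribute-bearing desc that the fuse passes build.
struct FusedConvDesc {
  std::string type = "conv2d_fusion";
  std::map<std::string, bool> bool_attrs;
};

// cuDNN is the default backend; CUTLASS is opt-in and expressed solely
// through use_cudnn = false, which CutlassIsValid later checks.
FusedConvDesc BuildFusedConv(bool cutlass_can_fuse, bool cutlass_enable,
                             bool is_fp16_precision) {
  FusedConvDesc desc;
  desc.bool_attrs["is_test"] = true;
  desc.bool_attrs["use_cudnn"] = true;  // default: cuDNN kernel
  if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) {
    desc.bool_attrs["use_cudnn"] = false;  // hand this op to CUTLASS
  }
  return desc;
}

int main() {
  auto cudnn_desc = BuildFusedConv(false, true, true);
  auto cutlass_desc = BuildFusedConv(true, true, true);
  std::cout << "cuDNN path:   use_cudnn=" << cudnn_desc.bool_attrs["use_cudnn"]
            << "\nCUTLASS path: use_cudnn=" << cutlass_desc.bool_attrs["use_cudnn"]
            << '\n';
  return 0;
}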
5 changes: 0 additions & 5 deletions paddle/fluid/operators/fused/CMakeLists.txt
@@ -6,7 +6,6 @@ endif()
 register_operators(
   EXCLUDES
   fused_bn_activation_op
-  conv_fusion_op
   fusion_conv_inception_op
   skip_layernorm_op
   yolo_box_head_op
@@ -41,10 +40,6 @@ if(WITH_GPU OR WITH_ROCM)
   if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 7401))
     op_library(fused_bn_activation_op)
   endif()
-  # conv_fusion_op needs cudnn 7 above
-  if(NOT ${CUDNN_VERSION} VERSION_LESS 7100)
-    op_library(conv_fusion_op)
-  endif()
   # HIP not support cudnnTransformTensor
   # fusion_conv_inception_op needs cudnn 7 above
   # HIP not support cudnnConvolutionBiasActivationForward