[Inference] conv2d_fusion op support codegen #58650

Merged: 8 commits, Nov 28, 2023
3 changes: 1 addition & 2 deletions cmake/operators.cmake
@@ -439,8 +439,7 @@ function(op_library TARGET)
       "bitwise_op"
       "nccl_op"
       "tensor_array_read_write_op"
-      "tensorrt_engine_op"
-      "conv_fusion_op")
+      "tensorrt_engine_op")
 
   if("${TARGET}" STREQUAL "${manual_pybind_op}")
     set(pybind_flag 1)
15 changes: 7 additions & 8 deletions paddle/fluid/framework/ir/conv2d_fusion_layout_transfer_pass.cc
@@ -111,7 +111,7 @@ void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
       static_cast<phi::DataType>(Get<int>("model_precision")) ==
           phi::DataType::FLOAT16 ||
       Get<bool>("enable_gpu_mixed");
-  bool cutlass_enable = Get<bool>("use_cutlass");
+
   if (!is_fp16_precision) return;
 
   PADDLE_ENFORCE_EQ(graph->IsMainGraph(),
@@ -165,12 +165,12 @@ void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
 
   auto CutlassIsValid = [&](ir::Node *op_node) -> bool {
     auto op_desc = op_node->Op();
-    bool use_cutlass = false;
-    if (op_desc->HasAttr("use_cutlass")) {
-      use_cutlass = op_desc->GetAttrIfExists<bool>("use_cutlass");
+    bool use_cudnn = true;
+    if (op_desc->HasAttr("use_cudnn")) {
+      use_cudnn = op_desc->GetAttrIfExists<bool>("use_cudnn");
     }
 
-    return use_cutlass && cutlass_enable;
+    bool cutlass_enable = Get<bool>("use_cutlass");
+    return !use_cudnn && cutlass_enable;
   };
 
   for (auto *op_node : op_nodes) {
@@ -195,8 +195,7 @@ void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
     auto *op_desc = op_node->Op();
 
     if (CutlassIsValid(op_node)) {
-      op_desc->SetType("conv2d_fusion_cutlass");
-      // conv2d_fusion_cutlass must have this attribute because of signature.
+      // conv2d_fusion must have this attribute because of signature.
       if (!op_desc->HasAttr("fuse_alpha")) {
         op_desc->SetAttr("fuse_alpha", 0.f);
       }
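The attribute contract these hunks establish can be read directly off the diff: the fuse passes mark an op for CUTLASS only by setting use_cudnn = false, and the layout-transfer pass keeps the op type as conv2d_fusion (no more conv2d_fusion_cutlass) and routes it to CUTLASS only when that attribute is false and the pass-level use_cutlass switch is on. Below is a standalone sketch of that decision, using a plain std::map in place of Paddle's OpDesc attribute storage (illustrative only, not the framework API):

#include <iostream>
#include <map>
#include <string>

// Stand-in for the boolean attributes carried by a conv2d_fusion op desc.
using AttrMap = std::map<std::string, bool>;

// Mirrors CutlassIsValid after this PR: an op stays on the cuDNN path unless
// a fuse pass explicitly set use_cudnn = false, and even then the global
// use_cutlass switch must be enabled.
bool CutlassIsValid(const AttrMap &attrs, bool pass_use_cutlass) {
  bool use_cudnn = true;  // missing attribute means "keep using cuDNN"
  auto it = attrs.find("use_cudnn");
  if (it != attrs.end()) {
    use_cudnn = it->second;
  }
  return !use_cudnn && pass_use_cutlass;
}

int main() {
  AttrMap cutlass_candidate{{"use_cudnn", false}};
  AttrMap cudnn_op{{"use_cudnn", true}};
  AttrMap unmarked_op;  // attribute never set

  std::cout << CutlassIsValid(cutlass_candidate, true)   // 1: goes to CUTLASS
            << CutlassIsValid(cudnn_op, true)            // 0: stays on cuDNN
            << CutlassIsValid(unmarked_op, true)         // 0: default is cuDNN
            << CutlassIsValid(cutlass_candidate, false)  // 0: switch disabled
            << '\n';
  return 0;
}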
paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc

@@ -50,7 +50,7 @@ framework::proto::OpDesc PrepareOpDesc(
   desc.SetAttr("activation", activation);
   desc.SetOutput("Output", {output});
   desc.SetAttr("is_test", true);
-  desc.SetAttr("use_cudnn", false);
+  desc.SetAttr("use_cudnn", true);
   desc.Flush();
   return *desc.Proto();
 }
@@ -195,7 +195,7 @@ void ConvElementwiseAdd2ActFusePass::ApplyImpl(ir::Graph* graph) const {
       base_op_desc, bias_name, bias1_name, act_op_type, act_op_out);
   framework::OpDesc new_op_desc(new_op_proto, nullptr);
   if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) {
-    new_op_desc.SetAttr("use_cutlass", true);
+    new_op_desc.SetAttr("use_cudnn", false);
   }
 
   // Create a new node for the fused op.
paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc

@@ -46,7 +46,7 @@ framework::proto::OpDesc PrepareOpDesc(
   desc.SetAttr("activation", activation);
   desc.SetOutput("Output", {output});
   desc.SetAttr("is_test", true);
-  desc.SetAttr("use_cudnn", false);
+  desc.SetAttr("use_cudnn", true);
   // for leaky_relu use
   desc.SetAttr("fuse_alpha", alpha);
   desc.Flush();
@@ -208,7 +208,8 @@ void ConvElementwiseAddActFusePass::ApplyImpl(ir::Graph* graph) const {
       PrepareOpDesc(base_op_desc, bias_name, act_op_type, act_op_out, alpha);
   framework::OpDesc new_op_desc(new_op_proto, nullptr);
   if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) {
-    new_op_desc.SetAttr("use_cutlass", true);
+    new_op_desc.SetAttr("use_cudnn", false);
+    new_op_desc.Flush();
   }
   // Create a new node for the fused op.
   auto* new_conv_op = graph->CreateOpNode(&new_op_desc);
4 changes: 2 additions & 2 deletions paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc
@@ -115,7 +115,7 @@ void ConvElementwiseAddFusePass::ApplyImpl(ir::Graph* graph) const {
   new_op_desc.SetAttr("activation", act_type);
   new_op_desc.SetOutput("Output", {output_name});
   new_op_desc.SetAttr("is_test", true);
-  new_op_desc.SetAttr("use_cudnn", false);
+  new_op_desc.SetAttr("use_cudnn", true);
 
   bool is_fp16_precision =
       static_cast<phi::DataType>(Get<int>("model_precision")) ==
@@ -126,7 +126,7 @@ void ConvElementwiseAddFusePass::ApplyImpl(ir::Graph* graph) const {
   bool cutlass_can_fuse = CutlassTeller::Instance()->CbaCanSupport(
       conv_op->Op(), scope, act_type, Get<int>("gpu_device_id"));
   if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) {
-    new_op_desc.SetAttr("use_cutlass", true);
+    new_op_desc.SetAttr("use_cudnn", false);
   }
 
   auto* elementwise_add_op_desc = elementwise_add_op->Op();
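On the producer side, all three fuse passes above follow the same pattern when they assemble the fused conv2d_fusion desc: use_cudnn starts out true, and it is flipped to false only when cutlass_can_fuse, cutlass_enable, and is_fp16_precision all hold. A minimal stand-alone illustration of that default-plus-override flow, with a mock struct instead of framework::OpDesc (the real passes also set inputs, outputs, and the activation attribute):

#include <iostream>
#include <map>
#include <string>

// Mock of the attribute-bearing desc that the fuse passes build.
struct FusedConvDesc {
  std::string type = "conv2d_fusion";
  std::map<std::string, bool> bool_attrs;
};

// cuDNN is the default backend; CUTLASS is opt-in and expressed solely
// through use_cudnn = false, which CutlassIsValid later checks.
FusedConvDesc BuildFusedConv(bool cutlass_can_fuse, bool cutlass_enable,
                             bool is_fp16_precision) {
  FusedConvDesc desc;
  desc.bool_attrs["is_test"] = true;
  desc.bool_attrs["use_cudnn"] = true;  // default: cuDNN kernel
  if (cutlass_can_fuse && cutlass_enable && is_fp16_precision) {
    desc.bool_attrs["use_cudnn"] = false;  // hand this op to CUTLASS
  }
  return desc;
}

int main() {
  auto cudnn_desc = BuildFusedConv(false, true, true);
  auto cutlass_desc = BuildFusedConv(true, true, true);
  std::cout << "cuDNN path:   use_cudnn=" << cudnn_desc.bool_attrs["use_cudnn"]
            << "\nCUTLASS path: use_cudnn=" << cutlass_desc.bool_attrs["use_cudnn"]
            << '\n';
  return 0;
}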
5 changes: 0 additions & 5 deletions paddle/fluid/operators/fused/CMakeLists.txt
@@ -6,7 +6,6 @@ endif()
 register_operators(
   EXCLUDES
   fused_bn_activation_op
-  conv_fusion_op
   fusion_conv_inception_op
   skip_layernorm_op
   yolo_box_head_op
@@ -41,10 +40,6 @@ if(WITH_GPU OR WITH_ROCM)
   if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 7401))
     op_library(fused_bn_activation_op)
   endif()
-  # conv_fusion_op needs cudnn 7 above
-  if(NOT ${CUDNN_VERSION} VERSION_LESS 7100)
-    op_library(conv_fusion_op)
-  endif()
   # HIP not support cudnnTransformTensor
   # fusion_conv_inception_op needs cudnn 7 above
   # HIP not support cudnnConvolutionBiasActivationForward