From dc6ed1a5967c644b03874fd1f8a503f0b80be6bd Mon Sep 17 00:00:00 2001
From: XiaobingZhang
Date: Sun, 6 Mar 2022 22:55:11 +0800
Subject: [PATCH] enable Conv+LeakyRelu fusion (#589)

---
 .../csrc/jit/cpu/kernels/ConvPacked.cpp       | 10 +++
 .../csrc/jit/cpu/kernels/ConvPacked.h         |  5 ++
 .../jit/cpu/passes/graph_rewrite_conv.cpp     | 25 +++++-
 .../jit/cpu/passes/register_dnnl_jit_ops.cpp  | 83 ++++++++++++++-----
 tests/cpu/test_jit.py                         | 33 +++++++-
 5 files changed, 133 insertions(+), 23 deletions(-)

diff --git a/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.cpp b/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.cpp
index c7a11c157..c97ae9c37 100644
--- a/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.cpp
+++ b/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.cpp
@@ -59,6 +59,16 @@ at::Tensor convolution_relu_run(
   return op_context->run(input, ideep::attr_t::fuse_relu());
 }
 
+at::Tensor convolution_leaky_relu_run(
+    const at::Tensor& input,
+    at::Scalar alpha,
+    const c10::intrusive_ptr<ConvolutionOpContext>& op_context) {
+  IPEX_RECORD_FUNCTION(
+      "ipex_prepack::convolution_leaky_relu_run", std::vector<c10::IValue>({}));
+  auto alpha_value = alpha.to<float>();
+  return op_context->run(input, ideep::attr_t::fuse_relu(1.0, alpha_value));
+}
+
 at::Tensor convolution_sigmoid_run(
     const at::Tensor& input,
     const c10::intrusive_ptr<ConvolutionOpContext>& op_context) {
diff --git a/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.h b/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.h
index 22ec24872..367440963 100644
--- a/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.h
+++ b/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.h
@@ -32,6 +32,11 @@ at::Tensor convolution_relu_run(
     const at::Tensor& input,
     const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
 
+at::Tensor convolution_leaky_relu_run(
+    const at::Tensor& input,
+    at::Scalar alpha,
+    const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
+
 at::Tensor convolution_sigmoid_run(
     const at::Tensor& input,
     const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
diff --git a/intel_extension_for_pytorch/csrc/jit/cpu/passes/graph_rewrite_conv.cpp b/intel_extension_for_pytorch/csrc/jit/cpu/passes/graph_rewrite_conv.cpp
index 2329f6af2..b304104cb 100644
--- a/intel_extension_for_pytorch/csrc/jit/cpu/passes/graph_rewrite_conv.cpp
+++ b/intel_extension_for_pytorch/csrc/jit/cpu/passes/graph_rewrite_conv.cpp
@@ -106,13 +106,15 @@ void insertPrePackedConvOp(std::shared_ptr<Graph>& graph) {
 
 void fuseConvWithEltwise(std::shared_ptr<Graph>& graph) {
   SubgraphRewriter rewriter_relu, rewriter_sigmoid, rewriter_hardtanh,
-      rewriter_elu, rewriter_swish, rewriter_silu;
+      rewriter_elu, rewriter_swish, rewriter_silu, rewriter_leaky_relu;
   std::array<std::string, 2> relu_operators = {"relu", "relu_"};
   std::array<std::string, 2> sigmoid_operators = {"sigmoid", "sigmoid_"};
   std::array<std::string, 2> hardtanh_operators = {"hardtanh", "hardtanh_"};
   std::array<std::string, 2> elu_operators = {"elu", "elu_"};
   std::array<std::string, 2> mul_operators = {"mul", "mul_"};
   std::array<std::string, 2> silu_operators = {"silu", "silu_"};
+  std::array<std::string, 2> leaky_relu_operators = {
+      "leaky_relu", "leaky_relu_"};
 
   auto conv_relu_rstring = CodeTemplate(R"(
     graph(%input, %weight, %bias, %stride:int[], %padding:int[], %dilation:int[], %kernel_size:int[], %groups:int, %output_channel:int, %weight_is_channels_last:bool, %weight_is_prepacked:bool, %input_size:int[]):
@@ -187,6 +189,19 @@ void fuseConvWithEltwise(std::shared_ptr<Graph>& graph) {
       %res = ipex_prepack::convolution_swish_run(%input, %packed_weight)
       return (%res))";
 
+  auto conv_leaky_relu_rstring = CodeTemplate(R"(
+    graph(%input, %weight, %bias, %stride:int[], %padding:int[], 
%dilation:int[], %kernel_size:int[], %groups:int, %output_channel:int, %weight_is_channels_last:bool, %weight_is_prepacked:bool, %input_size:int[], %alpha):
+      %packed_weight : __torch__.torch.classes.ipex_prepack.ConvolutionOpContext = ipex_prepack::convolution_prepack(%weight, %bias, %stride, %padding, %dilation, %kernel_size, %groups, %output_channel, %weight_is_channels_last, %weight_is_prepacked, %input_size)
+      %x = ipex_prepack::convolution_run(%input, %packed_weight)
+      %res = aten::${leaky_relu}(%x, %alpha)
+      return (%res))");
+
+  std::string conv_leaky_relu_fused = R"(
+    graph(%input, %weight, %bias, %stride:int[], %padding:int[], %dilation:int[], %kernel_size:int[], %groups:int, %output_channel:int, %weight_is_channels_last:bool, %weight_is_prepacked:bool, %input_size:int[], %alpha):
+      %packed_weight : __torch__.torch.classes.ipex_prepack.ConvolutionOpContext = ipex_prepack::convolution_leaky_relu_prepack(%weight, %bias, %stride, %padding, %dilation, %kernel_size, %groups, %output_channel, %weight_is_channels_last, %weight_is_prepacked, %input_size, %alpha)
+      %res = ipex_prepack::convolution_leaky_relu_run(%input, %alpha, %packed_weight)
+      return (%res))";
+
   for (const auto& relu : relu_operators) {
     TemplateEnv env;
     env.s("relu", relu);
@@ -238,12 +253,20 @@ void fuseConvWithEltwise(std::shared_ptr<Graph>& graph) {
     return no_input_scale;
   };
 
+  for (const auto& leaky_relu : leaky_relu_operators) {
+    TemplateEnv env;
+    env.s("leaky_relu", leaky_relu);
+    rewriter_leaky_relu.RegisterRewritePattern(
+        conv_leaky_relu_rstring.format(env), conv_leaky_relu_fused);
+  }
+
   rewriter_relu.runOnGraph(graph);
   rewriter_sigmoid.runOnGraph(graph);
   rewriter_hardtanh.runOnGraph(graph);
   rewriter_elu.runOnGraph(graph, filter_conv2d_elu);
   rewriter_swish.runOnGraph(graph);
   rewriter_silu.runOnGraph(graph);
+  rewriter_leaky_relu.runOnGraph(graph);
 }
 
 void fuseConvAddRelu(std::shared_ptr<Graph>& graph) {
diff --git a/intel_extension_for_pytorch/csrc/jit/cpu/passes/register_dnnl_jit_ops.cpp b/intel_extension_for_pytorch/csrc/jit/cpu/passes/register_dnnl_jit_ops.cpp
index 19cad82c2..04c06eb28 100644
--- a/intel_extension_for_pytorch/csrc/jit/cpu/passes/register_dnnl_jit_ops.cpp
+++ b/intel_extension_for_pytorch/csrc/jit/cpu/passes/register_dnnl_jit_ops.cpp
@@ -182,25 +182,6 @@ RegisterOperators op({
           };
         },
         aliasAnalysisFromSchema()),
-    Operator(
-        "ipex_prepack::convolution_hardtanh_run(Tensor input, Scalar "
-        "lower_bound, Scalar upper_bound, "
-        "__torch__.torch.classes.ipex_prepack.ConvolutionOpContext "
-        "W_prepack) -> Tensor",
-        [](const Node* node) -> Operation {
-          return [](Stack* stack) {
-            auto result = convolution_hardtanh_run(
-                (std::move(peek(stack, 0, 4))).toTensor(),
-                (std::move(peek(stack, 1, 4))).toScalar(),
-                (std::move(peek(stack, 2, 4))).toScalar(),
-                (std::move(peek(stack, 3, 4)))
-                    .toCustomClass<ConvolutionOpContext>());
-            drop(stack, 4);
-            pack(stack, std::move(result));
-            return 0;
-          };
-        },
-        aliasAnalysisFromSchema()),
     Operator(
         "ipex_prepack::convolution_elu_prepack(" CONV_PREPACK_ARGS
         ", Scalar alpha, Scalar scale, Scalar input_scale) "
@@ -233,6 +214,52 @@ RegisterOperators op({
           };
         },
         aliasAnalysisFromSchema()),
+    Operator(
+        "ipex_prepack::convolution_leaky_relu_prepack(" CONV_PREPACK_ARGS
+        ", Scalar alpha) "
+        "-> __torch__.torch.classes.ipex_prepack.ConvolutionOpContext",
+        [](const Node* node) -> Operation {
+          return [](Stack* stack) {
+            auto alpha_value =
+                (std::move(peek(stack, 11, 12))).toScalar().to<float>();
+            auto result = IpexConvolutionOpContext::create_context(
+                std::move((std::move(peek(stack, 0, 
12))).toTensor()),
+                std::move(toOptionalTensor(std::move(peek(stack, 1, 12)))),
+                std::move((std::move(peek(stack, 2, 12))).toIntVector()),
+                std::move((std::move(peek(stack, 3, 12))).toIntVector()),
+                std::move((std::move(peek(stack, 4, 12))).toIntVector()),
+                std::move((std::move(peek(stack, 5, 12))).toIntVector()),
+                (std::move(peek(stack, 6, 12))).toInt(),
+                (std::move(peek(stack, 7, 12))).toInt(),
+                (std::move(peek(stack, 8, 12))).toBool(),
+                (std::move(peek(stack, 9, 12))).toBool(),
+                std::move((std::move(peek(stack, 10, 12))).toIntVector()),
+                ideep::attr_t::fuse_relu(1.0, alpha_value));
+            drop(stack, 12);
+            pack(stack, std::move(result));
+            return 0;
+          };
+        },
+        aliasAnalysisFromSchema()),
+    Operator(
+        "ipex_prepack::convolution_hardtanh_run(Tensor input, Scalar "
+        "lower_bound, Scalar upper_bound, "
+        "__torch__.torch.classes.ipex_prepack.ConvolutionOpContext "
+        "W_prepack) -> Tensor",
+        [](const Node* node) -> Operation {
+          return [](Stack* stack) {
+            auto result = convolution_hardtanh_run(
+                (std::move(peek(stack, 0, 4))).toTensor(),
+                (std::move(peek(stack, 1, 4))).toScalar(),
+                (std::move(peek(stack, 2, 4))).toScalar(),
+                (std::move(peek(stack, 3, 4)))
+                    .toCustomClass<ConvolutionOpContext>());
+            drop(stack, 4);
+            pack(stack, std::move(result));
+            return 0;
+          };
+        },
+        aliasAnalysisFromSchema()),
     Operator(
         "ipex_prepack::convolution_elu_run(Tensor input, Scalar alpha, "
         "Scalar scale, Scalar input_scale, "
@@ -253,6 +280,24 @@ RegisterOperators op({
           };
         },
         aliasAnalysisFromSchema()),
+    Operator(
+        "ipex_prepack::convolution_leaky_relu_run(Tensor input, Scalar alpha, "
+        "__torch__.torch.classes.ipex_prepack.ConvolutionOpContext "
+        "W_prepack) -> Tensor",
+        [](const Node* node) -> Operation {
+          return [](Stack* stack) {
+            auto result = convolution_leaky_relu_run(
+                (std::move(peek(stack, 0, 3))).toTensor(),
+                (std::move(peek(stack, 1, 3))).toScalar(),
+                (std::move(peek(stack, 2, 3)))
+                    .toCustomClass<ConvolutionOpContext>());
+            drop(stack, 3);
+            pack(stack, std::move(result));
+            return 0;
+          };
+        },
+        aliasAnalysisFromSchema()),
+
     Operator(
         "ipex_prepack::convolution_bottleneck_run(Tensor(a!) 
input, " "__torch__.torch.classes.ipex_prepack.ConvolutionOpContext W_prepack1, " diff --git a/tests/cpu/test_jit.py b/tests/cpu/test_jit.py index e8525fc91..28da16b49 100644 --- a/tests/cpu/test_jit.py +++ b/tests/cpu/test_jit.py @@ -154,6 +154,20 @@ def __init__(self, dim, in_channels, out_channels, **kwargs): def forward(self, x): return F.relu(self.conv(x), inplace=True) +class ConvLeakyRelu_Fixed(nn.Module): + def __init__(self, dim, in_channels, out_channels, **kwargs): + super(ConvLeakyRelu_Fixed, self).__init__() + seed = 2018 + torch.manual_seed(seed) + self.conv = conv_module[dim](in_channels, out_channels, bias=False, **kwargs) + self.leaky_relu = nn.LeakyReLU(0.1) + + def forward(self, x): + x = self.conv(x) + x = self.leaky_relu(x) + return x + + class Conv_Relu_Add(nn.Module): def __init__(self, dim, in_channels, out_channels, **kwargs): super(Conv_Relu_Add, self).__init__() @@ -717,7 +731,7 @@ def forward(self, x): class LinearSwishNaive(nn.Module): def __init__(self, in_feature, out_feature): - super(LinearSwishNaive, self).__init__() + super(LinearSwishNaive, self).__init__() self.linear = nn.Linear(in_feature, out_feature) self.sigmoid = nn.Sigmoid() @@ -1824,11 +1838,24 @@ def test_output_conv_relu(self): self._test_output( ConvRelu_Fixed(dim, in_channels, out_channels, kernel_size=kernel_size, stride=1), x, - kind_in_graph="ipex_prepack::convolution_relu_run") + kind_in_graph="ipex_prepack::convolution_relu_run", + kind_not_in_graph="ipex_prepack::convolution_relu_prepack") self._test_output_bf16( ConvRelu_Fixed(dim, in_channels, out_channels, kernel_size=kernel_size, stride=1), x, kind_in_graph="ipex_prepack::convolution_relu_run", + kind_not_in_graph="ipex_prepack::convolution_relu_prepack", + prec=0.02) + self._test_output( + ConvLeakyRelu_Fixed(dim, in_channels, out_channels, kernel_size=kernel_size, stride=1), + x, + kind_in_graph="ipex_prepack::convolution_leaky_relu_run", + kind_not_in_graph="ipex_prepack::convolution_leaky_relu_prepack") + self._test_output_bf16( + ConvLeakyRelu_Fixed(dim, in_channels, out_channels, kernel_size=kernel_size, stride=1), + x, + kind_in_graph="ipex_prepack::convolution_leaky_relu_run", + kind_not_in_graph="ipex_prepack::convolution_leaky_relu_prepack", prec=0.02) def test_output_conv_sum(self): @@ -2323,7 +2350,7 @@ def _test_onednn_fp32(model, input, kind_in_graph, prec=5e-3): res_jit = tr_model(input) self.assertEqual(res_ref, res_jit) self.assertTrue(any(n.kind() == kind_in_graph for n in trace_graph.nodes())) - + _test_onednn_fp32( LinearSwish_v1(3, 32, bias=True), torch.rand(32, 3),