diff --git a/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.cpp b/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.cpp index c7a11c157..c97ae9c37 100644 --- a/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.cpp +++ b/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.cpp @@ -59,6 +59,16 @@ at::Tensor convolution_relu_run( return op_context->run(input, ideep::attr_t::fuse_relu()); } +at::Tensor convolution_leaky_relu_run( + const at::Tensor& input, + at::Scalar alpha, + const c10::intrusive_ptr& op_context) { + IPEX_RECORD_FUNCTION( + "ipex_prepack::convolution_leaky_relu_run", std::vector({})); + auto alpha_value = alpha.to(); + return op_context->run(input, ideep::attr_t::fuse_relu(1.0, alpha_value)); +} + at::Tensor convolution_sigmoid_run( const at::Tensor& input, const c10::intrusive_ptr& op_context) { diff --git a/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.h b/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.h index 22ec24872..367440963 100644 --- a/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.h +++ b/intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.h @@ -32,6 +32,11 @@ at::Tensor convolution_relu_run( const at::Tensor& input, const c10::intrusive_ptr& op_context); +at::Tensor convolution_leaky_relu_run( + const at::Tensor& input, + at::Scalar alpha, + const c10::intrusive_ptr& op_context); + at::Tensor convolution_sigmoid_run( const at::Tensor& input, const c10::intrusive_ptr& op_context); diff --git a/intel_extension_for_pytorch/csrc/jit/cpu/passes/graph_rewrite_conv.cpp b/intel_extension_for_pytorch/csrc/jit/cpu/passes/graph_rewrite_conv.cpp index 91751d6a2..ac89c7403 100644 --- a/intel_extension_for_pytorch/csrc/jit/cpu/passes/graph_rewrite_conv.cpp +++ b/intel_extension_for_pytorch/csrc/jit/cpu/passes/graph_rewrite_conv.cpp @@ -108,13 +108,15 @@ void insertPrePackedConvOp(std::shared_ptr& graph) { void fuseConvWithEltwise(std::shared_ptr& graph) { SubgraphRewriter rewriter_relu, rewriter_sigmoid, rewriter_hardtanh, - rewriter_elu, rewriter_swish, rewriter_silu; + rewriter_elu, rewriter_swish, rewriter_silu, rewriter_leaky_relu; std::array relu_operators = {"relu", "relu_"}; std::array sigmoid_operators = {"sigmoid", "sigmoid_"}; std::array hardtanh_operators = {"hardtanh", "hardtanh_"}; std::array elu_operators = {"elu", "elu_"}; std::array mul_operators = {"mul", "mul_"}; std::array silu_operators = {"silu", "silu_"}; + std::array leaky_relu_operators = { + "leaky_relu", "leaky_relu_"}; auto conv_relu_rstring = CodeTemplate(R"( graph(%input, %weight, %bias, %stride:int[], %padding:int[], %dilation:int[], %kernel_size:int[], %groups:int, %output_channel:int, %weight_is_channels_last:bool, %weight_is_prepacked:bool, %input_size:int[]): @@ -189,6 +191,19 @@ void fuseConvWithEltwise(std::shared_ptr& graph) { %res = ipex_prepack::convolution_swish_run(%input, %packed_weight) return (%res))"; + auto conv_leaky_relu_rstring = CodeTemplate(R"( + graph(%input, %weight, %bias, %stride:int[], %padding:int[], %dilation:int[], %kernel_size:int[], %groups:int, %output_channel:int, %weight_is_channels_last:bool, %weight_is_prepacked:bool, %input_size:int[], %alpha): + %packed_weight : __torch__.torch.classes.ipex_prepack.ConvolutionOpContext = ipex_prepack::convolution_prepack(%weight, %bias, %stride, %padding, %dilation, %kernel_size, %groups, %output_channel, %weight_is_channels_last, %weight_is_prepacked, %input_size) + %x = ipex_prepack::convolution_run(%input, %packed_weight) + %res = aten::${leaky_relu}(%x, %alpha) + return (%res))"); + + std::string conv_leaky_relu_fused = R"( + graph(%input, %weight, %bias, %stride:int[], %padding:int[], %dilation:int[], %kernel_size:int[], %groups:int, %output_channel:int, %weight_is_channels_last:bool, %weight_is_prepacked:bool, %input_size:int[], %alpha): + %packed_weight : __torch__.torch.classes.ipex_prepack.ConvolutionOpContext = ipex_prepack::convolution_leaky_relu_prepack(%weight, %bias, %stride, %padding, %dilation, %kernel_size, %groups, %output_channel, %weight_is_channels_last, %weight_is_prepacked, %input_size, %alpha) + %res = ipex_prepack::convolution_leaky_relu_run(%input, %alpha, %packed_weight) + return (%res))"; + for (const auto& relu : relu_operators) { TemplateEnv env; env.s("relu", relu); @@ -240,12 +255,20 @@ void fuseConvWithEltwise(std::shared_ptr& graph) { return no_input_scale; }; + for (const auto& leaky_relu : leaky_relu_operators) { + TemplateEnv env; + env.s("leaky_relu", leaky_relu); + rewriter_leaky_relu.RegisterRewritePattern( + conv_leaky_relu_rstring.format(env), conv_leaky_relu_fused); + } + rewriter_relu.runOnGraph(graph); rewriter_sigmoid.runOnGraph(graph); rewriter_hardtanh.runOnGraph(graph); rewriter_elu.runOnGraph(graph, filter_conv2d_elu); rewriter_swish.runOnGraph(graph); rewriter_silu.runOnGraph(graph); + rewriter_leaky_relu.runOnGraph(graph); } void fuseConvAddRelu(std::shared_ptr& graph) { diff --git a/intel_extension_for_pytorch/csrc/jit/cpu/passes/prepack_folding.cpp b/intel_extension_for_pytorch/csrc/jit/cpu/passes/prepack_folding.cpp index 203c7431c..cbc0fda5e 100644 --- a/intel_extension_for_pytorch/csrc/jit/cpu/passes/prepack_folding.cpp +++ b/intel_extension_for_pytorch/csrc/jit/cpu/passes/prepack_folding.cpp @@ -28,10 +28,12 @@ void PrePackingOpsFolder(Block* b) { n->kind() == Symbol::fromQualString( "ipex_prepack::convolution_add_relu_prepack") || - n->kind() == Symbol::fromQualString("ipex_prepack::linear_prepack") || n->kind() == - Symbol::fromQualString("ipex_prepack::conv_transpose2d_prepack")); + Symbol::fromQualString("ipex_prepack::conv_transpose2d_prepack") || + n->kind() == + Symbol::fromQualString( + "ipex_prepack::convolution_leaky_relu_prepack")); }; std::unordered_set nodes_to_delete; diff --git a/intel_extension_for_pytorch/csrc/jit/cpu/passes/register_dnnl_jit_ops.cpp b/intel_extension_for_pytorch/csrc/jit/cpu/passes/register_dnnl_jit_ops.cpp index 227a3eb34..e12013e51 100644 --- a/intel_extension_for_pytorch/csrc/jit/cpu/passes/register_dnnl_jit_ops.cpp +++ b/intel_extension_for_pytorch/csrc/jit/cpu/passes/register_dnnl_jit_ops.cpp @@ -182,25 +182,6 @@ RegisterOperators op({ }; }, aliasAnalysisFromSchema()), - Operator( - "ipex_prepack::convolution_hardtanh_run(Tensor input, Scalar " - "lower_bound, Scalar upper_bound, " - "__torch__.torch.classes.ipex_prepack.ConvolutionOpContext " - "W_prepack) -> Tensor", - [](const Node* node) -> Operation { - return [](Stack* stack) { - auto result = convolution_hardtanh_run( - (std::move(peek(stack, 0, 4))).toTensor(), - (std::move(peek(stack, 1, 4))).toScalar(), - (std::move(peek(stack, 2, 4))).toScalar(), - (std::move(peek(stack, 3, 4))) - .toCustomClass()); - drop(stack, 4); - pack(stack, std::move(result)); - return 0; - }; - }, - aliasAnalysisFromSchema()), Operator( "ipex_prepack::convolution_elu_prepack(" CONV_PREPACK_ARGS ", Scalar alpha, Scalar scale, Scalar input_scale) " @@ -233,6 +214,52 @@ RegisterOperators op({ }; }, aliasAnalysisFromSchema()), + Operator( + "ipex_prepack::convolution_leaky_relu_prepack(" CONV_PREPACK_ARGS + ", Scalar alpha) " + "-> __torch__.torch.classes.ipex_prepack.ConvolutionOpContext", + [](const Node* node) -> Operation { + return [](Stack* stack) { + auto alpha_value = + (std::move(peek(stack, 11, 12))).toScalar().to(); + auto result = IpexConvolutionOpContext::create_context( + std::move((std::move(peek(stack, 0, 12))).toTensor()), + std::move(toOptionalTensor(std::move(peek(stack, 1, 12)))), + std::move((std::move(peek(stack, 2, 12))).toIntVector()), + std::move((std::move(peek(stack, 3, 12))).toIntVector()), + std::move((std::move(peek(stack, 4, 12))).toIntVector()), + std::move((std::move(peek(stack, 5, 12))).toIntVector()), + (std::move(peek(stack, 6, 12))).toInt(), + (std::move(peek(stack, 7, 12))).toInt(), + (std::move(peek(stack, 8, 12))).toBool(), + (std::move(peek(stack, 9, 12))).toBool(), + std::move((std::move(peek(stack, 10, 12))).toIntVector()), + ideep::attr_t::fuse_relu(1.0, alpha_value)); + drop(stack, 12); + pack(stack, std::move(result)); + return 0; + }; + }, + aliasAnalysisFromSchema()), + Operator( + "ipex_prepack::convolution_hardtanh_run(Tensor input, Scalar " + "lower_bound, Scalar upper_bound, " + "__torch__.torch.classes.ipex_prepack.ConvolutionOpContext " + "W_prepack) -> Tensor", + [](const Node* node) -> Operation { + return [](Stack* stack) { + auto result = convolution_hardtanh_run( + (std::move(peek(stack, 0, 4))).toTensor(), + (std::move(peek(stack, 1, 4))).toScalar(), + (std::move(peek(stack, 2, 4))).toScalar(), + (std::move(peek(stack, 3, 4))) + .toCustomClass()); + drop(stack, 4); + pack(stack, std::move(result)); + return 0; + }; + }, + aliasAnalysisFromSchema()), Operator( "ipex_prepack::convolution_elu_run(Tensor input, Scalar alpha, " "Scalar scale, Scalar input_scale, " @@ -253,6 +280,24 @@ RegisterOperators op({ }; }, aliasAnalysisFromSchema()), + Operator( + "ipex_prepack::convolution_leaky_relu_run(Tensor input, Scalar alpha, " + "__torch__.torch.classes.ipex_prepack.ConvolutionOpContext " + "W_prepack) -> Tensor", + [](const Node* node) -> Operation { + return [](Stack* stack) { + auto result = convolution_leaky_relu_run( + (std::move(peek(stack, 0, 3))).toTensor(), + (std::move(peek(stack, 1, 3))).toScalar(), + (std::move(peek(stack, 2, 3))) + .toCustomClass()); + drop(stack, 3); + pack(stack, std::move(result)); + return 0; + }; + }, + aliasAnalysisFromSchema()), + Operator( "ipex_prepack::convolution_bottleneck_run(Tensor(a!) input, " "__torch__.torch.classes.ipex_prepack.ConvolutionOpContext W_prepack1, " diff --git a/tests/cpu/test_jit.py b/tests/cpu/test_jit.py index d68088d90..c22110016 100644 --- a/tests/cpu/test_jit.py +++ b/tests/cpu/test_jit.py @@ -156,6 +156,19 @@ def __init__(self, dim, in_channels, out_channels, **kwargs): def forward(self, x): return F.relu(self.conv(x), inplace=True) +class ConvLeakyRelu_Fixed(nn.Module): + def __init__(self, dim, in_channels, out_channels, **kwargs): + super(ConvLeakyRelu_Fixed, self).__init__() + seed = 2018 + torch.manual_seed(seed) + self.conv = conv_module[dim](in_channels, out_channels, bias=False, **kwargs) + self.leaky_relu = nn.LeakyReLU(0.1) + + def forward(self, x): + x = self.conv(x) + x = self.leaky_relu(x) + return x + class Conv_Relu_Add(nn.Module): def __init__(self, dim, in_channels, out_channels, **kwargs): super(Conv_Relu_Add, self).__init__() @@ -1787,12 +1800,27 @@ def test_output_conv_relu(self): self._test_output( ConvRelu_Fixed(dim, in_channels, out_channels, kernel_size=kernel_size, stride=1), x, - kind_in_graph="ipex_prepack::convolution_relu_run") + kind_in_graph="ipex_prepack::convolution_relu_run", + kind_not_in_graph="ipex_prepack::convolution_relu_prepack") self._test_output_bf16( ConvRelu_Fixed(dim, in_channels, out_channels, kernel_size=kernel_size, stride=1), x, kind_in_graph="ipex_prepack::convolution_relu_run", - prec=0.08) + kind_not_in_graph="ipex_prepack::convolution_relu_prepack", + prec=0.08, + levels=['O1']) + self._test_output( + ConvLeakyRelu_Fixed(dim, in_channels, out_channels, kernel_size=kernel_size, stride=1), + x, + kind_in_graph="ipex_prepack::convolution_leaky_relu_run", + kind_not_in_graph="ipex_prepack::convolution_leaky_relu_prepack") + self._test_output_bf16( + ConvLeakyRelu_Fixed(dim, in_channels, out_channels, kernel_size=kernel_size, stride=1), + x, + kind_in_graph="ipex_prepack::convolution_leaky_relu_run", + kind_not_in_graph="ipex_prepack::convolution_leaky_relu_prepack", + prec=0.02, + levels=['O1']) def test_output_conv_sum(self): batch_size = 8