enable conv+leaky_relu fusion (#648)
* enable conv+leaky_relu fusion

* fix test issue

Co-authored-by: Wang Weihan <eikan.wang@intel.com>
XiaobingSuper and EikanWang authored Mar 31, 2022
1 parent 7831bbc commit d760313
Showing 6 changed files with 137 additions and 24 deletions.
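
For context, a minimal sketch of how this fusion would be exercised from Python; the module and shapes are illustrative, and the flow (ipex.optimize, then torch.jit.trace/freeze) is the usual IPEX inference recipe rather than anything added by this commit:

import torch
import torch.nn as nn
import intel_extension_for_pytorch as ipex

class ConvLeakyRelu(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 16, kernel_size=3, bias=False)
        self.leaky_relu = nn.LeakyReLU(0.1)

    def forward(self, x):
        # With this commit, conv followed by leaky_relu is rewritten by the
        # IPEX JIT pass into a single ipex_prepack::convolution_leaky_relu_run
        # node (the tests below check for exactly that node kind).
        return self.leaky_relu(self.conv(x))

model = ConvLeakyRelu().eval()
x = torch.randn(1, 3, 56, 56)
model = ipex.optimize(model)
with torch.no_grad():
    traced = torch.jit.trace(model, x)
    traced = torch.jit.freeze(traced)
    y = traced(x)  # runs the fused convolution + leaky_relu kernel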
10 changes: 10 additions & 0 deletions intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.cpp
@@ -59,6 +59,16 @@ at::Tensor convolution_relu_run(
return op_context->run(input, ideep::attr_t::fuse_relu());
}

at::Tensor convolution_leaky_relu_run(
const at::Tensor& input,
at::Scalar alpha,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context) {
IPEX_RECORD_FUNCTION(
"ipex_prepack::convolution_leaky_relu_run", std::vector<c10::IValue>({}));
auto alpha_value = alpha.to<float>();
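  // fuse_relu(scale, alpha) with a non-zero alpha maps to the oneDNN eltwise
  // ReLU post-op with negative slope `alpha`, i.e. leaky ReLU.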
return op_context->run(input, ideep::attr_t::fuse_relu(1.0, alpha_value));
}

at::Tensor convolution_sigmoid_run(
const at::Tensor& input,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context) {
5 changes: 5 additions & 0 deletions intel_extension_for_pytorch/csrc/jit/cpu/kernels/ConvPacked.h
@@ -32,6 +32,11 @@ at::Tensor convolution_relu_run(
const at::Tensor& input,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context);

at::Tensor convolution_leaky_relu_run(
const at::Tensor& input,
at::Scalar alpha,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context);

at::Tensor convolution_sigmoid_run(
const at::Tensor& input,
const c10::intrusive_ptr<ConvolutionOpContext>& op_context);
@@ -108,13 +108,15 @@ void insertPrePackedConvOp(std::shared_ptr<Graph>& graph) {

void fuseConvWithEltwise(std::shared_ptr<Graph>& graph) {
SubgraphRewriter rewriter_relu, rewriter_sigmoid, rewriter_hardtanh,
rewriter_elu, rewriter_swish, rewriter_silu;
rewriter_elu, rewriter_swish, rewriter_silu, rewriter_leaky_relu;
std::array<std::string, 2> relu_operators = {"relu", "relu_"};
std::array<std::string, 2> sigmoid_operators = {"sigmoid", "sigmoid_"};
std::array<std::string, 2> hardtanh_operators = {"hardtanh", "hardtanh_"};
std::array<std::string, 2> elu_operators = {"elu", "elu_"};
std::array<std::string, 2> mul_operators = {"mul", "mul_"};
std::array<std::string, 2> silu_operators = {"silu", "silu_"};
std::array<std::string, 2> leaky_relu_operators = {
"leaky_relu", "leaky_relu_"};

auto conv_relu_rstring = CodeTemplate(R"(
graph(%input, %weight, %bias, %stride:int[], %padding:int[], %dilation:int[], %kernel_size:int[], %groups:int, %output_channel:int, %weight_is_channels_last:bool, %weight_is_prepacked:bool, %input_size:int[]):
@@ -189,6 +191,19 @@ void fuseConvWithEltwise(std::shared_ptr<Graph>& graph) {
%res = ipex_prepack::convolution_swish_run(%input, %packed_weight)
return (%res))";

auto conv_leaky_relu_rstring = CodeTemplate(R"(
graph(%input, %weight, %bias, %stride:int[], %padding:int[], %dilation:int[], %kernel_size:int[], %groups:int, %output_channel:int, %weight_is_channels_last:bool, %weight_is_prepacked:bool, %input_size:int[], %alpha):
%packed_weight : __torch__.torch.classes.ipex_prepack.ConvolutionOpContext = ipex_prepack::convolution_prepack(%weight, %bias, %stride, %padding, %dilation, %kernel_size, %groups, %output_channel, %weight_is_channels_last, %weight_is_prepacked, %input_size)
%x = ipex_prepack::convolution_run(%input, %packed_weight)
%res = aten::${leaky_relu}(%x, %alpha)
return (%res))");

std::string conv_leaky_relu_fused = R"(
graph(%input, %weight, %bias, %stride:int[], %padding:int[], %dilation:int[], %kernel_size:int[], %groups:int, %output_channel:int, %weight_is_channels_last:bool, %weight_is_prepacked:bool, %input_size:int[], %alpha):
%packed_weight : __torch__.torch.classes.ipex_prepack.ConvolutionOpContext = ipex_prepack::convolution_leaky_relu_prepack(%weight, %bias, %stride, %padding, %dilation, %kernel_size, %groups, %output_channel, %weight_is_channels_last, %weight_is_prepacked, %input_size, %alpha)
%res = ipex_prepack::convolution_leaky_relu_run(%input, %alpha, %packed_weight)
return (%res))";
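  // The first IR string matches ipex_prepack::convolution_run followed by
  // aten::leaky_relu / aten::leaky_relu_; the second is the replacement that
  // fuses the pair into convolution_leaky_relu_prepack / _run, forwarding
  // %alpha to both ops.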

for (const auto& relu : relu_operators) {
TemplateEnv env;
env.s("relu", relu);
@@ -240,12 +255,20 @@ void fuseConvWithEltwise(std::shared_ptr<Graph>& graph) {
return no_input_scale;
};

for (const auto& leaky_relu : leaky_relu_operators) {
TemplateEnv env;
env.s("leaky_relu", leaky_relu);
rewriter_leaky_relu.RegisterRewritePattern(
conv_leaky_relu_rstring.format(env), conv_leaky_relu_fused);
}

rewriter_relu.runOnGraph(graph);
rewriter_sigmoid.runOnGraph(graph);
rewriter_hardtanh.runOnGraph(graph);
rewriter_elu.runOnGraph(graph, filter_conv2d_elu);
rewriter_swish.runOnGraph(graph);
rewriter_silu.runOnGraph(graph);
rewriter_leaky_relu.runOnGraph(graph);
}

void fuseConvAddRelu(std::shared_ptr<Graph>& graph) {
@@ -28,10 +28,12 @@ void PrePackingOpsFolder(Block* b) {
n->kind() ==
Symbol::fromQualString(
"ipex_prepack::convolution_add_relu_prepack") ||

n->kind() == Symbol::fromQualString("ipex_prepack::linear_prepack") ||
n->kind() ==
Symbol::fromQualString("ipex_prepack::conv_transpose2d_prepack"));
Symbol::fromQualString("ipex_prepack::conv_transpose2d_prepack") ||
n->kind() ==
Symbol::fromQualString(
"ipex_prepack::convolution_leaky_relu_prepack"));
};
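  // Including convolution_leaky_relu_prepack in this predicate lets the folder
  // treat it like the other prepack ops, so its ConvolutionOpContext can be
  // constant-folded once (assuming the usual freeze-time folding flow) instead
  // of being re-created on every forward.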

std::unordered_set<Node*> nodes_to_delete;
@@ -182,25 +182,6 @@ RegisterOperators op({
};
},
aliasAnalysisFromSchema()),
Operator(
"ipex_prepack::convolution_hardtanh_run(Tensor input, Scalar "
"lower_bound, Scalar upper_bound, "
"__torch__.torch.classes.ipex_prepack.ConvolutionOpContext "
"W_prepack) -> Tensor",
[](const Node* node) -> Operation {
return [](Stack* stack) {
auto result = convolution_hardtanh_run(
(std::move(peek(stack, 0, 4))).toTensor(),
(std::move(peek(stack, 1, 4))).toScalar(),
(std::move(peek(stack, 2, 4))).toScalar(),
(std::move(peek(stack, 3, 4)))
.toCustomClass<ConvolutionOpContext>());
drop(stack, 4);
pack(stack, std::move(result));
return 0;
};
},
aliasAnalysisFromSchema()),
Operator(
"ipex_prepack::convolution_elu_prepack(" CONV_PREPACK_ARGS
", Scalar alpha, Scalar scale, Scalar input_scale) "
Expand Down Expand Up @@ -233,6 +214,52 @@ RegisterOperators op({
};
},
aliasAnalysisFromSchema()),
Operator(
"ipex_prepack::convolution_leaky_relu_prepack(" CONV_PREPACK_ARGS
", Scalar alpha) "
"-> __torch__.torch.classes.ipex_prepack.ConvolutionOpContext",
[](const Node* node) -> Operation {
return [](Stack* stack) {
auto alpha_value =
(std::move(peek(stack, 11, 12))).toScalar().to<float>();
auto result = IpexConvolutionOpContext::create_context(
std::move((std::move(peek(stack, 0, 12))).toTensor()),
std::move(toOptionalTensor(std::move(peek(stack, 1, 12)))),
std::move((std::move(peek(stack, 2, 12))).toIntVector()),
std::move((std::move(peek(stack, 3, 12))).toIntVector()),
std::move((std::move(peek(stack, 4, 12))).toIntVector()),
std::move((std::move(peek(stack, 5, 12))).toIntVector()),
(std::move(peek(stack, 6, 12))).toInt(),
(std::move(peek(stack, 7, 12))).toInt(),
(std::move(peek(stack, 8, 12))).toBool(),
(std::move(peek(stack, 9, 12))).toBool(),
std::move((std::move(peek(stack, 10, 12))).toIntVector()),
ideep::attr_t::fuse_relu(1.0, alpha_value));
drop(stack, 12);
pack(stack, std::move(result));
return 0;
};
},
aliasAnalysisFromSchema()),
Operator(
"ipex_prepack::convolution_hardtanh_run(Tensor input, Scalar "
"lower_bound, Scalar upper_bound, "
"__torch__.torch.classes.ipex_prepack.ConvolutionOpContext "
"W_prepack) -> Tensor",
[](const Node* node) -> Operation {
return [](Stack* stack) {
auto result = convolution_hardtanh_run(
(std::move(peek(stack, 0, 4))).toTensor(),
(std::move(peek(stack, 1, 4))).toScalar(),
(std::move(peek(stack, 2, 4))).toScalar(),
(std::move(peek(stack, 3, 4)))
.toCustomClass<ConvolutionOpContext>());
drop(stack, 4);
pack(stack, std::move(result));
return 0;
};
},
aliasAnalysisFromSchema()),
Operator(
"ipex_prepack::convolution_elu_run(Tensor input, Scalar alpha, "
"Scalar scale, Scalar input_scale, "
@@ -253,6 +280,24 @@ RegisterOperators op({
};
},
aliasAnalysisFromSchema()),
Operator(
"ipex_prepack::convolution_leaky_relu_run(Tensor input, Scalar alpha, "
"__torch__.torch.classes.ipex_prepack.ConvolutionOpContext "
"W_prepack) -> Tensor",
[](const Node* node) -> Operation {
return [](Stack* stack) {
auto result = convolution_leaky_relu_run(
(std::move(peek(stack, 0, 3))).toTensor(),
(std::move(peek(stack, 1, 3))).toScalar(),
(std::move(peek(stack, 2, 3)))
.toCustomClass<ConvolutionOpContext>());
drop(stack, 3);
pack(stack, std::move(result));
return 0;
};
},
aliasAnalysisFromSchema()),

Operator(
"ipex_prepack::convolution_bottleneck_run(Tensor(a!) input, "
"__torch__.torch.classes.ipex_prepack.ConvolutionOpContext W_prepack1, "
32 changes: 30 additions & 2 deletions tests/cpu/test_jit.py
@@ -156,6 +156,19 @@ def __init__(self, dim, in_channels, out_channels, **kwargs):
def forward(self, x):
return F.relu(self.conv(x), inplace=True)

class ConvLeakyRelu_Fixed(nn.Module):
def __init__(self, dim, in_channels, out_channels, **kwargs):
super(ConvLeakyRelu_Fixed, self).__init__()
seed = 2018
torch.manual_seed(seed)
self.conv = conv_module[dim](in_channels, out_channels, bias=False, **kwargs)
self.leaky_relu = nn.LeakyReLU(0.1)

def forward(self, x):
x = self.conv(x)
x = self.leaky_relu(x)
return x

class Conv_Relu_Add(nn.Module):
def __init__(self, dim, in_channels, out_channels, **kwargs):
super(Conv_Relu_Add, self).__init__()
@@ -1787,12 +1800,27 @@ def test_output_conv_relu(self):
self._test_output(
ConvRelu_Fixed(dim, in_channels, out_channels, kernel_size=kernel_size, stride=1),
x,
kind_in_graph="ipex_prepack::convolution_relu_run")
kind_in_graph="ipex_prepack::convolution_relu_run",
kind_not_in_graph="ipex_prepack::convolution_relu_prepack")
self._test_output_bf16(
ConvRelu_Fixed(dim, in_channels, out_channels, kernel_size=kernel_size, stride=1),
x,
kind_in_graph="ipex_prepack::convolution_relu_run",
prec=0.08)
kind_not_in_graph="ipex_prepack::convolution_relu_prepack",
prec=0.08,
levels=['O1'])
self._test_output(
ConvLeakyRelu_Fixed(dim, in_channels, out_channels, kernel_size=kernel_size, stride=1),
x,
kind_in_graph="ipex_prepack::convolution_leaky_relu_run",
kind_not_in_graph="ipex_prepack::convolution_leaky_relu_prepack")
self._test_output_bf16(
ConvLeakyRelu_Fixed(dim, in_channels, out_channels, kernel_size=kernel_size, stride=1),
x,
kind_in_graph="ipex_prepack::convolution_leaky_relu_run",
kind_not_in_graph="ipex_prepack::convolution_leaky_relu_prepack",
prec=0.02,
levels=['O1'])

def test_output_conv_sum(self):
batch_size = 8
