[QNN] Implement quantized avg_pool2d
* qnn.avg_pool2d is integrated into Relay and integration tests are added
* FQ2I is modified to pick up qnn.avg_pool2d
* Canonicalization and layout conversion for qnn.avg_pool2d are implemented
* Dynamic PoolArea computation is implemented for both qnn and slice_op avg_pool2d
  to exclude the padding area from the averaging calculation when count_include_pad=False (see the sketch below)
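A minimal NumPy sketch of the PoolArea idea in the last bullet, as an illustration only, not the Hexagon TOPI implementation: with count_include_pad=False, each output element is divided by the number of valid (non-padding) input elements under the window instead of the full window size. It assumes a single-channel float input; dilation and ceil_mode are omitted, and the function name and argument layout are made up.

import numpy as np

def avg_pool2d_ref(data, pool_size, strides, padding, count_include_pad):
    """data: (H, W) array; padding: (top, left, bottom, right)."""
    kh, kw = pool_size
    sh, sw = strides
    pt, pl, pb, pr = padding
    h, w = data.shape
    padded = np.pad(data, ((pt, pb), (pl, pr)))
    oh = (h + pt + pb - kh) // sh + 1  # floor-mode output shape
    ow = (w + pl + pr - kw) // sw + 1
    out = np.empty((oh, ow), dtype=data.dtype)
    for i in range(oh):
        for j in range(ow):
            window = padded[i * sh : i * sh + kh, j * sw : j * sw + kw]
            if count_include_pad:
                pool_area = kh * kw  # fixed divisor: padding counts
            else:
                # PoolArea: count only window positions that overlap the
                # original (unpadded) input region [pt, pt+h) x [pl, pl+w).
                valid_h = min(i * sh + kh, pt + h) - max(i * sh, pt)
                valid_w = min(j * sw + kw, pl + w) - max(j * sw, pl)
                pool_area = valid_h * valid_w
            out[i, j] = window.sum() / pool_area
    return out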

Co-authored-by: Fateme Hosseini <quic_fhossein@quicinc.com>
Co-authored-by: Jyotsna Verma <jverma@quicinc.com>
Co-authored-by: Anirudh Sundar <quic_sanirudh@quicinc.com>
Co-authored-by: Venkat Rasagna Reddy Komatireddy <quic_rasagna@quicinc.com>
4 people authored and Krzysztof Parzyszek committed Jun 7, 2023
1 parent 2d2b727 commit 894d3ab
Showing 18 changed files with 1,836 additions and 441 deletions.
3 changes: 3 additions & 0 deletions python/tvm/relay/qnn/op/_qnn.py
@@ -167,3 +167,6 @@ def alter_op_layout_qnn_dense(attrs, inputs, tinfos, out_type):
# qnn.batch_matmul
register_strategy("qnn.batch_matmul", strategy.qnn_batch_matmul_strategy)
register_pattern("qnn.batch_matmul", OpPattern.OUT_ELEMWISE_FUSABLE)

# qnn.avg_pool2d
register_strategy("qnn.avg_pool2d", strategy.qnn_avg_pool2d_strategy)
35 changes: 35 additions & 0 deletions python/tvm/relay/qnn/op/layout_conversions.py
@@ -126,3 +126,38 @@ def convert_qnn_conv2d_transpose(attrs, inputs, tinfos, desired_layouts):
return relay.qnn.op.conv2d_transpose(*inputs, **new_attrs)

raise ValueError(f"Layout {desired_data_layout} is not yet supported")


@reg.register_convert_op_layout("qnn.avg_pool2d")
def convert_qnn_avg_pool2d(attrs, inputs, tinfos, desired_layouts):
"""Convert Layout pass registration for QNN avg_pool2d op.
Parameters
----------
attrs : tvm.ir.Attrs
Attributes of current avg_pool2d
inputs : list of tvm.relay.Expr
The args of the Relay expr to be legalized
tinfos : list of types
List of input and output types
desired_layouts : list of layout strings
List of layouts defining our desired
layout for the data input.
Returns
-------
result : tvm.relay.Expr
The transformed expr
"""
# pylint: disable=import-outside-toplevel
from tvm import relay

assert len(desired_layouts) == 1, "A desired layout is expected for qnn.avg_pool2d's input"
desired_data_layout = desired_layouts[0]
if desired_data_layout == "NCHW" or desired_data_layout == "NHWC":
new_attrs = dict(attrs)
new_attrs["layout"] = str(desired_data_layout)
new_attrs["out_layout"] = str(desired_data_layout)
return relay.qnn.op.avg_pool2d(*inputs, **new_attrs)

raise ValueError(f"Layout {desired_data_layout} is not yet supported")
66 changes: 66 additions & 0 deletions python/tvm/relay/qnn/op/qnn.py
@@ -1249,3 +1249,69 @@ def leaky_relu(x, alpha, input_scale, input_zero_point, output_scale, output_zer

def softmax(x, scale, zero_point, output_scale, output_zero_point, axis=-1):
return _make.softmax(x, axis, scale, zero_point, output_scale, output_zero_point)


def avg_pool2d(
data,
input_scale,
input_zero_point,
output_scale,
output_zero_point,
pool_size,
strides,
padding,
dilation,
ceil_mode=False,
count_include_pad=True,
layout="NHWC",
out_layout="",
):
"""Quantized avg_pool2d
Parameters
----------
data : relay.Expr
The quantized input tensor.
input_scale : float
The scale of the input quantized expr.
input_zero_point : int
The zero point of the input quantized expr.
output_scale : float
The scale of the output quantized expr.
output_zero_point : int
The zero point of the output quantized expr.
pool_size : relay.Expr
The size of the pooling window
strides : relay.Expr
The strides of the pooling window
padding : relay.Expr
The padding size
dilation : relay.Expr
The dilation size
ceil_mode : bool, optional
Whether to use ceil or floor for calculating the output shape
count_include_pad : bool, optional
Determines if padding should be taken into account in the computation
layout : string, optional
Layout of the input data
out_layout : string, optional
Layout of the output
Returns
-------
result : relay.Expr
The computed result.
"""
return _make.avg_pool2d(
data,
input_scale,
input_zero_point,
output_scale,
output_zero_point,
pool_size,
strides,
padding,
dilation,
ceil_mode,
count_include_pad,
layout,
out_layout,
)
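
An illustrative call sketch with made-up shapes and quantization values; the scale and zero-point arguments are passed as Relay constants, which is how the FQ2I rewrite elsewhere in this commit supplies them:

from tvm import relay

x = relay.var("x", shape=(1, 28, 28, 32), dtype="int8")  # NHWC input
pooled = relay.qnn.op.avg_pool2d(
    x,
    input_scale=relay.const(0.125, "float32"),
    input_zero_point=relay.const(0, "int32"),
    output_scale=relay.const(0.125, "float32"),
    output_zero_point=relay.const(0, "int32"),
    pool_size=(3, 3),
    strides=(1, 1),
    padding=(1, 1, 1, 1),
    dilation=(1, 1),
    count_include_pad=False,
)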
39 changes: 39 additions & 0 deletions python/tvm/relay/qnn/strategy/generic.py
@@ -157,6 +157,36 @@ def wrapper(_attrs, inputs, out_type):
return wrapper


def wrap_compute_qnn_avg_pool2d(topi_compute):
"""Wrap qnn.avg_pool2d topi compute"""

def wrapper(attrs, inputs, out_type):
kernel = attrs.pool_size
strides = attrs.strides
padding = attrs.padding
dilation = attrs.dilation
count_include_pad = attrs.count_include_pad
oshape = out_type.shape
odtype = out_type.dtype
args = [
inputs[0],
kernel,
strides,
padding,
dilation,
count_include_pad,
oshape,
odtype,
inputs[1],
inputs[2],
inputs[3],
inputs[4],
]
return [topi_compute(*args)]

return wrapper


def wrap_topi_concatenate(topi_compute):
"""Wrap TOPI compute which use qnn.concatenate attrs"""

@@ -280,3 +310,12 @@ def qnn_batch_matmul_strategy(attrs, inputs, out_type, target):
"qnn.batch_matmul is currently only supported with Hexagon. "
"Please run QNN Canonicalize pass to decompose this op into supported ops."
)


@override_native_generic_func("qnn_avg_pool2d_strategy")
def qnn_avg_pool2d_strategy(attrs, inputs, out_type, target):
"""qnn.avg_pool2d generic strategy"""
raise RuntimeError(
"qnn.avg_pool2d is currently only supported with Hexagon. "
"Please run QNN Canonicalize pass to decompose this op into supported ops."
)
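
On targets with no qnn.avg_pool2d schedule, the error above asks for QNN canonicalization, which decomposes the op into plain Relay ops before compilation. A sketch, assuming mod is a relay.IRModule containing qnn.avg_pool2d:

import tvm
from tvm import relay

with tvm.transform.PassContext(opt_level=3):
    mod = relay.qnn.transform.CanonicalizeOps()(mod)  # lower qnn ops to Relay ops
    lib = relay.build(mod, target="llvm")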
24 changes: 24 additions & 0 deletions python/tvm/relay/qnn/strategy/hexagon.py
@@ -201,3 +201,27 @@ def qnn_batch_matmul_strategy_hexagon(attrs, inputs, out_type, target):
name="qnn_batch_matmul.hexagon",
)
return strategy


@qnn_avg_pool2d_strategy.register(["hexagon"])
def qnn_avg_pool2d_strategy_hexagon(attrs, inputs, out_type, target):
"""qnn.avg_pool2d strategy for Hexagon"""
data_layout = attrs.layout
if data_layout == "NHWC":
strategy = _op.OpStrategy()
strategy.add_implementation(
wrap_compute_qnn_avg_pool2d(topi.hexagon.qnn.qnn_avg_pool2d_wrapper_compute_NHWC),
wrap_topi_schedule(topi.hexagon.qnn.schedule_qnn_avg_pool2d),
name="qnn_avg_pool2d.hexagon",
)
return strategy
elif data_layout == "NCHW":
strategy = _op.OpStrategy()
strategy.add_implementation(
wrap_compute_qnn_avg_pool2d(topi.hexagon.qnn.qnn_avg_pool2d_wrapper_compute_NCHW),
wrap_topi_schedule(topi.hexagon.qnn.schedule_qnn_avg_pool2d),
name="qnn_avg_pool2d.hexagon",
)
return strategy
else:
raise RuntimeError("Unsupported strategy for qnn.avg_pool2d")
57 changes: 26 additions & 31 deletions python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -111,6 +111,32 @@ def identity(expr, type_map):
register_unary_identity("image.resize2d")


@register_fake_quantization_to_integer("nn.avg_pool2d")
def avgpool2d(expr, type_map):
"""Rewrite an avgpool op"""
attrs = {**expr.attrs}
arg = expr.args[0]
t = type_map[arg]
out_t = type_map[expr]

out = relay.qnn.op.avg_pool2d(
arg,
t.scale,
t.zero_point,
out_t.scale,
out_t.zero_point,
attrs["pool_size"],
attrs["strides"],
attrs["padding"],
attrs["dilation"],
attrs["ceil_mode"],
attrs["count_include_pad"],
attrs["layout"],
)

return [out, TensorAffineType(out_t.scale, out_t.zero_point, out_t.dtype, out_t.axis)]
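
A sketch of the pattern this rewrite targets, with made-up module contents: after FakeQuantizationToInteger runs, a dequantize, nn.avg_pool2d, quantize chain should collapse into a single qnn.avg_pool2d via the handler above.

import tvm
from tvm import relay

x = relay.var("x", shape=(1, 28, 28, 32), dtype="int8")
# Fake-quantized region: dequantize, float avg_pool2d, re-quantize.
deq = relay.qnn.op.dequantize(x, relay.const(0.125), relay.const(0))
pool = relay.nn.avg_pool2d(deq, pool_size=(2, 2), strides=(2, 2), layout="NHWC")
quant = relay.qnn.op.quantize(pool, relay.const(0.125), relay.const(0), out_dtype="int8")

mod = tvm.IRModule.from_expr(quant)
mod = relay.transform.InferType()(mod)
mod = relay.transform.FakeQuantizationToInteger()(mod)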


@register_fake_quantization_to_integer("nn.adaptive_avg_pool1d")
def adaptive_avgpool1d(expr, type_map):
"""Rewrite an adaptive avgpool op"""
@@ -138,37 +164,6 @@ def adaptive_avgpool1d(expr, type_map):
return [out, TensorAffineType(out_t.scale, out_t.zero_point, "int32", out_t.axis)]


@register_fake_quantization_to_integer("nn.avg_pool2d")
def avgpool2d(expr, type_map):
"""Rewrite a avgpool op"""
arg = expr.args[0]
t = type_map[arg]
out_t = type_map[expr]
# Cast (or requantize) to int32.
if not (
approx_equal(t.scale, out_t.scale)
and approx_equal(t.zero_point, out_t.zero_point)
and tvm.ir.structural_equal(t.dtype, out_t.dtype)
):
arg = relay.qnn.op.requantize(
arg,
t.scale,
t.zero_point,
out_t.scale,
out_t.zero_point,
out_dtype="int32",
axis=t.axis,
)
else:
arg = relay.op.cast(arg, "int32")
out = relay.op.nn.avg_pool2d(arg, **expr.attrs)
if out_t.dtype != "int32":
# Cast back to output dtype to preserve input dtype == output dtype for AvgPool2d.
out = relay.op.clip(out, a_min=np.iinfo(out_t.dtype).min, a_max=np.iinfo(out_t.dtype).max)
out = relay.op.cast(out, out_t.dtype)
return [out, TensorAffineType(out_t.scale, out_t.zero_point, out_t.dtype, out_t.axis)]


@register_fake_quantization_to_integer("nn.global_avg_pool2d")
def global_avgpool2d(expr, type_map):
"""Rewrite a global_avgpool op"""