[ONNX] QLinearConv Support (#8007)
* Add QLinearConv for onnx frontend

* Reformat

* Squeeze 1D tensor for weight_scale & weight_zero_point

* Do dequantize -> quantize if y_scale is not constant (see the sketch below)
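
A minimal NumPy sketch (not part of the commit) of the arithmetic behind the last bullet: when y_scale is a constant, the converter can emit a single qnn.requantize whose input scale is x_scale * w_scale; otherwise the int32 accumulator is dequantized and then re-quantized with the runtime y_scale. The accumulator values and quantization parameters below are made up for illustration.

import numpy as np

# Made-up int32 convolution accumulator and per-tensor quantization parameters.
conv_int32 = np.array([[1234, -567], [89, 4096]], dtype=np.int64)
x_scale, w_scale = 0.02, 0.005
y_scale, y_zero_point = 0.1, 128

# Constant y_scale: one requantize step with input scale x_scale * w_scale.
y_requantized = np.clip(
    np.round(conv_int32 * (x_scale * w_scale) / y_scale) + y_zero_point, 0, 255
).astype("uint8")

# Non-constant y_scale: dequantize to float, then quantize with the runtime y_scale.
dequantized = conv_int32 * (x_scale * w_scale)
y_fallback = np.clip(
    np.round(dequantized / y_scale) + y_zero_point, 0, 255
).astype("uint8")

assert np.array_equal(y_requantized, y_fallback)  # both paths compute the same result
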
huochaitiantang authored May 13, 2021
1 parent b81f3f7 commit 158aedd
Showing 2 changed files with 302 additions and 4 deletions.
104 changes: 104 additions & 0 deletions python/tvm/relay/frontend/onnx.py
@@ -2827,6 +2827,109 @@ def _impl_v11(cls, inputs, attr, params):
)


class QLinearConv(OnnxOpConverter):
"""Operator converter for QLinearConv."""

@classmethod
def _impl_v10(cls, inputs, attr, params):
def get_scalar(x, dtype="float32"):
if isinstance(x, _expr.Var) and x.name_hint in params:
return _op.const(params[x.name_hint].asnumpy(), dtype)
rank = len(infer_shape(x))
assert rank <= 1, "QLinearConv scale and zero_point input must be scalars"
if rank == 1:
x = _op.squeeze(x, [0])
return _op.cast(x, dtype)

data = inputs[0]
x_scale = get_scalar(inputs[1])
x_zero_point = get_scalar(inputs[2], "int32")
weight = inputs[3]
w_scale = get_scalar(inputs[4])
w_zero_point = get_scalar(inputs[5], "int32")
y_scale = get_scalar(inputs[6])
y_zero_point = get_scalar(inputs[7], "int32")

input_shape = infer_shape(data)
ndim = len(input_shape)
kernel_type = infer_type(weight)
kernel_shapes = [get_const_tuple(kernel_type.checked_type.shape)]
if "kernel_shape" not in attr:
attr["kernel_shape"] = kernel_shapes[0][2:]

if "auto_pad" in attr:
attr["auto_pad"] = attr["auto_pad"].decode("utf-8")
if attr["auto_pad"] in ("SAME_UPPER", "SAME_LOWER"):
# Warning: Convolution does not yet support dynamic shapes,
# one will need to run dynamic_to_static on this model after import
data = autopad(
data,
attr.get("strides", [1] * (ndim - 2)),
attr["kernel_shape"],
attr.get("dilations", [1] * (ndim - 2)),
ndim,
pad_value=x_zero_point.data,
mode=attr["auto_pad"],
)
elif attr["auto_pad"] == "VALID":
attr["pads"] = tuple([0 for i in range(ndim - 2)])
elif attr["auto_pad"] == "NOTSET":
pass
else:
msg = 'Value {} in attribute "auto_pad" of operator Conv is invalid.'
raise tvm.error.OpAttributeInvalid(msg.format(attr["auto_pad"]))
attr.pop("auto_pad")

out_channels = kernel_shapes[0][0]
dilation = attr.get("dilations", [1] * (ndim - 2))
strides = attr.get("strides", [1] * (ndim - 2))
padding = attr["pads"] if "pads" in attr else 0
groups = attr["group"] if "group" in attr else 1

if ndim != 4:
raise tvm.error.OpAttributeInvalid(
"Only 2D kernels are supported for operator QLinearConv."
)

out = _qnn.op.conv2d(
data,
weight,
x_zero_point,
w_zero_point,
x_scale,
w_scale,
kernel_size=attr["kernel_shape"],
channels=out_channels,
strides=strides,
padding=padding,
dilation=dilation,
groups=groups,
)
use_bias = len(inputs) == 9
if use_bias:
out = _op.nn.bias_add(out, inputs[8])

out_dtype = infer_type(inputs[7]).checked_type.dtype
requantize_scale = _op.multiply(x_scale, w_scale)

# requantize requires y_scale to be constant;
# if y_scale is not constant, fall back to dequantize -> quantize
if isinstance(y_scale, _expr.Constant):
out = _qnn.op.requantize(
out,
requantize_scale,
_op.const(0, dtype="int32"),
y_scale,
y_zero_point,
out_dtype=out_dtype,
axis=0,
)
else:
out = _qnn.op.dequantize(out, requantize_scale, _op.const(0, dtype="int32"), axis=0)
out = _qnn.op.quantize(out, y_scale, y_zero_point, axis=0, out_dtype=out_dtype)
return out


class BitShift(OnnxOpConverter):
"""Operator converter for NonZero"""

@@ -3018,6 +3121,7 @@ def _get_convert_map(opset):
"DequantizeLinear": DequantizeLinear.get_converter(opset),
"DynamicQuantizeLinear": DynamicQuantizeLinear.get_converter(opset),
"ReverseSequence": ReverseSequence.get_converter(opset),
"QLinearConv": QLinearConv.get_converter(opset),
}
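
For context, a hedged sketch of how a quantized ONNX model containing QLinearConv nodes could be imported and compiled once this converter is registered. The model path, input name, and shape are assumptions; the DynamicToStatic pass is only needed when auto_pad produces dynamic padding, as noted in the converter above.

import onnx
import tvm
from tvm import relay

# Hypothetical quantized model that contains QLinearConv nodes.
model = onnx.load("quantized_model.onnx")
shape_dict = {"input": (1, 3, 224, 224)}  # assumed input name and NCHW shape

mod, params = relay.frontend.from_onnx(model, shape_dict)
# Fold any dynamic padding introduced by auto_pad=SAME_* back to static shapes.
mod = relay.transform.DynamicToStatic()(mod)

with tvm.transform.PassContext(opt_level=2):
    graph, lib, params = relay.build(mod, target="llvm", params=params)
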


202 changes: 198 additions & 4 deletions tests/python/frontend/onnx/test_forward.py
@@ -66,7 +66,14 @@ def get_tvm_output_with_vm(


def get_tvm_output(
graph_def, input_data, target, device, output_shape=None, output_dtype="float32", opset=None
graph_def,
input_data,
target,
device,
output_shape=None,
output_dtype="float32",
opset=None,
opt_level=1,
):
"""Generic function to execute and get tvm output"""
# TODO: Resolve the issues and remove the following lines
@@ -76,7 +83,8 @@ def get_tvm_output(
input_names, shape_dict = get_input_data_shape_dict(graph_def, input_data)

mod, params = relay.frontend.from_onnx(graph_def, shape_dict, opset=opset)
with tvm.transform.PassContext(opt_level=1):

with tvm.transform.PassContext(opt_level=opt_level):
graph, lib, params = relay.build(mod, target, params=params)

m = graph_executor.create(graph, lib, device)
@@ -135,6 +143,7 @@ def verify_with_ort_with_inputs(
rtol=1e-5,
atol=1e-5,
apply_softmax=False,
opt_level=1,
):
if opset is not None:
model.opset_import[0].version = opset
@@ -156,7 +165,9 @@
convert_to_static=convert_to_static,
)
else:
tvm_out = get_tvm_output(model, inputs, target, dev, out_shape, dtype, opset=opset)
tvm_out = get_tvm_output(
model, inputs, target, dev, out_shape, dtype, opset=opset, opt_level=opt_level
)
if not isinstance(tvm_out, list):
tvm_out = [tvm_out]
if not isinstance(ort_out, list):
@@ -4219,7 +4230,6 @@ def verify_cumsum(indata, axis, exclusive=0, reverse=0, type="float32"):
"test_maxpool_with_argmax_2d_precomputed_strides/",
"test_maxunpool_export_with_output_shape/",
"test_mvn/",
"test_qlinearconv/",
"test_qlinearmatmul_2D/",
"test_qlinearmatmul_3D/",
"test_resize_tf_crop_and_resize/",
@@ -4387,6 +4397,189 @@ def test_reverse_sequence():
verify_reverse_sequence(x, sequence_lens, 1, 0)


def verify_qlinearconv(
x_shape,
w_shape,
y_shape,
padding,
kernel_shape,
strides,
dilations,
auto_pad="NOTSET",
bias=False,
):

x_array = np.random.randint(low=0, high=255, size=x_shape).astype("uint8")
w_array = np.random.uniform(low=0, high=255, size=w_shape).astype("uint8")

initializer = [
helper.make_tensor("x_scale", TensorProto.FLOAT, (), [np.random.rand()]),
helper.make_tensor("x_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]),
helper.make_tensor("w_scale", TensorProto.FLOAT, (), [np.random.rand()]),
helper.make_tensor("w_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]),
helper.make_tensor("y_scale", TensorProto.FLOAT, (), [np.random.rand()]),
helper.make_tensor("y_zero_point", TensorProto.UINT8, (), [np.random.randint(0, 255)]),
]

input_nodes = [
helper.make_tensor_value_info("x", TensorProto.UINT8, list(x_shape)),
helper.make_tensor_value_info("w", TensorProto.UINT8, list(w_shape)),
]
input_names = [
"x",
"x_scale",
"x_zero_point",
"w",
"w_scale",
"w_zero_point",
"y_scale",
"y_zero_point",
]
input_values = [x_array, w_array]

if bias is True:
b_shape = w_shape[0:1]
b_array = np.random.randint(low=0, high=65536, size=b_shape).astype("int32")
input_nodes.append(helper.make_tensor_value_info("B", TensorProto.INT32, list(b_shape)))
input_names.append("B")
input_values.append(b_array)

if padding is None:
## autopadding with unset default attributes
kwargs = {}
if not all([s == 1 for s in strides]):
kwargs["strides"] = strides
if not all([d == 1 for d in dilations]):
kwargs["dilations"] = dilations

node = helper.make_node(
"QLinearConv",
inputs=input_names,
outputs=["y"],
# Default values for other attributes:
auto_pad=auto_pad,
**kwargs,
)
else:
node = helper.make_node(
"QLinearConv",
inputs=input_names,
outputs=["y"],
kernel_shape=kernel_shape,
# Default values for other attributes:
strides=strides,
dilations=dilations,
# groups=1
pads=padding,
)

graph = helper.make_graph(
[node],
"conv_test",
inputs=input_nodes,
outputs=[helper.make_tensor_value_info("y", TensorProto.UINT8, list(y_shape))],
initializer=initializer,
)
model = helper.make_model(graph, producer_name="qlinearconv_test")
# opt_level=1 triggers a build error for this test, so compile with opt_level=2
verify_with_ort_with_inputs(model, input_values, opt_level=2)


def test_qlinearconv():
def repeat(N, D):
return tuple([N for _ in range(D)])

# only 2D QLinearConv is supported because the converter lowers to qnn.conv2d
D = 2

# Convolution with padding
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(5, D),
2 * repeat(1, D),
repeat(3, D),
repeat(1, D),
repeat(1, D),
)

# Convolution with bias
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(5, D),
2 * repeat(1, D),
repeat(3, D),
repeat(1, D),
repeat(1, D),
bias=True,
)

# Convolution with asymmetric padding
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(4, D),
repeat(0, D) + repeat(1, D),
repeat(3, D),
repeat(1, D),
repeat(1, D),
)
# Convolution without padding
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(3, D),
2 * repeat(0, D),
repeat(3, D),
repeat(1, D),
repeat(1, D),
)
# Convolution with autopadding
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(5, D),
None,
repeat(3, D),
repeat(1, D),
repeat(1, D),
auto_pad="SAME_UPPER",
)
# Convolution with valid autopadding
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(3, D),
None,
repeat(3, D),
repeat(1, D),
repeat(1, D),
auto_pad="VALID",
)
# Convolution with non-uniform stride
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(3, D),
None,
repeat(3, D),
repeat(2, D),
repeat(1, D),
auto_pad="SAME_UPPER",
)
# Convolution with dilation
verify_qlinearconv(
(1, 1) + repeat(5, D),
(1, 1) + repeat(3, D),
(1, 1) + repeat(5, D),
2 * repeat(2, D),
repeat(3, D),
repeat(1, D),
repeat(2, D),
)


if __name__ == "__main__":
test_flatten()
test_reshape()
@@ -4468,3 +4661,4 @@ def test_reverse_sequence():
test_wrong_input()
test_aten()
test_reverse_sequence()
test_qlinearconv()
