From b2228c0f17698e99cf95bdeeca587ad52e6b5223 Mon Sep 17 00:00:00 2001 From: Abhikrant Sharma Date: Mon, 4 Oct 2021 04:55:32 -0500 Subject: [PATCH 01/11] [ONNX] Add MatMulInteger16 contrib op --- python/tvm/relay/frontend/onnx.py | 83 ++++++++++++++++++++++ tests/python/frontend/onnx/test_forward.py | 43 ++++++++++- 2 files changed, 125 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 86cb178d0875..7c5b9461c4f9 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -873,6 +873,88 @@ def flatten_to_nd(x, x_shape, nd=3): return _op.nn.dense(inputs[0], input_1_t) +class MatMulInteger16(OnnxOpConverter): + """Operator converter for MatMulInteger16 from Microsoft onnxruntime contrib opset.""" + + @classmethod + def _impl_v10(cls, inputs, attr, params): + assert len(inputs) == 2, "MatMul op take 2 inputs, {} given".format(len(inputs)) + a_shape = shape_of(inputs[0]) + a_rank = infer_shape(a_shape)[0] + b_shape = shape_of(inputs[1]) + b_rank = infer_shape(b_shape)[0] + a_dtype = infer_type(inputs[0]).checked_type.dtype + b_dtype = infer_type(inputs[1]).checked_type.dtype + # Check input data types + assert a_dtype in ("int16", "uint16"), "MatMulInteger16: invalid dtype for first input" + assert b_dtype in ("int16", "uint16"), "MatMulInteger16: invalid dtype for second input" + out_dtype = "int32" + # Set output data type as uint32 when both inputs are uint16 + if a_dtype == "uint16" and b_dtype == "uint16": + out_dtype = "uint32" + if a_rank > 2 or b_rank > 2: + def flatten_to_nd(x, x_shape, nd=3): + ndims = infer_shape(x_shape)[0] + if ndims == nd: + return x + newshape = _op.concatenate( + [ + _expr.const([-1], dtype=infer_type(x_shape).checked_type.dtype), + _op.strided_slice(x_shape, [ndims - nd + 1], [ndims]), + ], + 0, + ) + out = _op.reshape(x, fold_constant(newshape)) + return out + + b_type = infer_type(inputs[1]) + # Convert to dense if the second matrix is 2d and non-dynamic + if b_rank == 2 and not _ty.is_dynamic(b_type.checked_type): + a = flatten_to_nd(inputs[0], a_shape, 2) + b = _op.transpose(inputs[1]) + output = _op.nn.dense(a, b, out_dtype=out_dtype) + else: + # Convert a and b into 3 dimensional tensors. + a = flatten_to_nd(inputs[0], a_shape, 3) + b = flatten_to_nd(inputs[1], b_shape, 3) + # Perform a NN batch matmul. + output = _op.nn.batch_matmul(a, b, out_dtype=out_dtype, transpose_b=False) + # Determine the output batch dimension. + if a_rank > b_rank: + out_batch = _op.strided_slice(a_shape, [0], [a_rank - 2]) + elif a_rank < b_rank: + out_batch = _op.strided_slice(b_shape, [0], [b_rank - 2]) + # If its unclear how broadcasting should be applied, the output + # shape is determined by choosing the maximum value from each input. + else: + out_batch = _op.concatenate( + [ + _op.maximum( + _op.strided_slice(a_shape, [i], [i + 1]), + _op.strided_slice(b_shape, [i], [i + 1]), + ) + for i in range(a_rank - 2) + ], + 0, + ) + # Reshape output to original dimensions. 
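+            # final_shape = out_batch + (M,) + (N,) for a: (..., M, K), b: (..., K, N);
+            # e.g. a (2, 4, 3) and b (1, 3, 4) give (2,) + (4,) + (4,) = (2, 4, 4).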
+ final_shape = _op.concatenate( + [ + out_batch, + _op.strided_slice( + a_shape, [infer_shape(a_shape)[0] - 2], [infer_shape(a_shape)[0] - 1] + ), + _op.strided_slice( + b_shape, [infer_shape(b_shape)[0] - 1], [infer_shape(b_shape)[0]] + ), + ], + 0, + ) + return _op.reshape(output, fold_constant(final_shape)) + # Use relay matmul + return _op.nn.matmul(inputs[0], inputs[1], out_dtype=out_dtype) + + class Mod(OnnxOpConverter): """Operator converter for Mod.""" @@ -4144,6 +4226,7 @@ def _get_convert_map(opset): "Softsign": Softsign.get_converter(opset), "Gemm": Gemm.get_converter(opset), "MatMul": MatMul.get_converter(opset), + "MatMulInteger16": MatMulInteger16.get_converter(opset), "Mod": Mod.get_converter(opset), "Xor": Renamer("logical_xor"), # defs/nn diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index 69bb44e360ff..6d777eb017b8 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -1281,6 +1281,47 @@ def verify_batch_matmul(a_shape, b_shape, out_shape, convert_config=None): convert_config={"use_nt_batch_matmul": False}, ) +@tvm.testing.parametrize_targets +def test_matmulinteger16(target, dev): + def verify_matmulinteger16(a_shape, b_shape, out_shape): + a_dtype = "int16" + b_dtype = "int16" + low = -10 + high = 10 + + a_proto = TensorProto.INT16 + b_proto = TensorProto.INT16 + out_proto = TensorProto.INT32 + a_array = np.random.randint(low, high, size=a_shape).astype(a_dtype) + b_array = np.random.randint(low, high, size=b_shape).astype(b_dtype) + + mul_node = helper.make_node("MatMulInteger16", + ["a", "b"], + ["out"], + domain="com.microsoft") + + graph = helper.make_graph( + [mul_node], + "matmuli16_test", + inputs=[ + helper.make_tensor_value_info("a", a_proto, list(a_shape)), + helper.make_tensor_value_info("b", b_proto, list(b_shape)), + ], + outputs=[helper.make_tensor_value_info("out", out_proto, list(out_shape))], + ) + + model = helper.make_model(graph, producer_name="matmuli16_test") + verify_with_ort_with_inputs(model, [a_array, b_array], target=target, dev=dev) + + # Working tests + verify_matmulinteger16((2, 4, 3), (1, 3, 4), (2, 4, 4)) + verify_matmulinteger16((1, 4, 3), (2, 3, 4), (2, 4, 4)) + verify_matmulinteger16((4, 3), (3, 4), (4, 4)) + verify_matmulinteger16((5, 7), (7, 8), (5, 8)) + verify_matmulinteger16((2, 3, 4, 3), (2, 3, 3, 4), (2, 3, 4, 4)) + verify_matmulinteger16((2, 4, 3), (3, 4), (2, 4, 4)) + verify_matmulinteger16((2, 3, 4, 3), (3, 4), (2, 3, 4, 4)) + def verify_simple_dynamic_model(a_shape, b_shape, target, dev): def verify_model(model, a_shape, b_shape): @@ -4932,7 +4973,6 @@ def verify_eyelike(indata): "test_if_seq", "test_loop11", "test_loop13_seq", - "test_matmulinteger", "test_maxpool_2d_same_lower", "test_maxpool_2d_same_upper", "test_maxpool_with_argmax_2d_precomputed_pads", @@ -5801,6 +5841,7 @@ def repeat(N, D): test_onehot() test_gemm() test_matmul() + test_matmulinteger16() test_gather() test_gatherelements() test_gather_nd() From d6503bf4b27d3db08ad1b05d021c1d4f55ed3cad Mon Sep 17 00:00:00 2001 From: Abhikrant Sharma Date: Mon, 4 Oct 2021 05:09:32 -0500 Subject: [PATCH 02/11] Fix formatting errors --- python/tvm/relay/frontend/onnx.py | 3 ++- tests/python/frontend/onnx/test_forward.py | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 7c5b9461c4f9..7e4d033b5725 100644 --- a/python/tvm/relay/frontend/onnx.py +++ 
b/python/tvm/relay/frontend/onnx.py @@ -891,8 +891,9 @@ def _impl_v10(cls, inputs, attr, params): out_dtype = "int32" # Set output data type as uint32 when both inputs are uint16 if a_dtype == "uint16" and b_dtype == "uint16": - out_dtype = "uint32" + out_dtype = "uint32" if a_rank > 2 or b_rank > 2: + def flatten_to_nd(x, x_shape, nd=3): ndims = infer_shape(x_shape)[0] if ndims == nd: diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index 6d777eb017b8..703e69e981a2 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -1281,6 +1281,7 @@ def verify_batch_matmul(a_shape, b_shape, out_shape, convert_config=None): convert_config={"use_nt_batch_matmul": False}, ) + @tvm.testing.parametrize_targets def test_matmulinteger16(target, dev): def verify_matmulinteger16(a_shape, b_shape, out_shape): @@ -1295,10 +1296,7 @@ def verify_matmulinteger16(a_shape, b_shape, out_shape): a_array = np.random.randint(low, high, size=a_shape).astype(a_dtype) b_array = np.random.randint(low, high, size=b_shape).astype(b_dtype) - mul_node = helper.make_node("MatMulInteger16", - ["a", "b"], - ["out"], - domain="com.microsoft") + mul_node = helper.make_node("MatMulInteger16", ["a", "b"], ["out"], domain="com.microsoft") graph = helper.make_graph( [mul_node], From 77c1be7c97fa3253292942b66259f6501145ecce Mon Sep 17 00:00:00 2001 From: Abhikrant Sharma Date: Mon, 4 Oct 2021 09:06:49 -0500 Subject: [PATCH 03/11] Remove a code comment and do not set default value of nd --- python/tvm/relay/frontend/onnx.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 7e4d033b5725..549b54790624 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -889,12 +889,11 @@ def _impl_v10(cls, inputs, attr, params): assert a_dtype in ("int16", "uint16"), "MatMulInteger16: invalid dtype for first input" assert b_dtype in ("int16", "uint16"), "MatMulInteger16: invalid dtype for second input" out_dtype = "int32" - # Set output data type as uint32 when both inputs are uint16 if a_dtype == "uint16" and b_dtype == "uint16": out_dtype = "uint32" if a_rank > 2 or b_rank > 2: - def flatten_to_nd(x, x_shape, nd=3): + def flatten_to_nd(x, x_shape, nd): ndims = infer_shape(x_shape)[0] if ndims == nd: return x From ccbe433937c0661414a5e62e1f6321058057e158 Mon Sep 17 00:00:00 2001 From: Abhikrant Sharma Date: Mon, 4 Oct 2021 11:06:45 -0500 Subject: [PATCH 04/11] Move flatten_to_nd function outside matmul to be used across multiple functions --- python/tvm/relay/frontend/onnx.py | 45 ++++++++-------------- tests/python/frontend/onnx/test_forward.py | 1 + 2 files changed, 16 insertions(+), 30 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 549b54790624..9e39d3fe1105 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -212,6 +212,21 @@ def get_scalar(x, params, dtype="float32"): return _op.cast(x, dtype) +def flatten_to_nd(x, x_shape, nd=3): + ndims = infer_shape(x_shape)[0] + if ndims == nd: + return x + newshape = _op.concatenate( + [ + _expr.const([-1], dtype=infer_type(x_shape).checked_type.dtype), + _op.strided_slice(x_shape, [ndims - nd + 1], [ndims]), + ], + 0, + ) + out = _op.reshape(x, fold_constant(newshape)) + return out + + class OnnxOpConverter(object): """A helper class for holding onnx op converters.""" @@ -803,21 +818,6 @@ 
def _impl_v1(cls, inputs, attr, params): b_rank = infer_shape(b_shape)[0] # When performing a batch matmul, we need to properly handle N-dim shapes. if a_rank > 2 or b_rank > 2: - - def flatten_to_nd(x, x_shape, nd=3): - ndims = infer_shape(x_shape)[0] - if ndims == nd: - return x - newshape = _op.concatenate( - [ - _expr.const([-1], dtype=infer_type(x_shape).checked_type.dtype), - _op.strided_slice(x_shape, [ndims - nd + 1], [ndims]), - ], - 0, - ) - out = _op.reshape(x, fold_constant(newshape)) - return out - b_type = infer_type(inputs[1]) # Convert to dense if the second matrix is 2d and non-dynamic if b_rank == 2 and not _ty.is_dynamic(b_type.checked_type): @@ -892,21 +892,6 @@ def _impl_v10(cls, inputs, attr, params): if a_dtype == "uint16" and b_dtype == "uint16": out_dtype = "uint32" if a_rank > 2 or b_rank > 2: - - def flatten_to_nd(x, x_shape, nd): - ndims = infer_shape(x_shape)[0] - if ndims == nd: - return x - newshape = _op.concatenate( - [ - _expr.const([-1], dtype=infer_type(x_shape).checked_type.dtype), - _op.strided_slice(x_shape, [ndims - nd + 1], [ndims]), - ], - 0, - ) - out = _op.reshape(x, fold_constant(newshape)) - return out - b_type = infer_type(inputs[1]) # Convert to dense if the second matrix is 2d and non-dynamic if b_rank == 2 and not _ty.is_dynamic(b_type.checked_type): diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index 703e69e981a2..d1e987a30a8f 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -4971,6 +4971,7 @@ def verify_eyelike(indata): "test_if_seq", "test_loop11", "test_loop13_seq", + "test_matmulinteger", "test_maxpool_2d_same_lower", "test_maxpool_2d_same_upper", "test_maxpool_with_argmax_2d_precomputed_pads", From 69f245351b435df5f124a23462fe674578c8a7ee Mon Sep 17 00:00:00 2001 From: Abhikrant Sharma Date: Mon, 4 Oct 2021 23:44:08 -0500 Subject: [PATCH 05/11] Add function docstring and describe the tests --- python/tvm/relay/frontend/onnx.py | 1 + tests/python/frontend/onnx/test_forward.py | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 9e39d3fe1105..c1e282a5fb52 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -213,6 +213,7 @@ def get_scalar(x, params, dtype="float32"): def flatten_to_nd(x, x_shape, nd=3): + """Helper to flatten multi dimensional arrays to specific dimension""" ndims = infer_shape(x_shape)[0] if ndims == nd: return x diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index d1e987a30a8f..b018e558ca34 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -1311,13 +1311,15 @@ def verify_matmulinteger16(a_shape, b_shape, out_shape): model = helper.make_model(graph, producer_name="matmuli16_test") verify_with_ort_with_inputs(model, [a_array, b_array], target=target, dev=dev) - # Working tests - verify_matmulinteger16((2, 4, 3), (1, 3, 4), (2, 4, 4)) - verify_matmulinteger16((1, 4, 3), (2, 3, 4), (2, 4, 4)) + # 2D computation to verify matmul op verify_matmulinteger16((4, 3), (3, 4), (4, 4)) verify_matmulinteger16((5, 7), (7, 8), (5, 8)) - verify_matmulinteger16((2, 3, 4, 3), (2, 3, 3, 4), (2, 3, 4, 4)) - verify_matmulinteger16((2, 4, 3), (3, 4), (2, 4, 4)) + # Verify 3D matmul using batch_matmul op + verify_matmulinteger16((2, 4, 3), (1, 3, 4), (2, 4, 4)) + 
verify_matmulinteger16((1, 4, 3), (2, 3, 4), (2, 4, 4)) + # Test implicit broadcasting + verify_matmulinteger16((2, 3, 5, 3), (2, 3, 3, 5), (2, 3, 5, 5)) + verify_matmulinteger16((2, 7, 3), (3, 7), (2, 7, 7)) verify_matmulinteger16((2, 3, 4, 3), (3, 4), (2, 3, 4, 4)) From 1aa71eb072ec945b62802cc6faa023d0739aabf9 Mon Sep 17 00:00:00 2001 From: Abhikrant Sharma Date: Thu, 7 Oct 2021 12:44:16 -0500 Subject: [PATCH 06/11] Use max/min value of int16 as high/low while generating input vectors --- tests/python/frontend/onnx/test_forward.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index b018e558ca34..c2fa08c2d0f9 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -1287,8 +1287,8 @@ def test_matmulinteger16(target, dev): def verify_matmulinteger16(a_shape, b_shape, out_shape): a_dtype = "int16" b_dtype = "int16" - low = -10 - high = 10 + low = np.iinfo(np.int16).min + high = np.iinfo(np.int16).max a_proto = TensorProto.INT16 b_proto = TensorProto.INT16 From b2f7437c9252054cc180e3858c2fd4ad26094b16 Mon Sep 17 00:00:00 2001 From: Abhikrant Sharma Date: Thu, 18 Nov 2021 01:41:54 -0600 Subject: [PATCH 07/11] Converge MatMul and MatMulInteger16 ops into a single op using output dtype --- python/tvm/relay/frontend/onnx.py | 198 +++++++++++------------------- 1 file changed, 72 insertions(+), 126 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index c1e282a5fb52..9b99dbed4cb7 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -212,20 +212,75 @@ def get_scalar(x, params, dtype="float32"): return _op.cast(x, dtype) -def flatten_to_nd(x, x_shape, nd=3): - """Helper to flatten multi dimensional arrays to specific dimension""" - ndims = infer_shape(x_shape)[0] - if ndims == nd: - return x - newshape = _op.concatenate( - [ - _expr.const([-1], dtype=infer_type(x_shape).checked_type.dtype), - _op.strided_slice(x_shape, [ndims - nd + 1], [ndims]), - ], - 0, - ) - out = _op.reshape(x, fold_constant(newshape)) - return out +def matmul_out_dtype(inputs, out_dtype): + """Common function to handle MatMul and MatMulInteger16""" + a_shape = shape_of(inputs[0]) + a_rank = infer_shape(a_shape)[0] + b_shape = shape_of(inputs[1]) + b_rank = infer_shape(b_shape)[0] + if a_rank > 2 or b_rank > 2: + def flatten_to_nd(x, x_shape, nd=3): + ndims = infer_shape(x_shape)[0] + if ndims == nd: + return x + newshape = _op.concatenate( + [ + _expr.const([-1], dtype=infer_type(x_shape).checked_type.dtype), + _op.strided_slice(x_shape, [ndims - nd + 1], [ndims]), + ], + 0, + ) + out = _op.reshape(x, fold_constant(newshape)) + return out + + b_type = infer_type(inputs[1]) + # Convert to dense if the second matrix is 2d and non-dynamic + if b_rank == 2 and not _ty.is_dynamic(b_type.checked_type): + a = flatten_to_nd(inputs[0], a_shape, 2) + b = _op.transpose(inputs[1]) + output = _op.nn.dense(a, b, out_dtype=out_dtype) + else: + # Convert a and b into 3 dimensional tensors. + a = flatten_to_nd(inputs[0], a_shape, 3) + b = flatten_to_nd(inputs[1], b_shape, 3) + # Perform a NN batch matmul. + output = _op.nn.batch_matmul(a, b, out_dtype=out_dtype, transpose_b=False) + # Determine the output batch dimension. 
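+        # The higher-rank input supplies the batch dimensions; on a rank tie,
+        # each batch dimension is the elementwise max of the two input shapes.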
+ if a_rank > b_rank: + out_batch = _op.strided_slice(a_shape, [0], [a_rank - 2]) + elif a_rank < b_rank: + out_batch = _op.strided_slice(b_shape, [0], [b_rank - 2]) + # If its unclear how broadcasting should be applied, the output + # shape is determined by choosing the maximum value from each input. + else: + out_batch = _op.concatenate( + [ + _op.maximum( + _op.strided_slice(a_shape, [i], [i + 1]), + _op.strided_slice(b_shape, [i], [i + 1]), + ) + for i in range(a_rank - 2) + ], + 0, + ) + # Reshape output to original dimensions. + final_shape = _op.concatenate( + [ + out_batch, + _op.strided_slice( + a_shape, [infer_shape(a_shape)[0] - 2], [infer_shape(a_shape)[0] - 1] + ), + _op.strided_slice( + b_shape, [infer_shape(b_shape)[0] - 1], [infer_shape(b_shape)[0]] + ), + ], + 0, + ) + return _op.reshape(output, fold_constant(final_shape)) + # Otherwise a simple dense op will get the job done. + input_1_t = _op.transpose(inputs[1], axes=(1, 0)) + return _op.nn.dense(inputs[0], input_1_t, out_dtype=out_dtype) + class OnnxOpConverter(object): @@ -813,65 +868,7 @@ class MatMul(OnnxOpConverter): def _impl_v1(cls, inputs, attr, params): assert len(inputs) == 2, "MatMul op take 2 inputs, {} given".format(len(inputs)) # Need to check input shape as batch matmul must be supported. - a_shape = shape_of(inputs[0]) - a_rank = infer_shape(a_shape)[0] - b_shape = shape_of(inputs[1]) - b_rank = infer_shape(b_shape)[0] - # When performing a batch matmul, we need to properly handle N-dim shapes. - if a_rank > 2 or b_rank > 2: - b_type = infer_type(inputs[1]) - # Convert to dense if the second matrix is 2d and non-dynamic - if b_rank == 2 and not _ty.is_dynamic(b_type.checked_type): - a = flatten_to_nd(inputs[0], a_shape, 2) - b = _op.transpose(inputs[1]) - output = _op.nn.dense(a, b) - else: - # Convert a and b into 3 dimensional tensors. - a = flatten_to_nd(inputs[0], a_shape, 3) - b = flatten_to_nd(inputs[1], b_shape, 3) - if ONNX_DEFAULT_CONFIGS["use_nt_batch_matmul"]: - # Transpose matrix dimensions of b. - b = _op.transpose(b, [0, 2, 1]) - # Perform a NT batch matmul. - output = _op.nn.batch_matmul(a, b) - else: - # Perform a NN batch matmul. - output = _op.nn.batch_matmul(a, b, transpose_b=False) - # Determine the output batch dimension. - if a_rank > b_rank: - out_batch = _op.strided_slice(a_shape, [0], [a_rank - 2]) - elif a_rank < b_rank: - out_batch = _op.strided_slice(b_shape, [0], [b_rank - 2]) - # If its unclear how broadcasting should be applied, the output - # shape is determined by choosing the maximum value from each input. - else: - out_batch = _op.concatenate( - [ - _op.maximum( - _op.strided_slice(a_shape, [i], [i + 1]), - _op.strided_slice(b_shape, [i], [i + 1]), - ) - for i in range(a_rank - 2) - ], - 0, - ) - # Reshape output to original dimensions. - final_shape = _op.concatenate( - [ - out_batch, - _op.strided_slice( - a_shape, [infer_shape(a_shape)[0] - 2], [infer_shape(a_shape)[0] - 1] - ), - _op.strided_slice( - b_shape, [infer_shape(b_shape)[0] - 1], [infer_shape(b_shape)[0]] - ), - ], - 0, - ) - return _op.reshape(output, fold_constant(final_shape)) - # Otherwise a simple dense op will get the job done. 
- input_1_t = _op.transpose(inputs[1], axes=(1, 0)) - return _op.nn.dense(inputs[0], input_1_t) + return matmul_out_dtype(inputs, out_dtype=infer_type(inputs[0]).checked_type.dtype) class MatMulInteger16(OnnxOpConverter): @@ -879,11 +876,7 @@ class MatMulInteger16(OnnxOpConverter): @classmethod def _impl_v10(cls, inputs, attr, params): - assert len(inputs) == 2, "MatMul op take 2 inputs, {} given".format(len(inputs)) - a_shape = shape_of(inputs[0]) - a_rank = infer_shape(a_shape)[0] - b_shape = shape_of(inputs[1]) - b_rank = infer_shape(b_shape)[0] + assert len(inputs) == 2, "MatMulInteger16 op take 2 inputs, {} given".format(len(inputs)) a_dtype = infer_type(inputs[0]).checked_type.dtype b_dtype = infer_type(inputs[1]).checked_type.dtype # Check input data types @@ -892,54 +885,7 @@ def _impl_v10(cls, inputs, attr, params): out_dtype = "int32" if a_dtype == "uint16" and b_dtype == "uint16": out_dtype = "uint32" - if a_rank > 2 or b_rank > 2: - b_type = infer_type(inputs[1]) - # Convert to dense if the second matrix is 2d and non-dynamic - if b_rank == 2 and not _ty.is_dynamic(b_type.checked_type): - a = flatten_to_nd(inputs[0], a_shape, 2) - b = _op.transpose(inputs[1]) - output = _op.nn.dense(a, b, out_dtype=out_dtype) - else: - # Convert a and b into 3 dimensional tensors. - a = flatten_to_nd(inputs[0], a_shape, 3) - b = flatten_to_nd(inputs[1], b_shape, 3) - # Perform a NN batch matmul. - output = _op.nn.batch_matmul(a, b, out_dtype=out_dtype, transpose_b=False) - # Determine the output batch dimension. - if a_rank > b_rank: - out_batch = _op.strided_slice(a_shape, [0], [a_rank - 2]) - elif a_rank < b_rank: - out_batch = _op.strided_slice(b_shape, [0], [b_rank - 2]) - # If its unclear how broadcasting should be applied, the output - # shape is determined by choosing the maximum value from each input. - else: - out_batch = _op.concatenate( - [ - _op.maximum( - _op.strided_slice(a_shape, [i], [i + 1]), - _op.strided_slice(b_shape, [i], [i + 1]), - ) - for i in range(a_rank - 2) - ], - 0, - ) - # Reshape output to original dimensions. 
- final_shape = _op.concatenate( - [ - out_batch, - _op.strided_slice( - a_shape, [infer_shape(a_shape)[0] - 2], [infer_shape(a_shape)[0] - 1] - ), - _op.strided_slice( - b_shape, [infer_shape(b_shape)[0] - 1], [infer_shape(b_shape)[0]] - ), - ], - 0, - ) - return _op.reshape(output, fold_constant(final_shape)) - # Use relay matmul - return _op.nn.matmul(inputs[0], inputs[1], out_dtype=out_dtype) - + return matmul_out_dtype(inputs, out_dtype) class Mod(OnnxOpConverter): """Operator converter for Mod.""" From 446fccea95f094468dd4d6d30a3b23983e8a0f36 Mon Sep 17 00:00:00 2001 From: Abhikrant Sharma Date: Thu, 18 Nov 2021 01:49:11 -0600 Subject: [PATCH 08/11] Fix indentation issues --- python/tvm/relay/frontend/onnx.py | 126 +++++++++++++++--------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 9b99dbed4cb7..65481cd509c5 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -213,73 +213,73 @@ def get_scalar(x, params, dtype="float32"): def matmul_out_dtype(inputs, out_dtype): - """Common function to handle MatMul and MatMulInteger16""" - a_shape = shape_of(inputs[0]) - a_rank = infer_shape(a_shape)[0] - b_shape = shape_of(inputs[1]) - b_rank = infer_shape(b_shape)[0] - if a_rank > 2 or b_rank > 2: - def flatten_to_nd(x, x_shape, nd=3): - ndims = infer_shape(x_shape)[0] - if ndims == nd: - return x - newshape = _op.concatenate( - [ - _expr.const([-1], dtype=infer_type(x_shape).checked_type.dtype), - _op.strided_slice(x_shape, [ndims - nd + 1], [ndims]), - ], - 0, - ) - out = _op.reshape(x, fold_constant(newshape)) - return out - - b_type = infer_type(inputs[1]) - # Convert to dense if the second matrix is 2d and non-dynamic - if b_rank == 2 and not _ty.is_dynamic(b_type.checked_type): - a = flatten_to_nd(inputs[0], a_shape, 2) - b = _op.transpose(inputs[1]) - output = _op.nn.dense(a, b, out_dtype=out_dtype) - else: - # Convert a and b into 3 dimensional tensors. - a = flatten_to_nd(inputs[0], a_shape, 3) - b = flatten_to_nd(inputs[1], b_shape, 3) - # Perform a NN batch matmul. - output = _op.nn.batch_matmul(a, b, out_dtype=out_dtype, transpose_b=False) - # Determine the output batch dimension. - if a_rank > b_rank: - out_batch = _op.strided_slice(a_shape, [0], [a_rank - 2]) - elif a_rank < b_rank: - out_batch = _op.strided_slice(b_shape, [0], [b_rank - 2]) - # If its unclear how broadcasting should be applied, the output - # shape is determined by choosing the maximum value from each input. - else: - out_batch = _op.concatenate( + """Common function to handle MatMul and MatMulInteger16""" + a_shape = shape_of(inputs[0]) + a_rank = infer_shape(a_shape)[0] + b_shape = shape_of(inputs[1]) + b_rank = infer_shape(b_shape)[0] + if a_rank > 2 or b_rank > 2: + def flatten_to_nd(x, x_shape, nd=3): + ndims = infer_shape(x_shape)[0] + if ndims == nd: + return x + newshape = _op.concatenate( [ - _op.maximum( - _op.strided_slice(a_shape, [i], [i + 1]), - _op.strided_slice(b_shape, [i], [i + 1]), - ) - for i in range(a_rank - 2) + _expr.const([-1], dtype=infer_type(x_shape).checked_type.dtype), + _op.strided_slice(x_shape, [ndims - nd + 1], [ndims]), ], 0, ) - # Reshape output to original dimensions. 
- final_shape = _op.concatenate( - [ - out_batch, - _op.strided_slice( - a_shape, [infer_shape(a_shape)[0] - 2], [infer_shape(a_shape)[0] - 1] - ), - _op.strided_slice( - b_shape, [infer_shape(b_shape)[0] - 1], [infer_shape(b_shape)[0]] - ), - ], - 0, - ) - return _op.reshape(output, fold_constant(final_shape)) - # Otherwise a simple dense op will get the job done. - input_1_t = _op.transpose(inputs[1], axes=(1, 0)) - return _op.nn.dense(inputs[0], input_1_t, out_dtype=out_dtype) + out = _op.reshape(x, fold_constant(newshape)) + return out + + b_type = infer_type(inputs[1]) + # Convert to dense if the second matrix is 2d and non-dynamic + if b_rank == 2 and not _ty.is_dynamic(b_type.checked_type): + a = flatten_to_nd(inputs[0], a_shape, 2) + b = _op.transpose(inputs[1]) + output = _op.nn.dense(a, b, out_dtype=out_dtype) + else: + # Convert a and b into 3 dimensional tensors. + a = flatten_to_nd(inputs[0], a_shape, 3) + b = flatten_to_nd(inputs[1], b_shape, 3) + # Perform a NN batch matmul. + output = _op.nn.batch_matmul(a, b, out_dtype=out_dtype, transpose_b=False) + # Determine the output batch dimension. + if a_rank > b_rank: + out_batch = _op.strided_slice(a_shape, [0], [a_rank - 2]) + elif a_rank < b_rank: + out_batch = _op.strided_slice(b_shape, [0], [b_rank - 2]) + # If its unclear how broadcasting should be applied, the output + # shape is determined by choosing the maximum value from each input. + else: + out_batch = _op.concatenate( + [ + _op.maximum( + _op.strided_slice(a_shape, [i], [i + 1]), + _op.strided_slice(b_shape, [i], [i + 1]), + ) + for i in range(a_rank - 2) + ], + 0, + ) + # Reshape output to original dimensions. + final_shape = _op.concatenate( + [ + out_batch, + _op.strided_slice( + a_shape, [infer_shape(a_shape)[0] - 2], [infer_shape(a_shape)[0] - 1] + ), + _op.strided_slice( + b_shape, [infer_shape(b_shape)[0] - 1], [infer_shape(b_shape)[0]] + ), + ], + 0, + ) + return _op.reshape(output, fold_constant(final_shape)) + # Otherwise a simple dense op will get the job done. 
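+    # relay's nn.dense expects the second operand as (N, K), hence the transpose.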
+ input_1_t = _op.transpose(inputs[1], axes=(1, 0)) + return _op.nn.dense(inputs[0], input_1_t, out_dtype=out_dtype) From 0324148eef6000ddfdb4bb775cd48ce77e166937 Mon Sep 17 00:00:00 2001 From: Abhikrant Sharma Date: Thu, 18 Nov 2021 04:23:34 -0600 Subject: [PATCH 09/11] Formatting changes --- python/tvm/relay/frontend/onnx.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 65481cd509c5..a1eeafee1553 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -219,19 +219,20 @@ def matmul_out_dtype(inputs, out_dtype): b_shape = shape_of(inputs[1]) b_rank = infer_shape(b_shape)[0] if a_rank > 2 or b_rank > 2: + def flatten_to_nd(x, x_shape, nd=3): - ndims = infer_shape(x_shape)[0] - if ndims == nd: - return x - newshape = _op.concatenate( - [ - _expr.const([-1], dtype=infer_type(x_shape).checked_type.dtype), - _op.strided_slice(x_shape, [ndims - nd + 1], [ndims]), - ], - 0, - ) - out = _op.reshape(x, fold_constant(newshape)) - return out + ndims = infer_shape(x_shape)[0] + if ndims == nd: + return x + newshape = _op.concatenate( + [ + _expr.const([-1], dtype=infer_type(x_shape).checked_type.dtype), + _op.strided_slice(x_shape, [ndims - nd + 1], [ndims]), + ], + 0, + ) + out = _op.reshape(x, fold_constant(newshape)) + return out b_type = infer_type(inputs[1]) # Convert to dense if the second matrix is 2d and non-dynamic @@ -282,7 +283,6 @@ def flatten_to_nd(x, x_shape, nd=3): return _op.nn.dense(inputs[0], input_1_t, out_dtype=out_dtype) - class OnnxOpConverter(object): """A helper class for holding onnx op converters.""" @@ -887,6 +887,7 @@ def _impl_v10(cls, inputs, attr, params): out_dtype = "uint32" return matmul_out_dtype(inputs, out_dtype) + class Mod(OnnxOpConverter): """Operator converter for Mod.""" From d5d7b307636f9ffa41bb87b5350ff06834509339 Mon Sep 17 00:00:00 2001 From: Abhikrant Sharma Date: Sun, 21 Nov 2021 01:36:31 -0600 Subject: [PATCH 10/11] Fix CUDA batchmatmul strategy to allow mixed precision --- python/tvm/relay/op/strategy/cuda.py | 2 +- tests/python/frontend/onnx/test_forward.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index da7cbd5cec10..f17903806b27 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -839,7 +839,7 @@ def batch_matmul_strategy_cuda(attrs, inputs, out_type, target): ) else: strategy.add_implementation( - wrap_compute_batch_matmul(topi.cuda.batch_matmul), + wrap_compute_batch_matmul(topi.cuda.batch_matmul, need_out_dtype=True), wrap_topi_schedule(topi.cuda.schedule_batch_matmul), name="batch_matmul.cuda", plevel=10, diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index c2fa08c2d0f9..dcbd662ec219 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -4973,7 +4973,6 @@ def verify_eyelike(indata): "test_if_seq", "test_loop11", "test_loop13_seq", - "test_matmulinteger", "test_maxpool_2d_same_lower", "test_maxpool_2d_same_upper", "test_maxpool_with_argmax_2d_precomputed_pads", From c0b868f92047c4ca957d5649a52b8dd957f9f3cf Mon Sep 17 00:00:00 2001 From: Abhikrant Sharma Date: Sun, 21 Nov 2021 08:09:51 -0600 Subject: [PATCH 11/11] Add test_matmulinteger to unsupported_onnx_tests --- tests/python/frontend/onnx/test_forward.py | 1 + 1 file changed, 1 
insertion(+) diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py index ef1d9f4ab3e4..01b570cdc245 100644 --- a/tests/python/frontend/onnx/test_forward.py +++ b/tests/python/frontend/onnx/test_forward.py @@ -5009,6 +5009,7 @@ def verify_eyelike(indata): "test_loop11", "test_loop13_seq", "test_lstm_batchwise", + "test_matmulinteger", "test_maxpool_2d_same_lower", "test_maxpool_2d_same_upper", "test_maxpool_with_argmax_2d_precomputed_pads",
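---

Usage sketch (illustrative only, not part of the patch series): the snippet
below mirrors the new test to exercise the converter end to end — build a
com.microsoft MatMulInteger16 node with onnx.helper, import it through
relay.frontend.from_onnx, run it, and check the int32 accumulation against
NumPy. The shapes, graph names, and the graph-executor/LLVM target are
assumptions made for this example, not anything the patches prescribe.

    import numpy as np
    from onnx import TensorProto, helper

    import tvm
    from tvm import relay

    a_shape, b_shape, out_shape = (4, 3), (3, 4), (4, 4)
    node = helper.make_node("MatMulInteger16", ["a", "b"], ["out"], domain="com.microsoft")
    graph = helper.make_graph(
        [node],
        "matmuli16_example",
        inputs=[
            helper.make_tensor_value_info("a", TensorProto.INT16, list(a_shape)),
            helper.make_tensor_value_info("b", TensorProto.INT16, list(b_shape)),
        ],
        outputs=[helper.make_tensor_value_info("out", TensorProto.INT32, list(out_shape))],
    )
    model = helper.make_model(graph, producer_name="matmuli16_example")

    # int16 x int16 accumulates into int32; uint16 x uint16 would give uint32.
    mod, params = relay.frontend.from_onnx(model, shape={"a": a_shape, "b": b_shape})

    a = np.random.randint(-10, 10, size=a_shape).astype("int16")
    b = np.random.randint(-10, 10, size=b_shape).astype("int16")
    out = relay.create_executor("graph", mod=mod, device=tvm.cpu(), target="llvm").evaluate()(a, b)
    np.testing.assert_array_equal(out.numpy(), a.astype("int32") @ b.astype("int32"))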