From 29e5e8a9b8e1701cf344a51bebe50b65d6655577 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 12 Jan 2022 16:31:31 -0800 Subject: [PATCH 01/72] initial tanh impl --- .../transform/fake_quantization_to_integer.py | 104 ++++++++++++++++++ .../test_pass_fake_quantization_to_integer.py | 16 +++ 2 files changed, 120 insertions(+) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index db46c2cbfd58..19e7cee6a9dc 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -92,6 +92,110 @@ def identity(expr, type_map): return register_fake_quantization_to_integer(op_name, identity) +# TODO: replace with constant folding +def run_const_expr(expr): + mod = tvm.IRModule.from_expr(expr) + vm_exe = relay.create_executor("vm", mod=mod) + return vm_exe.evaluate()().asnumpy() + + +def create_integer_lookup_table( + floating_point_func, + input_scale, + input_zero_point, + output_scale, + output_zero_point, + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", +): + if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( + np.dtype(out_dtype), np.integer + ): + raise ValueError( + f"Only integer dtypes allowed got {in_dtype} and {out_dtype} for in and out dtypes." + ) + + dtype_info = np.iinfo(in_dtype) + + # Use TVMs quantization methods via relay to be consistent + inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) + inputs_dequantized = run_const_expr( + relay.qnn.op.dequantize( + inputs_quantized, + input_scale=input_scale, + input_zero_point=input_zero_point, + axis=in_axis, + ) + ) + + output_dequantized = relay.const(floating_point_func(inputs_dequantized)) + output_quantized = run_const_expr( + relay.qnn.op.quantize( + output_dequantized, output_scale, output_zero_point, out_axis, out_dtype + ) + ) + + return output_quantized + + +def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): + """Implement an operator in quantized space via table lookup operations (e.g. via gather). + + op_name: str + The name of the operator to register for FQ2I. + + example_func: Callable[[np.ndarray], np.ndarray] + The FP32 version of the function to quantize operating on numpy arrays. + """ + + def func(expr, type_map): + assert len(expr.args) == 1 + arg = expr.args[0] + in_scale = fold_constant(type_map[arg].scale) + in_zero_point = fold_constant(type_map[arg].zero_point) + out_scale = fold_constant(type_map[expr].scale) + out_zero_point = fold_constant(type_map[expr].zero_point) + if ( + not isinstance(in_scale, relay.Constant) + or not isinstance(in_zero_point, relay.Constant) + or not isinstance(out_scale, relay.Constant) + or not isinstance(out_zero_point, relay.Constant) + ): + raise ValueError( + f"{op_name} requires input/output quantization params to be known at compile time!" 
+            )
+
+        # TODO: handle multi-channel q
+        in_scale = in_scale.data.numpy().item()
+        in_zero_point = in_zero_point.data.numpy().item()
+        out_scale = out_scale.data.numpy().item()
+        out_zero_point = out_zero_point.data.numpy().item()
+
+        lookup_table = create_integer_lookup_table(
+            floating_point_func,
+            relay.const(in_scale),
+            relay.const(in_zero_point, dtype="int32"),
+            relay.const(out_scale),
+            relay.const(out_zero_point, dtype="int32"),
+            in_axis=type_map[arg].axis,
+            in_dtype=type_map[arg].dtype,
+            out_axis=type_map[expr].axis,
+            out_dtype=type_map[expr].dtype,
+        )
+        lookup_table = relay.const(lookup_table)
+        index_tensor = relay.reshape(arg, [-1])
+        result = relay.gather(lookup_table, -1, index_tensor)
+        result = relay.reshape_like(result, arg)
+        return [result, type_map[expr]]
+
+    return register_fake_quantization_to_integer(op_name, func)
+
+
+register_unary_elementwise_table_lookup_op("tanh", np.tanh)
+
 register_unary_identity("reshape")
 register_unary_identity("squeeze")
 register_unary_identity("strided_slice")
diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py
index aee2741782fd..9a2384705982 100644
--- a/tests/python/relay/test_pass_fake_quantization_to_integer.py
+++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py
@@ -26,6 +26,7 @@ def compare_fq_to_int(expr, args, allow_rounding_error=False):
     mod = tvm.relay.transform.InferType()(mod)
     mod_int = tvm.relay.transform.FakeQuantizationToInteger()(mod)
+    breakpoint()
     assert not tvm.ir.structural_equal(mod, mod_int)
 
     result = (
@@ -586,6 +587,21 @@ def run_test_case(partial_func):
     run_test_case(lambda x: relay.op.min(x, axis=1))
 
 
+def test_fake_quantize_tanh():
+    x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8")
+
+    zero = relay.const(0)
+    x = relay.qnn.op.dequantize(x, relay.const(2.0), zero)
+    op = relay.op.tanh(x)
+
+    # Have difference scales for input/output to test if can handle
+    op = relay.qnn.op.quantize(op, relay.const(1.0), zero)
+
+    x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8")
+
+    compare_fq_to_int(op, [x_np])
+
+
 def test_fq_hard_fail():
     @tvm.ir.register_op_attr("nn.conv2d", "FTVMFakeQuantizationToInteger", level=11)
     def conv2d(expr, type_map):  # pylint: disable=unused-variable

From b414aeba93655559d5c6ec936e51f700bbf2b8f7 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 12 Jan 2022 16:42:28 -0800
Subject: [PATCH 02/72] fix small errors

---
 python/tvm/relay/transform/fake_quantization_to_integer.py   | 1 +
 tests/python/relay/test_pass_fake_quantization_to_integer.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 19e7cee6a9dc..84c22f8f9a94 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -189,6 +189,7 @@ def func(expr, type_map):
         index_tensor = relay.reshape(arg, [-1])
         result = relay.gather(lookup_table, -1, index_tensor)
         result = relay.reshape_like(result, arg)
+        breakpoint()
         return [result, type_map[expr]]
 
     return register_fake_quantization_to_integer(op_name, func)
diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py
index 9a2384705982..113f1f21759b 100644
--- a/tests/python/relay/test_pass_fake_quantization_to_integer.py
+++
b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -591,11 +591,11 @@ def test_fake_quantize_tanh(): x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(2.0), zero) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) op = relay.op.tanh(x) # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(1.0), zero) + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") From 8f7a4f6744369d965be14ae9e83c963b2529018c Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 18 Jan 2022 13:48:16 -0800 Subject: [PATCH 03/72] support uint and int lookup into tables --- .../transform/fake_quantization_to_integer.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 84c22f8f9a94..24907dc0c2ec 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -119,8 +119,20 @@ def create_integer_lookup_table( dtype_info = np.iinfo(in_dtype) + num_bits = dtype_info.bits + # Use TVMs quantization methods via relay to be consistent - inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + # inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + + # First generate a list of all num_bit integer patterns + inputs_quantized = np.array(range(0, 2 ** num_bits), dtype=f"uint{num_bits}") + + # Reinterpret bits as the real datatype + # Note what we are doing here is a bit tricky, the canonical view of our lookup table + # is using the uintX version. When we run the lookup in the relay graph, we note + # that the "gather" operation used supports negative indices which make the mapping + # valid! 
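# ---------------------------------------------------------------------------
# Editor's aside (illustrative sketch, not part of the patch series): the
# bit-pattern trick described in the comment above can be checked with plain
# numpy. Build every 8-bit pattern as uint8, then view the same bytes as
# int8; the table entry at uint index 255 is the one a gather keyed on the
# int8 value -1 (raw bits 0xFF, i.e. a negative index) must hit.
import numpy as np

_codes = np.arange(0, 2 ** 8, dtype="uint8")  # all 256 bit patterns, 0..255
_as_int8 = _codes.view("int8")                # same bytes reinterpreted as int8
assert _as_int8[255] == -1 and _as_int8[128] == -128
# ---------------------------------------------------------------------------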
+ inputs_quantized = inputs_quantized.view(in_dtype) inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) inputs_dequantized = run_const_expr( relay.qnn.op.dequantize( @@ -185,11 +197,11 @@ def func(expr, type_map): out_axis=type_map[expr].axis, out_dtype=type_map[expr].dtype, ) + lookup_table = relay.const(lookup_table) index_tensor = relay.reshape(arg, [-1]) result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, arg) - breakpoint() return [result, type_map[expr]] return register_fake_quantization_to_integer(op_name, func) From b8a54ee61587512de25f9b6502ec8cbcb0bb25ce Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 18 Jan 2022 15:40:08 -0800 Subject: [PATCH 04/72] reinterpret cast, working tanh tests --- include/tvm/topi/transform.h | 2 +- .../relay/transform/fake_quantization_to_integer.py | 11 +++++++---- src/relay/op/tensor/transform.cc | 3 ++- .../relay/test_pass_fake_quantization_to_integer.py | 5 ++--- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/tvm/topi/transform.h b/include/tvm/topi/transform.h index 1ad9d7da72ba..83efd1c5825d 100644 --- a/include/tvm/topi/transform.h +++ b/include/tvm/topi/transform.h @@ -1321,7 +1321,7 @@ inline Tensor gather(const Tensor& data, int axis, const Tensor& indices, size_t indices_dim_i = static_cast(GetConstInt(indices->shape[axis])); ICHECK_GE(indices_dim_i, 1); } - ICHECK(indices->dtype.is_int()); + ICHECK(indices->dtype.is_int() || indices->dtype.is_uint()); Array out_shape; for (size_t i = 0; i < ndim_i; ++i) { diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 24907dc0c2ec..e15398c93e06 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -129,9 +129,8 @@ def create_integer_lookup_table( # Reinterpret bits as the real datatype # Note what we are doing here is a bit tricky, the canonical view of our lookup table - # is using the uintX version. When we run the lookup in the relay graph, we note - # that the "gather" operation used supports negative indices which make the mapping - # valid! + # is using the uintX version. When we run the lookup in the relay graph, we cast the + # bit pattern back into this form. 
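# ---------------------------------------------------------------------------
# Editor's aside (illustrative sketch, not part of the patch series): a plain
# numpy model of what the rewritten graph built further down in this patch
# does at runtime. The helper name `_lookup` is invented; the steps mirror
# relay.reshape -> relay.reinterpret -> relay.gather -> relay.reshape_like.
def _lookup(x, table):  # x, table: numpy arrays; table has 2**bits entries
    bits = x.dtype.itemsize * 8
    flat = x.reshape(-1)                # relay.reshape(arg, [-1])
    idx = flat.view(f"uint{bits}")      # relay.reinterpret(..., f"uint{bits}")
    return table[idx].reshape(x.shape)  # relay.gather + relay.reshape_like
# ---------------------------------------------------------------------------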
inputs_quantized = inputs_quantized.view(in_dtype) inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) inputs_dequantized = run_const_expr( @@ -197,9 +196,13 @@ def func(expr, type_map): out_axis=type_map[expr].axis, out_dtype=type_map[expr].dtype, ) - + + in_dtype_info = np.iinfo(type_map[arg].dtype) + in_dtype_num_bits = in_dtype_info.bits + lookup_table = relay.const(lookup_table) index_tensor = relay.reshape(arg, [-1]) + index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, arg) return [result, type_map[expr]] diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 9e469f373131..ebf8a0653b70 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -3318,7 +3318,8 @@ bool GatherRel(const Array& types, int num_inputs, const Attrs& attrs, << "Gather: expect indices type to be TensorType but get " << types[1]; return false; } - ICHECK(indices->dtype.is_int()) << "indices of take must be tensor of integer"; + ICHECK(indices->dtype.is_int() || indices->dtype.is_uint()) + << "indices of gather must be tensor of integer"; const auto param = attrs.as(); ICHECK(param != nullptr); ICHECK(param->axis.defined()); diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 113f1f21759b..8c016e4c8976 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -26,7 +26,6 @@ def compare_fq_to_int(expr, args, allow_rounding_error=False): mod = tvm.relay.transform.InferType()(mod) mod_int = tvm.relay.transform.FakeQuantizationToInteger()(mod) - breakpoint() assert not tvm.ir.structural_equal(mod, mod_int) result = ( @@ -588,7 +587,7 @@ def run_test_case(partial_func): def test_fake_quantize_tanh(): - x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") zero = relay.const(0) x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) @@ -597,7 +596,7 @@ def test_fake_quantize_tanh(): # Have difference scales for input/output to test if can handle op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") compare_fq_to_int(op, [x_np]) From cf3eb4ea1cb736246e5309debb4d5270c69126b5 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 09:47:56 -0800 Subject: [PATCH 05/72] refactor relay func creation --- .../transform/fake_quantization_to_integer.py | 65 ++++++++++++++++++- .../test_pass_fake_quantization_to_integer.py | 27 ++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index e15398c93e06..eca3d12d7bcd 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -152,6 +152,50 @@ def create_integer_lookup_table( return output_quantized +def create_integer_lookup_op( + input_arg, + floating_point_func, + in_scale, + in_zero_point, + out_scale, + out_zero_point, + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", +): + """ + TODO + """ + # TODO: handle multi-channel q + in_scale = in_scale.data.numpy().item() + 
in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() + out_zero_point = out_zero_point.data.numpy().item() + + lookup_table = create_integer_lookup_table( + floating_point_func, + relay.const(in_scale), + relay.const(in_zero_point, dtype="int32"), + relay.const(out_scale), + relay.const(out_zero_point, dtype="int32"), + in_axis=in_axis, + in_dtype=in_dtype, + out_axis=out_axis, + out_dtype=out_dtype, + ) + + in_dtype_info = np.iinfo(in_dtype) + in_dtype_num_bits = in_dtype_info.bits + + lookup_table = relay.const(lookup_table) + index_tensor = relay.reshape(input_arg, [-1]) + index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") + result = relay.gather(lookup_table, -1, index_tensor) + result = relay.reshape_like(result, input_arg) + return result + + def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): """Implement an operator in quantized space via table lookup operations (e.g. via gather). @@ -163,12 +207,31 @@ def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): """ def func(expr, type_map): - assert len(expr.args) == 1 + assert len(expr.args) == 1, "only support elemwise ops for now!" arg = expr.args[0] in_scale = fold_constant(type_map[arg].scale) in_zero_point = fold_constant(type_map[arg].zero_point) out_scale = fold_constant(type_map[expr].scale) out_zero_point = fold_constant(type_map[expr].zero_point) + in_axis = type_map[arg].axis + in_dtype = type_map[arg].dtype + out_axis = type_map[expr].axis + out_dtype = type_map[expr].dtype + result = create_integer_lookup_op( + input_arg=arg, + floating_point_func=floating_point_func, + in_scale=in_scale, + in_zero_point=in_zero_point, + out_scale=out_scale, + out_zero_point=out_zero_point, + in_axis=in_axis, + in_dtype=in_dtype, + out_axis=out_axis, + out_dtype=out_dtype, + ) + return [result, type_map[expr]] + arg = expr.args[0] + if ( not isinstance(in_scale, relay.Constant) or not isinstance(in_zero_point, relay.Constant) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 8c016e4c8976..a23a316ae50f 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -19,6 +19,7 @@ import pytest import tvm from tvm import relay +from tvm.relay.transform import fake_quantization_to_integer def compare_fq_to_int(expr, args, allow_rounding_error=False): @@ -586,6 +587,32 @@ def run_test_case(partial_func): run_test_case(lambda x: relay.op.min(x, axis=1)) +class TestIntegerTableLookupTable: + """Consists of tests testing functionality of creating lookup tables for integer operations.""" + + # def __init__(self) -> None: + # self.input = np.arange(start=0, stop=256, dtype="uint8") + + def fake_identity_func_numpy(self, arr: np.ndarray): + return arr.astype("float32") + + """ + def fake_identity_func_relay(self): + fake_quantization_to_integer.register_fake_quantization_to_integer.fu + """ + + def test_int8_to_int8(self): + relay_result = fake_quantization_to_integer.create_integer_lookup_table( + self.uint8_identity_func, + relay.const(1.0, dtype="float32"), + relay.const(0, dtype="int32"), + relay.const(1.0, dtype="float32"), + relay.const(0, dtype="int32"), + in_dtype="int8", + out_dtype="int8", + ) + + def test_fake_quantize_tanh(): x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") From 0c1a71d8ffab8bf59ac11f4c32f3037302924cff Mon Sep 17 00:00:00 2001 From: 
Andrew Luo Date: Wed, 19 Jan 2022 10:15:41 -0800 Subject: [PATCH 06/72] basic casting tests --- .../transform/fake_quantization_to_integer.py | 39 ---------- .../test_pass_fake_quantization_to_integer.py | 74 ++++++++++++++++--- 2 files changed, 64 insertions(+), 49 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index eca3d12d7bcd..96f5faab9b90 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -230,45 +230,6 @@ def func(expr, type_map): out_dtype=out_dtype, ) return [result, type_map[expr]] - arg = expr.args[0] - - if ( - not isinstance(in_scale, relay.Constant) - or not isinstance(in_zero_point, relay.Constant) - or not isinstance(out_scale, relay.Constant) - or not isinstance(out_zero_point, relay.Constant) - ): - raise ValueError( - f"{op_name} requires input/output quantization params to be known at compile time!" - ) - - # TODO: handle multi-channel q - in_scale = in_scale.data.numpy().item() - in_zero_point = in_zero_point.data.numpy().item() - out_scale = out_scale.data.numpy().item() - out_zero_point = out_zero_point.data.numpy().item() - - lookup_table = create_integer_lookup_table( - floating_point_func, - relay.const(in_scale), - relay.const(in_zero_point, dtype="int32"), - relay.const(out_scale), - relay.const(out_zero_point, dtype="int32"), - in_axis=type_map[arg].axis, - in_dtype=type_map[arg].dtype, - out_axis=type_map[expr].axis, - out_dtype=type_map[expr].dtype, - ) - - in_dtype_info = np.iinfo(type_map[arg].dtype) - in_dtype_num_bits = in_dtype_info.bits - - lookup_table = relay.const(lookup_table) - index_tensor = relay.reshape(arg, [-1]) - index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") - result = relay.gather(lookup_table, -1, index_tensor) - result = relay.reshape_like(result, arg) - return [result, type_map[expr]] return register_fake_quantization_to_integer(op_name, func) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index a23a316ae50f..714dce4e6186 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -596,21 +596,75 @@ class TestIntegerTableLookupTable: def fake_identity_func_numpy(self, arr: np.ndarray): return arr.astype("float32") - """ - def fake_identity_func_relay(self): - fake_quantization_to_integer.register_fake_quantization_to_integer.fu - """ + def fake_identity_func_relay( + self, + input_arg=None, + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(0, dtype="int32"), + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(0, dtype="int32"), + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", + ): + if input_arg is None: + input_arg = relay.const(np.arange(0, 256, dtype="uint8").view(in_dtype)) + + return ( + fake_quantization_to_integer.create_integer_lookup_op( + input_arg=input_arg, + floating_point_func=self.fake_identity_func_numpy, + in_scale=in_scale, + in_zero_point=in_zero_point, + out_scale=out_scale, + out_zero_point=out_zero_point, + in_axis=in_axis, + out_axis=out_axis, + in_dtype=in_dtype, + out_dtype=out_dtype, + ), + input_arg.data.numpy(), + ) + + def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): + return (np_arr.astype("int32") - np_zero_point) * np_scale def 
test_int8_to_int8(self): - relay_result = fake_quantization_to_integer.create_integer_lookup_table( - self.uint8_identity_func, - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_uint8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_int8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(128, dtype="int32"), in_dtype="int8", + out_dtype="uint8", + ) + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg), + self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), + ) + + def test_uint8_to_int8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(128, dtype="int32"), + in_dtype="uint8", out_dtype="int8", ) + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), + self.dequantize_numpy(result), + ) def test_fake_quantize_tanh(): From c943ff19c2356e3ca0fd8c06ed69a61c5efc22b6 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 10:18:26 -0800 Subject: [PATCH 07/72] explicitly say do not handle multi-channel lookups --- .../relay/transform/fake_quantization_to_integer.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 96f5faab9b90..f7278801fec2 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -167,12 +167,20 @@ def create_integer_lookup_op( """ TODO """ - # TODO: handle multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() out_scale = out_scale.data.numpy().item() out_zero_point = out_zero_point.data.numpy().item() + # TODO: handle multi-channel q + if ( + in_scale.size() > 1 + or out_scale.size() > 1 + or in_zero_point.size() > 1 + or out_zero_point.size() > 1 + ): + raise ValueError("Do no support multi-channel quantization for now") + lookup_table = create_integer_lookup_table( floating_point_func, relay.const(in_scale), From 20737402cba0a4c5a6ab75bb557273d12296c64c Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 12:27:56 -0800 Subject: [PATCH 08/72] add example funcs --- .../transform/fake_quantization_to_integer.py | 6 ++- .../test_pass_fake_quantization_to_integer.py | 45 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index f7278801fec2..c595202ea541 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py 
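Aside (editor's illustration, not part of the patch series): for scalar qparams, the table construction that these patches delegate to relay's qnn.quantize/dequantize reduces to the following numpy arithmetic; `_make_lut` and its arguments are invented names for this sketch.

import numpy as np

def _make_lut(fp_func, in_scale, in_zp, out_scale, out_zp, in_dtype="int8", out_dtype="int8"):
    bits = np.iinfo(in_dtype).bits
    codes = np.arange(0, 2 ** bits, dtype=f"uint{bits}").view(in_dtype)  # all bit patterns
    real = (codes.astype("float32") - in_zp) * in_scale                  # dequantize
    quantized = np.round(fp_func(real) / out_scale) + out_zp             # quantize fp result
    info = np.iinfo(out_dtype)
    return np.clip(quantized, info.min, info.max).astype(out_dtype)

# e.g. the tanh table with the scales used by the tests in these patches:
_tanh_lut = _make_lut(np.tanh, in_scale=0.03, in_zp=0, out_scale=0.01, out_zp=0)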
@@ -169,6 +169,7 @@ def create_integer_lookup_op( """ in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() out_zero_point = out_zero_point.data.numpy().item() @@ -179,7 +180,7 @@ def create_integer_lookup_op( or in_zero_point.size() > 1 or out_zero_point.size() > 1 ): - raise ValueError("Do no support multi-channel quantization for now") + raise ValueError("Do not support multi-channel quantization for now") lookup_table = create_integer_lookup_table( floating_point_func, @@ -243,6 +244,9 @@ def func(expr, type_map): register_unary_elementwise_table_lookup_op("tanh", np.tanh) +register_unary_elementwise_table_lookup_op("erf", np.math.erf) +register_unary_elementwise_table_lookup_op("exp", np.math.exp) +register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) register_unary_identity("reshape") register_unary_identity("squeeze") diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 714dce4e6186..a7ab19bd059f 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -682,6 +682,51 @@ def test_fake_quantize_tanh(): compare_fq_to_int(op, [x_np]) +def test_fake_quantize_erf(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.erf(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_exp(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.exp(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_sigmoid(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.sigmoid(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + def test_fq_hard_fail(): @tvm.ir.register_op_attr("nn.conv2d", "FTVMFakeQuantizationToInteger", level=11) def conv2d(expr, type_map): # pylint: disable=unused-variable From 11674d30d6071800723a0713c9f9c2b679c68100 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 12:34:53 -0800 Subject: [PATCH 09/72] fix silent fail --- .../relay/transform/fake_quantization_to_integer.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index c595202ea541..63dabf98088e 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -167,21 +167,13 @@ def create_integer_lookup_op( """ TODO """ + + # TODO: handle multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = 
in_zero_point.data.numpy().item()
-
     out_scale = out_scale.data.numpy().item()
     out_zero_point = out_zero_point.data.numpy().item()
 
-    # TODO: handle multi-channel q
-    if (
-        in_scale.size() > 1
-        or out_scale.size() > 1
-        or in_zero_point.size() > 1
-        or out_zero_point.size() > 1
-    ):
-        raise ValueError("Do not support multi-channel quantization for now")
-
     lookup_table = create_integer_lookup_table(
         floating_point_func,
         relay.const(in_scale),

From 67baa39944a63ba9648a59805f2e35aedd083934 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 19 Jan 2022 12:46:13 -0800
Subject: [PATCH 10/72] fix some bugs with floating point funcs not working

---
 python/tvm/relay/transform/fake_quantization_to_integer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 63dabf98088e..2e293d44fc7d 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -17,6 +17,7 @@
 """Relay functions for rewriting fake quantized ops."""
 import numpy as np
 import tvm
+from scipy import special
 from tvm import relay
 from tvm.ir import TensorAffineType, TupleAffineType
 from tvm.tir import bijective_layout
@@ -167,7 +168,6 @@ def create_integer_lookup_op(
     """
     TODO
     """
-
     # TODO: handle multi-channel q
     in_scale = in_scale.data.numpy().item()
     in_zero_point = in_zero_point.data.numpy().item()
@@ -236,8 +236,8 @@ def func(expr, type_map):
 register_unary_elementwise_table_lookup_op("tanh", np.tanh)
-register_unary_elementwise_table_lookup_op("erf", np.math.erf)
-register_unary_elementwise_table_lookup_op("exp", np.math.exp)
+register_unary_elementwise_table_lookup_op("erf", special.erf)
+register_unary_elementwise_table_lookup_op("exp", np.exp)
 register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x)))

From 47e4b5ce91d0a475687d08914fc2bce8e8de9ea8 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 19 Jan 2022 12:48:56 -0800
Subject: [PATCH 11/72] add TODO

---
 python/tvm/relay/transform/fake_quantization_to_integer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 2e293d44fc7d..d6a9d534767a 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -235,6 +235,7 @@ def func(expr, type_map):
     return register_fake_quantization_to_integer(op_name, func)
 
+# TODO: better error messages if reference functions fail in FQ2I pass
 register_unary_elementwise_table_lookup_op("tanh", np.tanh)
 register_unary_elementwise_table_lookup_op("erf", special.erf)
 register_unary_elementwise_table_lookup_op("exp", np.exp)

From 446e25abd27ddaef7edb85f5380ee3f03e16299f Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Fri, 21 Jan 2022 16:20:38 -0800
Subject: [PATCH 12/72] add todo

---
 python/tvm/relay/transform/fake_quantization_to_integer.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index d6a9d534767a..432870dc98dc 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -111,6 +111,9 @@ def create_integer_lookup_table(
     in_dtype="uint8",
     out_dtype="uint8",
): + """ + TODO + """ if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( np.dtype(out_dtype), np.integer ): From 87e265c54efd119d94233de00ed2912ee29a19a0 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 12:50:19 -0800 Subject: [PATCH 13/72] canonicalizations --- python/tvm/relay/qnn/op/__init__.py | 4 ++-- python/tvm/relay/qnn/op/canonicalizations.py | 0 python/tvm/relay/qnn/op/op.py | 25 +++++++++++++++++++- 3 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 python/tvm/relay/qnn/op/canonicalizations.py diff --git a/python/tvm/relay/qnn/op/__init__.py b/python/tvm/relay/qnn/op/__init__.py index 848409360a9d..745050e286e8 100644 --- a/python/tvm/relay/qnn/op/__init__.py +++ b/python/tvm/relay/qnn/op/__init__.py @@ -18,5 +18,5 @@ """QNN dialect related operators.""" from __future__ import absolute_import as _abs from .qnn import * -from .op import register_qnn_legalize -from . import _qnn, legalizations, layout_conversions +from .op import register_qnn_legalize, register_qnn_canonicalize +from . import _qnn, legalizations, layout_conversions, canonicalizations diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/tvm/relay/qnn/op/op.py b/python/tvm/relay/qnn/op/op.py index 32a61229951c..c83a32e2ce6a 100644 --- a/python/tvm/relay/qnn/op/op.py +++ b/python/tvm/relay/qnn/op/op.py @@ -20,7 +20,10 @@ def register_qnn_legalize(op_name, legal_op=None, level=10): - """Register legal transformation function for a QNN op + """Register legal transformation function for a QNN op. + + This helps QNN match hardware intrinsics better and is run before + canonicalization. Parameters ---------- @@ -34,3 +37,23 @@ def register_qnn_legalize(op_name, legal_op=None, level=10): The priority level """ return tvm.ir.register_op_attr(op_name, "FTVMQnnLegalize", legal_op, level) + + +def register_qnn_canonicalize(op_name, legal_op=None, level=10): + """Register canonicalization function for a QNN op. + + This transforms QNN ops to mainline Relay components. + + Parameters + ---------- + op_name : str + The name of the operator + + legal_op: function (attrs: Attrs, args: List[Expr], List[relay.Type]: arg_types) -> new_expr: Expr + The function for transforming an expr to another expr. 
+ + level : int + The priority level + """ + + return tvm.ir.register_op_attr(op_name, "FTVMQnnCanonicalize", legal_op, level) From 400880cd09cb1decf494b1b2ea754b2b1566e584 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:02:57 -0800 Subject: [PATCH 14/72] refactor integer lookup ops into own folder --- python/tvm/relay/qnn/op/canonicalizations.py | 121 ++++++++++++++ .../transform/fake_quantization_to_integer.py | 151 ------------------ .../relay/qnn/test_canonicalizations.py | 146 +++++++++++++++++ .../test_pass_fake_quantization_to_integer.py | 140 ---------------- 4 files changed, 267 insertions(+), 291 deletions(-) create mode 100644 tests/python/relay/qnn/test_canonicalizations.py diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index e69de29bb2d1..d419e3eb7a8e 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -0,0 +1,121 @@ +from typing import Callable + +import numpy as np +import tvm +from tvm import relay + + +# TODO: replace with constant folding +def run_const_expr(expr: "relay.Expr") -> np.ndarray: + mod = tvm.IRModule.from_expr(expr) + vm_exe = relay.create_executor("vm", mod=mod) + return vm_exe.evaluate()().asnumpy() + + +def create_integer_lookup_table( + floating_point_func: Callable[[np.ndarray], np.ndarray], + input_scale: "relay.Expr", + input_zero_point: "relay.Expr", + output_scale: "relay.Expr", + output_zero_point: "relay.Expr", + in_axis: int = -1, + out_axis: int = -1, + in_dtype: str = "uint8", + out_dtype: str = "uint8", +) -> np.ndarray: + """ + TODO + """ + if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( + np.dtype(out_dtype), np.integer + ): + raise ValueError( + f"Only integer dtypes allowed got {in_dtype} and {out_dtype} for in and out dtypes." + ) + + dtype_info = np.iinfo(in_dtype) + + num_bits = dtype_info.bits + + # Use TVMs quantization methods via relay to be consistent + # inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + + # First generate a list of all num_bit integer patterns + inputs_quantized = np.array(range(0, 2 ** num_bits), dtype=f"uint{num_bits}") + + # Reinterpret bits as the real datatype + # Note what we are doing here is a bit tricky, the canonical view of our lookup table + # is using the uintX version. When we run the lookup in the relay graph, we cast the + # bit pattern back into this form. 
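# ---------------------------------------------------------------------------
# Editor's aside (illustrative, not part of the patch): how the op-level
# helper `create_integer_lookup_op` defined later in this file is meant to
# be called, e.g. to lower tanh over int8 data. The qparams must be relay
# constants, since that helper reads them via `.data.numpy()`.
#
#   x = relay.var("x", shape=[3, 3], dtype="int8")
#   y = create_integer_lookup_op(
#       input_arg=x,
#       floating_point_func=np.tanh,
#       in_scale=relay.const(0.03), in_zero_point=relay.const(0, "int32"),
#       out_scale=relay.const(0.01), out_zero_point=relay.const(0, "int32"),
#       in_dtype="int8", out_dtype="int8",
#   )
# ---------------------------------------------------------------------------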
+ inputs_quantized = inputs_quantized.view(in_dtype) + inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) + inputs_dequantized = run_const_expr( + relay.qnn.op.dequantize( + inputs_quantized, + input_scale=input_scale, + input_zero_point=input_zero_point, + axis=in_axis, + ) + ) + + output_dequantized = relay.const(floating_point_func(inputs_dequantized)) + output_quantized = run_const_expr( + relay.qnn.op.quantize( + output_dequantized, output_scale, output_zero_point, out_axis, out_dtype + ) + ) + + return output_quantized + + +def create_integer_lookup_op( + input_arg: "relay.Expr", + floating_point_func: Callable[[np.array], np.array], + in_scale: "relay.Expr", + in_zero_point: "relay.Expr", + out_scale: "relay.Expr", + out_zero_point: "relay.Expr", + in_axis: int = -1, + out_axis: int = -1, + in_dtype: str = "uint8", + out_dtype: str = "uint8", +) -> "relay.Expr": + """ + TODO + """ + # TODO: handle multi-channel q + in_scale = in_scale.data.numpy().item() + in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() + out_zero_point = out_zero_point.data.numpy().item() + + lookup_table = create_integer_lookup_table( + floating_point_func, + relay.const(in_scale), + relay.const(in_zero_point, dtype="int32"), + relay.const(out_scale), + relay.const(out_zero_point, dtype="int32"), + in_axis=in_axis, + in_dtype=in_dtype, + out_axis=out_axis, + out_dtype=out_dtype, + ) + + in_dtype_info = np.iinfo(in_dtype) + in_dtype_num_bits = in_dtype_info.bits + + lookup_table = relay.const(lookup_table) + index_tensor = relay.reshape(input_arg, [-1]) + index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") + result = relay.gather(lookup_table, -1, index_tensor) + result = relay.reshape_like(result, input_arg) + return result + + +""" +# TODO: better error messages if reference functions fail in FQ2I pass +register_unary_elementwise_table_lookup_op("tanh", np.tanh) +register_unary_elementwise_table_lookup_op("erf", special.erf) +register_unary_elementwise_table_lookup_op("exp", np.exp) +register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) +""" diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 432870dc98dc..9fdc2186a397 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -93,157 +93,6 @@ def identity(expr, type_map): return register_fake_quantization_to_integer(op_name, identity) -# TODO: replace with constant folding -def run_const_expr(expr): - mod = tvm.IRModule.from_expr(expr) - vm_exe = relay.create_executor("vm", mod=mod) - return vm_exe.evaluate()().asnumpy() - - -def create_integer_lookup_table( - floating_point_func, - input_scale, - input_zero_point, - output_scale, - output_zero_point, - in_axis=-1, - out_axis=-1, - in_dtype="uint8", - out_dtype="uint8", -): - """ - TODO - """ - if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( - np.dtype(out_dtype), np.integer - ): - raise ValueError( - f"Only integer dtypes allowed got {in_dtype} and {out_dtype} for in and out dtypes." 
- ) - - dtype_info = np.iinfo(in_dtype) - - num_bits = dtype_info.bits - - # Use TVMs quantization methods via relay to be consistent - # inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) - - # First generate a list of all num_bit integer patterns - inputs_quantized = np.array(range(0, 2 ** num_bits), dtype=f"uint{num_bits}") - - # Reinterpret bits as the real datatype - # Note what we are doing here is a bit tricky, the canonical view of our lookup table - # is using the uintX version. When we run the lookup in the relay graph, we cast the - # bit pattern back into this form. - inputs_quantized = inputs_quantized.view(in_dtype) - inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) - inputs_dequantized = run_const_expr( - relay.qnn.op.dequantize( - inputs_quantized, - input_scale=input_scale, - input_zero_point=input_zero_point, - axis=in_axis, - ) - ) - - output_dequantized = relay.const(floating_point_func(inputs_dequantized)) - output_quantized = run_const_expr( - relay.qnn.op.quantize( - output_dequantized, output_scale, output_zero_point, out_axis, out_dtype - ) - ) - - return output_quantized - - -def create_integer_lookup_op( - input_arg, - floating_point_func, - in_scale, - in_zero_point, - out_scale, - out_zero_point, - in_axis=-1, - out_axis=-1, - in_dtype="uint8", - out_dtype="uint8", -): - """ - TODO - """ - # TODO: handle multi-channel q - in_scale = in_scale.data.numpy().item() - in_zero_point = in_zero_point.data.numpy().item() - out_scale = out_scale.data.numpy().item() - out_zero_point = out_zero_point.data.numpy().item() - - lookup_table = create_integer_lookup_table( - floating_point_func, - relay.const(in_scale), - relay.const(in_zero_point, dtype="int32"), - relay.const(out_scale), - relay.const(out_zero_point, dtype="int32"), - in_axis=in_axis, - in_dtype=in_dtype, - out_axis=out_axis, - out_dtype=out_dtype, - ) - - in_dtype_info = np.iinfo(in_dtype) - in_dtype_num_bits = in_dtype_info.bits - - lookup_table = relay.const(lookup_table) - index_tensor = relay.reshape(input_arg, [-1]) - index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") - result = relay.gather(lookup_table, -1, index_tensor) - result = relay.reshape_like(result, input_arg) - return result - - -def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): - """Implement an operator in quantized space via table lookup operations (e.g. via gather). - - op_name: str - The name of the operator to register for FQ2I. - - example_func: Callable[[np.ndarray], np.ndarray] - The FP32 version of the function to quantize operating on numpy arrays. - """ - - def func(expr, type_map): - assert len(expr.args) == 1, "only support elemwise ops for now!" 
- arg = expr.args[0] - in_scale = fold_constant(type_map[arg].scale) - in_zero_point = fold_constant(type_map[arg].zero_point) - out_scale = fold_constant(type_map[expr].scale) - out_zero_point = fold_constant(type_map[expr].zero_point) - in_axis = type_map[arg].axis - in_dtype = type_map[arg].dtype - out_axis = type_map[expr].axis - out_dtype = type_map[expr].dtype - result = create_integer_lookup_op( - input_arg=arg, - floating_point_func=floating_point_func, - in_scale=in_scale, - in_zero_point=in_zero_point, - out_scale=out_scale, - out_zero_point=out_zero_point, - in_axis=in_axis, - in_dtype=in_dtype, - out_axis=out_axis, - out_dtype=out_dtype, - ) - return [result, type_map[expr]] - - return register_fake_quantization_to_integer(op_name, func) - - -# TODO: better error messages if reference functions fail in FQ2I pass -register_unary_elementwise_table_lookup_op("tanh", np.tanh) -register_unary_elementwise_table_lookup_op("erf", special.erf) -register_unary_elementwise_table_lookup_op("exp", np.exp) -register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) - register_unary_identity("reshape") register_unary_identity("squeeze") register_unary_identity("strided_slice") diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py new file mode 100644 index 000000000000..aad6bb3ede19 --- /dev/null +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -0,0 +1,146 @@ +import numpy as np +import tvm +from tvm import relay +from tvm.relay.qnn.op import canonicalizations + + +class TestIntegerTableLookupTable: + """Consists of tests testing functionality of creating lookup tables for integer operations.""" + + # def __init__(self) -> None: + # self.input = np.arange(start=0, stop=256, dtype="uint8") + + def fake_identity_func_numpy(self, arr: np.ndarray): + return arr.astype("float32") + + def fake_identity_func_relay( + self, + input_arg=None, + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(0, dtype="int32"), + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(0, dtype="int32"), + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", + ): + if input_arg is None: + input_arg = relay.const(np.arange(0, 256, dtype="uint8").view(in_dtype)) + + return ( + canonicalizations.create_integer_lookup_op( + input_arg=input_arg, + floating_point_func=self.fake_identity_func_numpy, + in_scale=in_scale, + in_zero_point=in_zero_point, + out_scale=out_scale, + out_zero_point=out_zero_point, + in_axis=in_axis, + out_axis=out_axis, + in_dtype=in_dtype, + out_dtype=out_dtype, + ), + input_arg.data.numpy(), + ) + + def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): + return (np_arr.astype("int32") - np_zero_point) * np_scale + + def test_int8_to_int8(self): + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_uint8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_int8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(128, dtype="int32"), + 
in_dtype="int8", + out_dtype="uint8", + ) + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg), + self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), + ) + + def test_uint8_to_int8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(128, dtype="int32"), + in_dtype="uint8", + out_dtype="int8", + ) + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), + self.dequantize_numpy(result), + ) + + +""" +def test_fake_quantize_tanh(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.tanh(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_erf(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.erf(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_exp(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.exp(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_sigmoid(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.sigmoid(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) +""" diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index a7ab19bd059f..28166bb8be72 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -587,146 +587,6 @@ def run_test_case(partial_func): run_test_case(lambda x: relay.op.min(x, axis=1)) -class TestIntegerTableLookupTable: - """Consists of tests testing functionality of creating lookup tables for integer operations.""" - - # def __init__(self) -> None: - # self.input = np.arange(start=0, stop=256, dtype="uint8") - - def fake_identity_func_numpy(self, arr: np.ndarray): - return arr.astype("float32") - - def fake_identity_func_relay( - self, - input_arg=None, - in_scale=relay.const(1.0, dtype="float32"), - in_zero_point=relay.const(0, dtype="int32"), - out_scale=relay.const(1.0, dtype="float32"), - out_zero_point=relay.const(0, dtype="int32"), - in_axis=-1, - out_axis=-1, - in_dtype="uint8", - out_dtype="uint8", - ): - if input_arg is None: - input_arg = relay.const(np.arange(0, 256, dtype="uint8").view(in_dtype)) - - return ( - 
fake_quantization_to_integer.create_integer_lookup_op( - input_arg=input_arg, - floating_point_func=self.fake_identity_func_numpy, - in_scale=in_scale, - in_zero_point=in_zero_point, - out_scale=out_scale, - out_zero_point=out_zero_point, - in_axis=in_axis, - out_axis=out_axis, - in_dtype=in_dtype, - out_dtype=out_dtype, - ), - input_arg.data.numpy(), - ) - - def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): - return (np_arr.astype("int32") - np_zero_point) * np_scale - - def test_int8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) - - def test_uint8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) - - def test_int8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - out_scale=relay.const(1.0, dtype="float32"), - out_zero_point=relay.const(128, dtype="int32"), - in_dtype="int8", - out_dtype="uint8", - ) - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg), - self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), - ) - - def test_uint8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - in_scale=relay.const(1.0, dtype="float32"), - in_zero_point=relay.const(128, dtype="int32"), - in_dtype="uint8", - out_dtype="int8", - ) - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), - self.dequantize_numpy(result), - ) - - -def test_fake_quantize_tanh(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.tanh(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_erf(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.erf(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_exp(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.exp(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_sigmoid(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.sigmoid(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = 
np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - def test_fq_hard_fail(): @tvm.ir.register_op_attr("nn.conv2d", "FTVMFakeQuantizationToInteger", level=11) def conv2d(expr, type_map): # pylint: disable=unused-variable From 3d26528df0877c42dabfdd7aac96627320dc8d06 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:09:33 -0800 Subject: [PATCH 15/72] fq2i stuff --- python/tvm/relay/transform/fake_quantization_to_integer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 9fdc2186a397..3337c7cfb894 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -17,9 +17,11 @@ """Relay functions for rewriting fake quantized ops.""" import numpy as np import tvm -from scipy import special from tvm import relay from tvm.ir import TensorAffineType, TupleAffineType + +# import to register canonicalization funcs for fq2i +from tvm.relay.qnn.op import canonicalizations from tvm.tir import bijective_layout from ..op import register_fake_quantization_to_integer From e60f2b4dd418697b15a44d73ebcfd24e5b7d916a Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:33:35 -0800 Subject: [PATCH 16/72] clean up existing tests --- python/tvm/relay/qnn/op/canonicalizations.py | 2 +- .../relay/qnn/test_canonicalizations.py | 90 ++++++++++++++----- 2 files changed, 67 insertions(+), 25 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index d419e3eb7a8e..334b824f8b50 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -5,8 +5,8 @@ from tvm import relay -# TODO: replace with constant folding def run_const_expr(expr: "relay.Expr") -> np.ndarray: + """Run a const expression, receiving result as np array.""" mod = tvm.IRModule.from_expr(expr) vm_exe = relay.create_executor("vm", mod=mod) return vm_exe.evaluate()().asnumpy() diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index aad6bb3ede19..0f9939f1cbd8 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -7,9 +7,6 @@ class TestIntegerTableLookupTable: """Consists of tests testing functionality of creating lookup tables for integer operations.""" - # def __init__(self) -> None: - # self.input = np.arange(start=0, stop=256, dtype="uint8") - def fake_identity_func_numpy(self, arr: np.ndarray): return arr.astype("float32") @@ -47,40 +44,85 @@ def fake_identity_func_relay( def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): return (np_arr.astype("int32") - np_zero_point) * np_scale - def test_int8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") + def run_identity_function_test( + self, + in_scale: float, + in_zero_point: int, + out_scale: float, + out_zero_point: int, + in_dtype: str, + out_dtype: str, + rtol=1e-7, + atol=0, + ): + relay_lookup, input_arg = self.fake_identity_func_relay( + in_scale=relay.const(in_scale, "float32"), + in_zero_point=relay.const(in_zero_point, "int32"), + out_scale=relay.const(out_scale, "float32"), + out_zero_point=relay.const(out_zero_point, "int32"), + in_dtype=in_dtype, + out_dtype=out_dtype, + ) result = 
canonicalizations.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + np.testing.assert_allclose( + self.dequantize_numpy(input_arg, np_scale=in_scale, np_zero_point=in_zero_point), + self.dequantize_numpy(result, np_scale=out_scale, np_zero_point=out_zero_point), + atol=atol, + rtol=rtol, + ) + + def test_int8_to_int8(self): + """Test int8 input to int8 output mapping workings""" + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=0, + out_scale=1.0, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + ) def test_uint8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") - result = canonicalizations.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=128, + out_scale=1.0, + out_zero_point=128, + in_dtype="uint8", + out_dtype="uint8", + ) def test_int8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - out_scale=relay.const(1.0, dtype="float32"), - out_zero_point=relay.const(128, dtype="int32"), + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=0, + out_scale=1.0, + out_zero_point=128, in_dtype="int8", out_dtype="uint8", ) - result = canonicalizations.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg), - self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), - ) def test_uint8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - in_scale=relay.const(1.0, dtype="float32"), - in_zero_point=relay.const(128, dtype="int32"), + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=128, + out_scale=1.0, + out_zero_point=0, in_dtype="uint8", out_dtype="int8", ) - result = canonicalizations.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), - self.dequantize_numpy(result), + + def test_different_in_out_qparams(self): + """Test mapping with different in/out qparams works.""" + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=128, + out_scale=1.0, + out_zero_point=128, + in_dtype="uint8", + out_dtype="uint8", + atol=1, # numbers range from -128 -> 128 so not that big error + rtol=0, ) From 8bd0b44d44b26c470076e5919c460e9250609851 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:34:08 -0800 Subject: [PATCH 17/72] flesh out todo --- python/tvm/relay/qnn/op/canonicalizations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 334b824f8b50..19ee1b131908 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -83,7 +83,7 @@ def create_integer_lookup_op( """ TODO """ - # TODO: handle multi-channel q + # TODO: handle multi-channel q, if below fails it's probably that in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() out_scale = out_scale.data.numpy().item() From daef1501809db3709305b662f50c7580f6d2993c Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 14:14:13 -0800 Subject: [PATCH 18/72] more tests --- .../relay/qnn/test_canonicalizations.py | 121 ++++++++---------- 1 file changed, 52 insertions(+), 69 deletions(-) diff --git a/tests/python/relay/qnn/test_canonicalizations.py 
b/tests/python/relay/qnn/test_canonicalizations.py index 0f9939f1cbd8..e13d96885051 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -1,6 +1,9 @@ +from typing import Callable + import numpy as np import tvm from tvm import relay +from tvm.relay.op.transform import arange from tvm.relay.qnn.op import canonicalizations @@ -13,6 +16,7 @@ def fake_identity_func_numpy(self, arr: np.ndarray): def fake_identity_func_relay( self, input_arg=None, + floating_point_func: Callable[[np.ndarray], np.ndarray] = fake_identity_func_numpy, in_scale=relay.const(1.0, dtype="float32"), in_zero_point=relay.const(0, dtype="int32"), out_scale=relay.const(1.0, dtype="float32"), @@ -28,7 +32,7 @@ def fake_identity_func_relay( return ( canonicalizations.create_integer_lookup_op( input_arg=input_arg, - floating_point_func=self.fake_identity_func_numpy, + floating_point_func=floating_point_func, in_scale=in_scale, in_zero_point=in_zero_point, out_scale=out_scale, @@ -44,7 +48,7 @@ def fake_identity_func_relay( def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): return (np_arr.astype("int32") - np_zero_point) * np_scale - def run_identity_function_test( + def run_function_test( self, in_scale: float, in_zero_point: int, @@ -52,10 +56,14 @@ def run_identity_function_test( out_zero_point: int, in_dtype: str, out_dtype: str, + floating_point_func: Callable[[np.ndarray], np.ndarray] = fake_identity_func_numpy, + input_arg: relay.Expr = None, rtol=1e-7, atol=0, ): relay_lookup, input_arg = self.fake_identity_func_relay( + input_arg=input_arg, + floating_point_func=floating_point_func, in_scale=relay.const(in_scale, "float32"), in_zero_point=relay.const(in_zero_point, "int32"), out_scale=relay.const(out_scale, "float32"), @@ -65,15 +73,18 @@ def run_identity_function_test( ) result = canonicalizations.run_const_expr(relay_lookup) np.testing.assert_allclose( - self.dequantize_numpy(input_arg, np_scale=in_scale, np_zero_point=in_zero_point), + floating_point_func( + self.dequantize_numpy(input_arg, np_scale=in_scale, np_zero_point=in_zero_point) + ), self.dequantize_numpy(result, np_scale=out_scale, np_zero_point=out_zero_point), atol=atol, rtol=rtol, ) + """Test mapping between different input/output dtypes""" + def test_int8_to_int8(self): - """Test int8 input to int8 output mapping workings""" - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=0, out_scale=1.0, @@ -83,7 +94,7 @@ def test_int8_to_int8(self): ) def test_uint8_to_uint8(self): - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=128, out_scale=1.0, @@ -93,7 +104,7 @@ def test_uint8_to_uint8(self): ) def test_int8_to_uint8(self): - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=0, out_scale=1.0, @@ -103,7 +114,7 @@ def test_int8_to_uint8(self): ) def test_uint8_to_int8(self): - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=128, out_scale=1.0, @@ -112,9 +123,10 @@ def test_uint8_to_int8(self): out_dtype="int8", ) + """Test mapping with different in/out qparams works.""" + def test_different_in_out_qparams(self): - """Test mapping with different in/out qparams works.""" - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=128, out_scale=1.0, @@ -125,64 +137,35 @@ def test_different_in_out_qparams(self): rtol=0, ) + """Test some simple functions""" -""" -def test_fake_quantize_tanh(): - x = 
relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.tanh(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_erf(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.erf(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_exp(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.exp(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_sigmoid(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.sigmoid(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + def test_tanh(self): + # 1 / 64 in scale -- input range is ~ (-2, 2), tanh(+-2) ~= +-1 + # 1 / 128 out_scale -- output range is ~(-1, 1) + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8")), + in_scale=1 / 64, + in_zero_point=0, + out_scale=1 / 128, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=np.tanh, + atol=0.01, + rtol=0.01, + ) - compare_fq_to_int(op, [x_np]) -""" + def test_exp(self): + # input in floating point ~[-2, 2], final output ~[0, 8] + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8")), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=np.exp, + atol=0.03, + rtol=0.01, + ) From 173e25193d0812500e11bf61bd045a01661faca0 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 14:41:35 -0800 Subject: [PATCH 19/72] test on keeping shape good --- .../relay/qnn/test_canonicalizations.py | 50 ++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index e13d96885051..334c628e5e54 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -15,8 +15,8 @@ def fake_identity_func_numpy(self, arr: np.ndarray): def fake_identity_func_relay( self, + floating_point_func: Callable[[np.ndarray], np.ndarray], input_arg=None, - floating_point_func: Callable[[np.ndarray], np.ndarray] = fake_identity_func_numpy, in_scale=relay.const(1.0, dtype="float32"), in_zero_point=relay.const(0, dtype="int32"), out_scale=relay.const(1.0, dtype="float32"), @@ -56,7 +56,7 @@ def run_function_test( out_zero_point: int, in_dtype: str, out_dtype: str, - floating_point_func: Callable[[np.ndarray], np.ndarray] 
= fake_identity_func_numpy, + floating_point_func: Callable[[np.ndarray], np.ndarray], input_arg: relay.Expr = None, rtol=1e-7, atol=0, @@ -91,6 +91,7 @@ def test_int8_to_int8(self): out_zero_point=0, in_dtype="int8", out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, ) def test_uint8_to_uint8(self): @@ -101,6 +102,7 @@ def test_uint8_to_uint8(self): out_zero_point=128, in_dtype="uint8", out_dtype="uint8", + floating_point_func=self.fake_identity_func_numpy, ) def test_int8_to_uint8(self): @@ -111,6 +113,7 @@ def test_int8_to_uint8(self): out_zero_point=128, in_dtype="int8", out_dtype="uint8", + floating_point_func=self.fake_identity_func_numpy, ) def test_uint8_to_int8(self): @@ -121,6 +124,48 @@ def test_uint8_to_int8(self): out_zero_point=0, in_dtype="uint8", out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + ) + + """Test different input shapes""" + + def test_keep_input_shapes(self): + # input in floating point ~[-2, 2], final output ~[0, 8] + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8").reshape([2, 2, 8, 8])), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + atol=0.03, + rtol=0.01, + ) + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8").reshape([2, 2, 64])), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + atol=0.03, + rtol=0.01, + ) + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8").reshape([2, 128])), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + atol=0.03, + rtol=0.01, ) """Test mapping with different in/out qparams works.""" @@ -133,6 +178,7 @@ def test_different_in_out_qparams(self): out_zero_point=128, in_dtype="uint8", out_dtype="uint8", + floating_point_func=self.fake_identity_func_numpy, atol=1, # numbers range from -128 -> 128 so not that big error rtol=0, ) From c4efbfb59f8a6b78d976db7da1242ae68d8f814e Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 14:42:00 -0800 Subject: [PATCH 20/72] lookup table fix --- tests/python/relay/qnn/test_canonicalizations.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index 334c628e5e54..84e01976f4f4 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -1,9 +1,7 @@ from typing import Callable import numpy as np -import tvm from tvm import relay -from tvm.relay.op.transform import arange from tvm.relay.qnn.op import canonicalizations From ddd8dd528808823a1ba75693096ba380c35ba3e1 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 15:04:48 -0800 Subject: [PATCH 21/72] replace canonicalization for rsqrt --- python/tvm/relay/qnn/op/canonicalizations.py | 23 +++++++++++++------- src/relay/qnn/op/rsqrt.cc | 4 ++-- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 19ee1b131908..b0ac0b1c15c1 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -3,6 +3,7 @@ import 
numpy as np import tvm from tvm import relay +from tvm.relay.qnn.op.op import register_qnn_canonicalize def run_const_expr(expr: "relay.Expr") -> np.ndarray: @@ -83,7 +84,7 @@ def create_integer_lookup_op( """ TODO """ - # TODO: handle multi-channel q, if below fails it's probably that + # TODO: handle multi-channel q, below will fail with multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() out_scale = out_scale.data.numpy().item() @@ -112,10 +113,16 @@ def create_integer_lookup_op( return result -""" -# TODO: better error messages if reference functions fail in FQ2I pass -register_unary_elementwise_table_lookup_op("tanh", np.tanh) -register_unary_elementwise_table_lookup_op("erf", special.erf) -register_unary_elementwise_table_lookup_op("exp", np.exp) -register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) -""" +@register_qnn_canonicalize("qnn.rsqrt") +def canonicalize_rsqrt(attrs, args, arg_types): + """Canonicalization for rsqrt""" + return create_integer_lookup_op( + args[0], + lambda arr: 1 / np.sqrt(arr), + args[1], + args[2], + args[3], + args[4], + in_dtype=arg_types[0].dtype, + out_dtype=arg_types[0].dtype, + ) diff --git a/src/relay/qnn/op/rsqrt.cc b/src/relay/qnn/op/rsqrt.cc index 55814dff422b..2bd73d758144 100644 --- a/src/relay/qnn/op/rsqrt.cc +++ b/src/relay/qnn/op/rsqrt.cc @@ -105,6 +105,7 @@ Expr QnnRsqrtCanonicalize(const Attrs& attrs, const Array& new_args, return Quantize(output, args.output_scale, args.output_zero_point, input_type.dtype, types, -1); } +// Canonicalization set in python/tvm/relay/qnn/op/canonicalizations.py RELAY_REGISTER_OP("qnn.rsqrt") .describe("Elementwise rsqrt for quantized tensors.") .set_num_inputs(5) @@ -116,8 +117,7 @@ RELAY_REGISTER_OP("qnn.rsqrt") "The quantization zero_point of the output tensor.") .set_support_level(11) .add_type_rel("QRsqrt", QnnRsqrtRel) - .set_attr("TNonComputational", true) - .set_attr("FTVMQnnCanonicalize", QnnRsqrtCanonicalize); + .set_attr("TNonComputational", true); TVM_REGISTER_GLOBAL("relay.qnn.op._make.rsqrt").set_body_typed(MakeQuantizedRsqrt); From f65583ae814ac96049b1a6cf61e6c1d20b8940a6 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 15:05:47 -0800 Subject: [PATCH 22/72] remove canonicalization of rsqrt --- src/relay/qnn/op/rsqrt.cc | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/src/relay/qnn/op/rsqrt.cc b/src/relay/qnn/op/rsqrt.cc index 2bd73d758144..6d37b1aa8d5d 100644 --- a/src/relay/qnn/op/rsqrt.cc +++ b/src/relay/qnn/op/rsqrt.cc @@ -69,42 +69,6 @@ Expr MakeQuantizedRsqrt(Expr x, Expr scale, Expr zero_point, Expr output_scale, return Call(op, {x, scale, zero_point, output_scale, output_zero_point}, Attrs(), {}); } -/* - * \brief Canonicalizes the QNN rsqrt op. - * \param attrs The empty attribute. - * \param new_args The new mutated args to the call node. - * \param arg_types The types of input and output. - * \return The sequence of Relay ops for add op. - */ -Expr QnnRsqrtCanonicalize(const Attrs& attrs, const Array& new_args, - const Array& arg_types) { - // At this time, due to the complexity of implementing this op in int8 or uint8, - // we dequantize the input, run the op in float, and then quantize the output (as below). - // This acts as a placeholder for future hardware enablement, where more hardware specific - // canonicalization can be provided. - - // Get the args. - QnnUnaryOpArguments args(new_args); - - // Get the input dtype and shape. 
- QnnUnaryOpTensorType input_type(arg_types, 0); - - // Get the types for dequantize/quantize. - Array types; - for (size_t i = 1; i < 5; ++i) { - types.push_back(arg_types[i]); - } - - // Dequantize input. - auto dequantized_arg = Dequantize(args.x, args.scale, args.zero_point, types, -1); - - // Compute Rsqrt(Q_x') - auto output = Rsqrt(dequantized_arg); - - // Quantize output. - return Quantize(output, args.output_scale, args.output_zero_point, input_type.dtype, types, -1); -} - // Canonicalization set in python/tvm/relay/qnn/op/canonicalizations.py RELAY_REGISTER_OP("qnn.rsqrt") .describe("Elementwise rsqrt for quantized tensors.") From 0b8dc75a6bd1fca355e137ebeee668c5102c4862 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 20:55:19 -0800 Subject: [PATCH 23/72] add asf headers --- python/tvm/relay/qnn/op/canonicalizations.py | 16 ++++++++++++++++ tests/python/relay/qnn/test_canonicalizations.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index b0ac0b1c15c1..23c5e37f7ebe 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. from typing import Callable import numpy as np diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index 84e01976f4f4..0505a88c07bd 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from typing import Callable import numpy as np From 3c29f6b92f8904697226df3fea4f8c4ed2744a52 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 13:45:04 -0800 Subject: [PATCH 24/72] topi tests --- tests/python/topi/python/test_topi_transform.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index 42d2463b8952..622d6946e913 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -18,14 +18,11 @@ import numpy as np import pytest import tvm -from tvm import te -from tvm import topi -from tvm import relay +import tvm.testing import tvm.topi.testing +from tvm import relay, te, topi from tvm.contrib.nvcc import have_fp16 -import tvm.testing - def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): A = te.placeholder(shape=in_shape, name="A") @@ -1010,6 +1007,16 @@ def test_gather(): verify_gather(np.random.randn(4, 7, 5), 1, np.random.randint(low=0, high=7, size=(4, 10, 5))) verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 2))) verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 10))) + verify_gather( + np.random.randn(4, 7, 5), + 2, + np.random.randint(low=0, high=5, size=(4, 7, 10)).astype("uint32"), + ) + verify_gather( + np.random.randn(4, 7, 5), + 2, + np.random.randint(low=0, high=5, size=(4, 7, 10)).astype("uint8"), + ) @tvm.testing.uses_gpu From eda9f19172426a184e2be845d3957390b0bdc3c7 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 13:51:14 -0800 Subject: [PATCH 25/72] gather supports unsigned integer tests --- tests/python/relay/test_op_level3.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 6d6a2a9b65ed..8c76dff0a0f5 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -21,10 +21,8 @@ import numpy as np import pytest - import tvm import tvm.testing - from tvm import relay, te from tvm.error import TVMError from tvm.relay import create_executor, transform @@ -32,7 +30,6 @@ from utils import ref_funcs - executor_kind = tvm.testing.parameter("graph", "debug") @@ -1267,12 +1264,12 @@ def test_scatter_add(self, target, dev, ref_data, dshape, ishape, axis, dtype): ], ) def test_gather(target, dev, executor_kind, data, axis, indices, ref_res): - def verify_gather(data, axis, indices, ref_res): + def verify_gather(data, axis, indices, ref_res, indices_dtype="int32"): data = np.asarray(data, dtype="float32") - indices = np.asarray(indices, dtype="int32") + indices = np.asarray(indices, dtype=indices_dtype) ref_res = np.asarray(ref_res) d = relay.var("x", relay.TensorType(data.shape, "float32")) - i = relay.var("y", relay.TensorType(indices.shape, "int32")) + i = relay.var("y", relay.TensorType(indices.shape, indices_dtype)) z = relay.gather(d, axis, i) func = relay.Function([d, i], z) @@ -1283,6 +1280,7 @@ def verify_gather(data, axis, indices, ref_res): tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) verify_gather(data, axis, indices, ref_res) + verify_gather(data, axis, indices, ref_res, indices_dtype="uint32") def test_gather_nd(target, dev, executor_kind): From ab25dc0f55fab88792bf6d3d2d22ca88589cca7e Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 14:10:24 -0800 Subject: [PATCH 26/72] fix things 
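This change replaces the TODO docstrings below with real documentation for the
lookup-table helpers. As a rough sketch of how the table builder is meant to be
driven (numpy-level; the tanh choice and the 1/64 and 1/128 scales are taken from
the tests earlier in this series, everything else here is illustrative only):

    import numpy as np
    from tvm import relay
    from tvm.relay.qnn.op import canonicalizations

    # Build a 256-entry int8 -> int8 table approximating tanh. An input scale
    # of 1/64 covers roughly (-2, 2); tanh saturates to (-1, 1), so an output
    # scale of 1/128 covers the full output range.
    table = canonicalizations.create_integer_lookup_table(
        floating_point_func=np.tanh,
        input_scale=relay.const(1.0 / 64.0),
        input_zero_point=relay.const(0, dtype="int32"),
        output_scale=relay.const(1.0 / 128.0),
        output_zero_point=relay.const(0, dtype="int32"),
        in_dtype="int8",
        out_dtype="int8",
    )
    assert table.shape == (256,)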
--- python/tvm/relay/qnn/op/canonicalizations.py | 51 ++++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 23c5e37f7ebe..db8b71e68998 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -23,7 +23,7 @@ def run_const_expr(expr: "relay.Expr") -> np.ndarray: - """Run a const expression, receiving result as np array.""" + """Evaluate a const expression, receiving result as np array.""" mod = tvm.IRModule.from_expr(expr) vm_exe = relay.create_executor("vm", mod=mod) return vm_exe.evaluate()().asnumpy() @@ -41,7 +41,24 @@ def create_integer_lookup_table( out_dtype: str = "uint8", ) -> np.ndarray: """ - TODO + Return a table where each input indexes to the quantized output approximating the given function. + + Note this also supports mapping unsigned and signed integers to each other. + + Args: + floating_point_func: The numpy function which this table is to approximate + input_scale: The scale of the quantized input tensor. + input_zero_point: The zero point of the quantized input tensor. + output_scale: The scale of the quantized output tensor. + output_zero_point: The zero point of the quantized output tensor. + in_axis: The axis for multi-channel quantization of the input if applicable. + out_axis: The axis for multi-channel quantization of the output if applicable. + in_dtype: The dtype of the input tensor. + out_dtype: The wanted dtype of the output tensor. + + Returns: + A numpy array where values in quantized space will index to the output in quantized space + approximating the given function. """ if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( np.dtype(out_dtype), np.integer @@ -98,8 +115,24 @@ def create_integer_lookup_op( out_dtype: str = "uint8", ) -> "relay.Expr": """ - TODO + Create a quantized version of the given floating point unary operation using table lookup. + + Args: + input_arg: The quantized input to the final function. + floating_point_func: The numpy function which this table is to approximate + in_scale: The scale of the quantized input tensor. + in_zero_point: The zero point of the quantized input tensor. + out_scale: The scale of the quantized output tensor. + out_zero_point: The zero point of the quantized output tensor. + in_axis: The axis for multi-channel quantization of the input if applicable. + out_axis: The axis for multi-channel quantization of the output if applicable. + in_dtype: The dtype of the input tensor. + out_dtype: The wanted dtype of the output tensor. + + Returns: + A Relay expression representing a quantized version of the given function. 
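+
+    Example (an illustrative sketch only; the scales and zero points below are
+    made up, and `quantized_input` stands in for any quantized relay expression):
+        op = create_integer_lookup_op(
+            input_arg=quantized_input,
+            floating_point_func=lambda arr: 1 / np.sqrt(arr),
+            in_scale=relay.const(0.05),
+            in_zero_point=relay.const(0, dtype="int32"),
+            out_scale=relay.const(0.1),
+            out_zero_point=relay.const(0, dtype="int32"),
+            in_dtype="uint8",
+            out_dtype="uint8",
+        )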
""" + # TODO: handle multi-channel q, below will fail with multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() @@ -133,12 +166,12 @@ def create_integer_lookup_op( def canonicalize_rsqrt(attrs, args, arg_types): """Canonicalization for rsqrt""" return create_integer_lookup_op( - args[0], - lambda arr: 1 / np.sqrt(arr), - args[1], - args[2], - args[3], - args[4], + input_arg=args[0], + floating_point_func=lambda arr: 1 / np.sqrt(arr), + in_scale=args[1], + in_zero_point=args[2], + out_scale=args[3], + out_zero_point=args[4], in_dtype=arg_types[0].dtype, out_dtype=arg_types[0].dtype, ) From fcc83131feb2b515450301a59809a9a4d74894c1 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 14:37:40 -0800 Subject: [PATCH 27/72] move to legalization --- python/tvm/relay/qnn/op/canonicalizations.py | 15 --------------- python/tvm/relay/qnn/op/legalizations.py | 20 ++++++++++++++++++-- src/relay/qnn/op/rsqrt.cc | 4 +++- tests/python/relay/test_op_qnn_rsqrt.py | 4 +++- 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index db8b71e68998..bd0108d72e64 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -160,18 +160,3 @@ def create_integer_lookup_op( result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, input_arg) return result - - -@register_qnn_canonicalize("qnn.rsqrt") -def canonicalize_rsqrt(attrs, args, arg_types): - """Canonicalization for rsqrt""" - return create_integer_lookup_op( - input_arg=args[0], - floating_point_func=lambda arr: 1 / np.sqrt(arr), - in_scale=args[1], - in_zero_point=args[2], - out_scale=args[3], - out_zero_point=args[4], - in_dtype=arg_types[0].dtype, - out_dtype=arg_types[0].dtype, - ) diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py index 52fe6c8ebe2f..947e9d823134 100644 --- a/python/tvm/relay/qnn/op/legalizations.py +++ b/python/tvm/relay/qnn/op/legalizations.py @@ -17,12 +17,13 @@ # pylint: disable=invalid-name, unused-argument """Backend QNN related feature registration""" import numpy as np - import tvm from tvm import relay from tvm._ffi.base import TVMError -from .. import op as reg +from tvm.relay.qnn.op.canonicalizations import create_integer_lookup_op + from ....topi.x86.utils import target_has_sse42 +from .. import op as reg ################################################# # Register the functions for different operators. @@ -46,6 +47,21 @@ def legalize_qnn_dense(attrs, inputs, types): return qnn_dense_legalize(attrs, inputs, types) +# Registering QNN dense legalization function. +@reg.register_qnn_legalize("qnn.rsqrt") +def legalize_qnn_dense(attrs, inputs, types): + return create_integer_lookup_op( + input_arg=inputs[0], + floating_point_func=lambda arr: 1 / np.sqrt(arr), + in_scale=inputs[1], + in_zero_point=inputs[2], + out_scale=inputs[3], + out_zero_point=inputs[4], + in_dtype=types[0].dtype, + out_dtype=types[0].dtype, + ) + + # Default to None. If overridden by target, this will not be run. # Generic QNN Conv2D legalization function. 
@tvm.target.generic_func diff --git a/src/relay/qnn/op/rsqrt.cc b/src/relay/qnn/op/rsqrt.cc index 6d37b1aa8d5d..93baa308a796 100644 --- a/src/relay/qnn/op/rsqrt.cc +++ b/src/relay/qnn/op/rsqrt.cc @@ -69,7 +69,9 @@ Expr MakeQuantizedRsqrt(Expr x, Expr scale, Expr zero_point, Expr output_scale, return Call(op, {x, scale, zero_point, output_scale, output_zero_point}, Attrs(), {}); } -// Canonicalization set in python/tvm/relay/qnn/op/canonicalizations.py +// Translation to relay is done via canonicalization/legalization functions in python +// e.g. python/tvm/relay/qnn/op/canonicalizations.py or +// python/tvm/relay/qnn/op/legalizations.py RELAY_REGISTER_OP("qnn.rsqrt") .describe("Elementwise rsqrt for quantized tensors.") .set_num_inputs(5) diff --git a/tests/python/relay/test_op_qnn_rsqrt.py b/tests/python/relay/test_op_qnn_rsqrt.py index 1eb9b64057ca..0e40768343bd 100644 --- a/tests/python/relay/test_op_qnn_rsqrt.py +++ b/tests/python/relay/test_op_qnn_rsqrt.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -import tvm import numpy as np +import tvm from tvm import relay @@ -51,6 +51,7 @@ def test_saturation(): func = relay.Function([x], y) mod = tvm.IRModule.from_expr(func) mod = relay.transform.InferType()(mod) + mod = relay.qnn.transform.Legalize()(mod) mod = relay.qnn.transform.CanonicalizeOps()(mod) func = mod["main"] @@ -77,6 +78,7 @@ def test_saturation(): func = relay.Function([x], y) mod = tvm.IRModule.from_expr(func) mod = relay.transform.InferType()(mod) + mod = relay.qnn.transform.Legalize()(mod) mod = relay.qnn.transform.CanonicalizeOps()(mod) func = mod["main"] From 72e150fb685a67700e12dc439eacd23567949f1b Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 11:06:40 -0800 Subject: [PATCH 28/72] jostle ci From 19de289f7ee901dd2d519261158b5ed1ccc28e32 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 16:32:17 -0800 Subject: [PATCH 29/72] linting --- python/tvm/relay/qnn/op/canonicalizations.py | 4 ++-- python/tvm/relay/qnn/op/legalizations.py | 2 +- python/tvm/relay/qnn/op/op.py | 2 +- python/tvm/relay/transform/fake_quantization_to_integer.py | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index bd0108d72e64..05b68e731239 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -14,12 +14,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +"""Consist of utilities and methods for lowering QNN into mainline relay.""" from typing import Callable import numpy as np import tvm from tvm import relay -from tvm.relay.qnn.op.op import register_qnn_canonicalize def run_const_expr(expr: "relay.Expr") -> np.ndarray: @@ -41,7 +41,7 @@ def create_integer_lookup_table( out_dtype: str = "uint8", ) -> np.ndarray: """ - Return a table where each input indexes to the quantized output approximating the given function. + Return a table where each input indexes to the output quantizing the given function. Note this also supports mapping unsigned and signed integers to each other. 
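The signed/unsigned note above works by reinterpreting bit patterns rather than
converting values, which is also what the reinterpret/take rewrite in
[PATCH 30/72] below relies on. A minimal numpy sketch of the trick (illustrative
only, not part of any patch in this series):

    import numpy as np

    # The table is canonically indexed by the unsigned view of the input's bit
    # pattern: int8 value -1 is the pattern 0xFF, i.e. entry 255 of the table.
    patterns = np.arange(0, 256, dtype="uint8")
    as_int8 = patterns.view("int8")
    assert as_int8[255] == -1  # 0xFF reinterprets to -1
    assert as_int8[127] == 127  # non-negative patterns are unchanged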
diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py
index 947e9d823134..fd835d72fc09 100644
--- a/python/tvm/relay/qnn/op/legalizations.py
+++ b/python/tvm/relay/qnn/op/legalizations.py
@@ -49,7 +49,7 @@ def legalize_qnn_dense(attrs, inputs, types):

 # Registering QNN rsqrt legalization function.
 @reg.register_qnn_legalize("qnn.rsqrt")
-def legalize_qnn_dense(attrs, inputs, types):
+def legalize_qnn_rsqrt(attrs, inputs, types):
     return create_integer_lookup_op(
         input_arg=inputs[0],
         floating_point_func=lambda arr: 1 / np.sqrt(arr),
diff --git a/python/tvm/relay/qnn/op/op.py b/python/tvm/relay/qnn/op/op.py
index c83a32e2ce6a..335947b9f7ce 100644
--- a/python/tvm/relay/qnn/op/op.py
+++ b/python/tvm/relay/qnn/op/op.py
@@ -49,7 +49,7 @@ def register_qnn_canonicalize(op_name, legal_op=None, level=10):
     op_name : str
         The name of the operator

-    legal_op: function (attrs: Attrs, args: List[Expr], List[relay.Type]: arg_types) -> new_expr: Expr
+    legal_op: function (Attrs, List[Expr], List[relay.Type]) -> Expr
         The function for transforming an expr to another expr.

     level : int
diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 3337c7cfb894..7398dc98c83d 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -21,6 +21,7 @@ from tvm.ir import TensorAffineType, TupleAffineType

 # import to register canonicalization funcs for fq2i
+# pylint: disable=unused-import
 from tvm.relay.qnn.op import canonicalizations
 from tvm.tir import bijective_layout

From 76fb6bcb6a30193de55d275af39b9a52c3b3f7dd Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 26 Jan 2022 19:07:55 -0800
Subject: [PATCH 30/72] use take instead of gather
---
 python/tvm/relay/qnn/op/canonicalizations.py | 6 ++----
 src/relay/op/tensor/transform.cc             | 3 ++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py
index 05b68e731239..95e0cb60368d 100644
--- a/python/tvm/relay/qnn/op/canonicalizations.py
+++ b/python/tvm/relay/qnn/op/canonicalizations.py
@@ -155,8 +155,6 @@ def create_integer_lookup_op(
     in_dtype_num_bits = in_dtype_info.bits

     lookup_table = relay.const(lookup_table)
-    index_tensor = relay.reshape(input_arg, [-1])
-    index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}")
-    result = relay.gather(lookup_table, -1, index_tensor)
-    result = relay.reshape_like(result, input_arg)
+    index_tensor = relay.reinterpret(input_arg, f"uint{in_dtype_num_bits}")
+    result = relay.take(lookup_table, index_tensor, axis=0, mode="fast")
     return result
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc
index ebf8a0653b70..a5b9f343d664 100644
--- a/src/relay/op/tensor/transform.cc
+++ b/src/relay/op/tensor/transform.cc
@@ -1276,7 +1276,8 @@ bool TakeRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
   if (indices == nullptr) {
     return false;
   }
-  ICHECK(indices->dtype.is_int()) << "indices of take must be tensor of integer";
+  ICHECK(indices->dtype.is_int() || indices->dtype.is_uint())
+      << "indices of take must be tensor of integer";

   const auto param = attrs.as<TakeAttrs>();
   ICHECK(param != nullptr);
From f65583ae814ac96049b1a6cf61e6c1d20b8940a6 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 26 Jan 2022 19:10:46 -0800
Subject: [PATCH 31/72] remove gather changes
---
 include/tvm/topi/transform.h | 2 +-
src/relay/op/tensor/transform.cc | 3 +-- tests/python/relay/test_op_level3.py | 7 +++---- tests/python/topi/python/test_topi_transform.py | 10 ---------- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/include/tvm/topi/transform.h b/include/tvm/topi/transform.h index 83efd1c5825d..1ad9d7da72ba 100644 --- a/include/tvm/topi/transform.h +++ b/include/tvm/topi/transform.h @@ -1321,7 +1321,7 @@ inline Tensor gather(const Tensor& data, int axis, const Tensor& indices, size_t indices_dim_i = static_cast(GetConstInt(indices->shape[axis])); ICHECK_GE(indices_dim_i, 1); } - ICHECK(indices->dtype.is_int() || indices->dtype.is_uint()); + ICHECK(indices->dtype.is_int()); Array out_shape; for (size_t i = 0; i < ndim_i; ++i) { diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index a5b9f343d664..d56150d8aa78 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -3319,8 +3319,7 @@ bool GatherRel(const Array& types, int num_inputs, const Attrs& attrs, << "Gather: expect indices type to be TensorType but get " << types[1]; return false; } - ICHECK(indices->dtype.is_int() || indices->dtype.is_uint()) - << "indices of gather must be tensor of integer"; + ICHECK(indices->dtype.is_int()) << "indices of take must be tensor of integer"; const auto param = attrs.as(); ICHECK(param != nullptr); ICHECK(param->axis.defined()); diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 8c76dff0a0f5..3f345e04d8f0 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -1264,12 +1264,12 @@ def test_scatter_add(self, target, dev, ref_data, dshape, ishape, axis, dtype): ], ) def test_gather(target, dev, executor_kind, data, axis, indices, ref_res): - def verify_gather(data, axis, indices, ref_res, indices_dtype="int32"): + def verify_gather(data, axis, indices, ref_res): data = np.asarray(data, dtype="float32") - indices = np.asarray(indices, dtype=indices_dtype) + indices = np.asarray(indices, dtype="int32") ref_res = np.asarray(ref_res) d = relay.var("x", relay.TensorType(data.shape, "float32")) - i = relay.var("y", relay.TensorType(indices.shape, indices_dtype)) + i = relay.var("y", relay.TensorType(indices.shape, "int32")) z = relay.gather(d, axis, i) func = relay.Function([d, i], z) @@ -1280,7 +1280,6 @@ def verify_gather(data, axis, indices, ref_res, indices_dtype="int32"): tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) verify_gather(data, axis, indices, ref_res) - verify_gather(data, axis, indices, ref_res, indices_dtype="uint32") def test_gather_nd(target, dev, executor_kind): diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index 622d6946e913..c0f595e8b5db 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -1007,16 +1007,6 @@ def test_gather(): verify_gather(np.random.randn(4, 7, 5), 1, np.random.randint(low=0, high=7, size=(4, 10, 5))) verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 2))) verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 10))) - verify_gather( - np.random.randn(4, 7, 5), - 2, - np.random.randint(low=0, high=5, size=(4, 7, 10)).astype("uint32"), - ) - verify_gather( - np.random.randn(4, 7, 5), - 2, - np.random.randint(low=0, high=5, size=(4, 7, 10)).astype("uint8"), - ) @tvm.testing.uses_gpu From 
520f4f1f1c42406730d53f066e6ccf4e7b8e3b1b Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:12:15 -0800 Subject: [PATCH 32/72] undo changes --- tests/python/topi/python/test_topi_transform.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index c0f595e8b5db..cf2431bba982 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -18,11 +18,13 @@ import numpy as np import pytest import tvm -import tvm.testing +from tvm import te +from tvm import topi +from tvm import relay import tvm.topi.testing -from tvm import relay, te, topi from tvm.contrib.nvcc import have_fp16 +import tvm.testing def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): A = te.placeholder(shape=in_shape, name="A") From 7a0f43be4fec5b80c78a9e903ed5d48a5e12b2f4 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:12:35 -0800 Subject: [PATCH 33/72] undo changes --- tests/python/topi/python/test_topi_transform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index cf2431bba982..42d2463b8952 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -26,6 +26,7 @@ import tvm.testing + def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): A = te.placeholder(shape=in_shape, name="A") B = topi.expand_dims(A, axis, num_newaxis) From 6f8f34a3271f27bfe9aae70cd72c6cb382b6522e Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:13:14 -0800 Subject: [PATCH 34/72] undo changes --- tests/python/relay/test_op_level3.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 3f345e04d8f0..6d6a2a9b65ed 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -21,8 +21,10 @@ import numpy as np import pytest + import tvm import tvm.testing + from tvm import relay, te from tvm.error import TVMError from tvm.relay import create_executor, transform @@ -30,6 +32,7 @@ from utils import ref_funcs + executor_kind = tvm.testing.parameter("graph", "debug") From 4e7b96aaa8b11a31694530b37ce0eaa5356a8880 Mon Sep 17 00:00:00 2001 From: "andrewzhaoluo (generated by with_the_same_user script)" Date: Fri, 28 Jan 2022 19:34:45 +0000 Subject: [PATCH 35/72] move thing in range --- .../relay/test_pass_fake_quantization_to_integer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 28166bb8be72..9cc359d472fd 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -305,14 +305,14 @@ def test_fake_quantize_global_avg_pool(): def test_fake_quantize_rsqrt(): - x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") - zero = relay.const(0) + x = relay.var("x", shape=[1, 3, 3, 3], dtype="int8") + mid_point = relay.const(-128) - x = relay.qnn.op.dequantize(x, relay.const(2.0), zero) + x = relay.qnn.op.dequantize(x, relay.const(0.125), mid_point) op = relay.rsqrt(x) - op = relay.qnn.op.quantize(op, relay.const(2.0), zero) + op = relay.qnn.op.quantize(op, relay.const(0.125), mid_point) - x_np = 
np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") + x_np = np.random.randint(-128, 127, size=[1, 3, 3, 3], dtype="int8") compare_fq_to_int(op, [x_np], True) From 40d5a287e52f2d5ec70c69ed6336f37beaf3659d Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 12 Jan 2022 16:31:31 -0800 Subject: [PATCH 36/72] initial tanh impl --- .../transform/fake_quantization_to_integer.py | 104 ++++++++++++++++++ .../test_pass_fake_quantization_to_integer.py | 16 +++ 2 files changed, 120 insertions(+) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index db46c2cbfd58..19e7cee6a9dc 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -92,6 +92,110 @@ def identity(expr, type_map): return register_fake_quantization_to_integer(op_name, identity) +# TODO: replace with constant folding +def run_const_expr(expr): + mod = tvm.IRModule.from_expr(expr) + vm_exe = relay.create_executor("vm", mod=mod) + return vm_exe.evaluate()().asnumpy() + + +def create_integer_lookup_table( + floating_point_func, + input_scale, + input_zero_point, + output_scale, + output_zero_point, + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", +): + if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( + np.dtype(out_dtype), np.integer + ): + raise ValueError( + f"Only integer dtypes allowed got {in_dtype} and {out_dtype} for in and out dtypes." + ) + + dtype_info = np.iinfo(in_dtype) + + # Use TVMs quantization methods via relay to be consistent + inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) + inputs_dequantized = run_const_expr( + relay.qnn.op.dequantize( + inputs_quantized, + input_scale=input_scale, + input_zero_point=input_zero_point, + axis=in_axis, + ) + ) + + output_dequantized = relay.const(floating_point_func(inputs_dequantized)) + output_quantized = run_const_expr( + relay.qnn.op.quantize( + output_dequantized, output_scale, output_zero_point, out_axis, out_dtype + ) + ) + + return output_quantized + + +def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): + """Implement an operator in quantized space via table lookup operations (e.g. via gather). + + op_name: str + The name of the operator to register for FQ2I. + + example_func: Callable[[np.ndarray], np.ndarray] + The FP32 version of the function to quantize operating on numpy arrays. + """ + + def func(expr, type_map): + assert len(expr.args) == 1 + arg = expr.args[0] + in_scale = fold_constant(type_map[arg].scale) + in_zero_point = fold_constant(type_map[arg].zero_point) + out_scale = fold_constant(type_map[expr].scale) + out_zero_point = fold_constant(type_map[expr].zero_point) + if ( + not isinstance(in_scale, relay.Constant) + or not isinstance(in_zero_point, relay.Constant) + or not isinstance(out_scale, relay.Constant) + or not isinstance(out_zero_point, relay.Constant) + ): + raise ValueError( + f"{op_name} requires input/output quantization params to be known at compile time!" 
+ ) + + # TODO: handle multi-channel q + in_scale = in_scale.data.numpy().item() + in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() + out_zero_point = out_zero_point.data.numpy().item() + + lookup_table = create_integer_lookup_table( + floating_point_func, + relay.const(in_scale), + relay.const(in_zero_point, dtype="int32"), + relay.const(out_scale), + relay.const(out_zero_point, dtype="int32"), + in_axis=type_map[arg].axis, + in_dtype=type_map[arg].dtype, + out_axis=type_map[expr].axis, + out_dtype=type_map[expr].dtype, + ) + lookup_table = relay.const(lookup_table) + index_tensor = relay.reshape(arg, [-1]) + result = relay.gather(lookup_table, -1, index_tensor) + result = relay.reshape_like(result, arg) + return [result, type_map[expr]] + + return register_fake_quantization_to_integer(op_name, func) + + +register_unary_elementwise_table_lookup_op("tanh", np.tanh) + register_unary_identity("reshape") register_unary_identity("squeeze") register_unary_identity("strided_slice") diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index aee2741782fd..9a2384705982 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -26,6 +26,7 @@ def compare_fq_to_int(expr, args, allow_rounding_error=False): mod = tvm.relay.transform.InferType()(mod) mod_int = tvm.relay.transform.FakeQuantizationToInteger()(mod) + breakpoint() assert not tvm.ir.structural_equal(mod, mod_int) result = ( @@ -586,6 +587,21 @@ def run_test_case(partial_func): run_test_case(lambda x: relay.op.min(x, axis=1)) +def test_fake_quantize_tanh(): + x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(2.0), zero) + op = relay.op.tanh(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(1.0), zero) + + x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + def test_fq_hard_fail(): @tvm.ir.register_op_attr("nn.conv2d", "FTVMFakeQuantizationToInteger", level=11) def conv2d(expr, type_map): # pylint: disable=unused-variable From 95537afed588596359a6be041ed0a42b2124b4b6 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 12 Jan 2022 16:42:28 -0800 Subject: [PATCH 37/72] smalls error --- python/tvm/relay/transform/fake_quantization_to_integer.py | 1 + tests/python/relay/test_pass_fake_quantization_to_integer.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 19e7cee6a9dc..84c22f8f9a94 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -189,6 +189,7 @@ def func(expr, type_map): index_tensor = relay.reshape(arg, [-1]) result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, arg) + breakpoint() return [result, type_map[expr]] return register_fake_quantization_to_integer(op_name, func) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 9a2384705982..113f1f21759b 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ 
b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -591,11 +591,11 @@ def test_fake_quantize_tanh(): x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(2.0), zero) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) op = relay.op.tanh(x) # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(1.0), zero) + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") From 496c2509b4b7ef3a375070b71aa0d698486bb4eb Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 18 Jan 2022 13:48:16 -0800 Subject: [PATCH 38/72] support uint and int lookup into tables --- .../transform/fake_quantization_to_integer.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 84c22f8f9a94..24907dc0c2ec 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -119,8 +119,20 @@ def create_integer_lookup_table( dtype_info = np.iinfo(in_dtype) + num_bits = dtype_info.bits + # Use TVMs quantization methods via relay to be consistent - inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + # inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + + # First generate a list of all num_bit integer patterns + inputs_quantized = np.array(range(0, 2 ** num_bits), dtype=f"uint{num_bits}") + + # Reinterpret bits as the real datatype + # Note what we are doing here is a bit tricky, the canonical view of our lookup table + # is using the uintX version. When we run the lookup in the relay graph, we note + # that the "gather" operation used supports negative indices which make the mapping + # valid! + inputs_quantized = inputs_quantized.view(in_dtype) inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) inputs_dequantized = run_const_expr( relay.qnn.op.dequantize( @@ -185,11 +197,11 @@ def func(expr, type_map): out_axis=type_map[expr].axis, out_dtype=type_map[expr].dtype, ) + lookup_table = relay.const(lookup_table) index_tensor = relay.reshape(arg, [-1]) result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, arg) - breakpoint() return [result, type_map[expr]] return register_fake_quantization_to_integer(op_name, func) From 2334e1c364a79ea2b4cea7ed4f26459a660b0e41 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 18 Jan 2022 15:40:08 -0800 Subject: [PATCH 39/72] reinterpret cast, working tanh tests --- .../relay/transform/fake_quantization_to_integer.py | 11 +++++++---- .../relay/test_pass_fake_quantization_to_integer.py | 5 ++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 24907dc0c2ec..e15398c93e06 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -129,9 +129,8 @@ def create_integer_lookup_table( # Reinterpret bits as the real datatype # Note what we are doing here is a bit tricky, the canonical view of our lookup table - # is using the uintX version. 
When we run the lookup in the relay graph, we note - # that the "gather" operation used supports negative indices which make the mapping - # valid! + # is using the uintX version. When we run the lookup in the relay graph, we cast the + # bit pattern back into this form. inputs_quantized = inputs_quantized.view(in_dtype) inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) inputs_dequantized = run_const_expr( @@ -197,9 +196,13 @@ def func(expr, type_map): out_axis=type_map[expr].axis, out_dtype=type_map[expr].dtype, ) - + + in_dtype_info = np.iinfo(type_map[arg].dtype) + in_dtype_num_bits = in_dtype_info.bits + lookup_table = relay.const(lookup_table) index_tensor = relay.reshape(arg, [-1]) + index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, arg) return [result, type_map[expr]] diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 113f1f21759b..8c016e4c8976 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -26,7 +26,6 @@ def compare_fq_to_int(expr, args, allow_rounding_error=False): mod = tvm.relay.transform.InferType()(mod) mod_int = tvm.relay.transform.FakeQuantizationToInteger()(mod) - breakpoint() assert not tvm.ir.structural_equal(mod, mod_int) result = ( @@ -588,7 +587,7 @@ def run_test_case(partial_func): def test_fake_quantize_tanh(): - x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") zero = relay.const(0) x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) @@ -597,7 +596,7 @@ def test_fake_quantize_tanh(): # Have difference scales for input/output to test if can handle op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") compare_fq_to_int(op, [x_np]) From 5c65eb17f648fc4fe5ecfcb55bcad32e63147b9b Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 09:47:56 -0800 Subject: [PATCH 40/72] refactor relay func creation --- .../transform/fake_quantization_to_integer.py | 65 ++++++++++++++++++- .../test_pass_fake_quantization_to_integer.py | 27 ++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index e15398c93e06..eca3d12d7bcd 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -152,6 +152,50 @@ def create_integer_lookup_table( return output_quantized +def create_integer_lookup_op( + input_arg, + floating_point_func, + in_scale, + in_zero_point, + out_scale, + out_zero_point, + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", +): + """ + TODO + """ + # TODO: handle multi-channel q + in_scale = in_scale.data.numpy().item() + in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() + out_zero_point = out_zero_point.data.numpy().item() + + lookup_table = create_integer_lookup_table( + floating_point_func, + relay.const(in_scale), + relay.const(in_zero_point, dtype="int32"), + relay.const(out_scale), + relay.const(out_zero_point, dtype="int32"), + in_axis=in_axis, + in_dtype=in_dtype, + 
out_axis=out_axis, + out_dtype=out_dtype, + ) + + in_dtype_info = np.iinfo(in_dtype) + in_dtype_num_bits = in_dtype_info.bits + + lookup_table = relay.const(lookup_table) + index_tensor = relay.reshape(input_arg, [-1]) + index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") + result = relay.gather(lookup_table, -1, index_tensor) + result = relay.reshape_like(result, input_arg) + return result + + def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): """Implement an operator in quantized space via table lookup operations (e.g. via gather). @@ -163,12 +207,31 @@ def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): """ def func(expr, type_map): - assert len(expr.args) == 1 + assert len(expr.args) == 1, "only support elemwise ops for now!" arg = expr.args[0] in_scale = fold_constant(type_map[arg].scale) in_zero_point = fold_constant(type_map[arg].zero_point) out_scale = fold_constant(type_map[expr].scale) out_zero_point = fold_constant(type_map[expr].zero_point) + in_axis = type_map[arg].axis + in_dtype = type_map[arg].dtype + out_axis = type_map[expr].axis + out_dtype = type_map[expr].dtype + result = create_integer_lookup_op( + input_arg=arg, + floating_point_func=floating_point_func, + in_scale=in_scale, + in_zero_point=in_zero_point, + out_scale=out_scale, + out_zero_point=out_zero_point, + in_axis=in_axis, + in_dtype=in_dtype, + out_axis=out_axis, + out_dtype=out_dtype, + ) + return [result, type_map[expr]] + arg = expr.args[0] + if ( not isinstance(in_scale, relay.Constant) or not isinstance(in_zero_point, relay.Constant) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 8c016e4c8976..a23a316ae50f 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -19,6 +19,7 @@ import pytest import tvm from tvm import relay +from tvm.relay.transform import fake_quantization_to_integer def compare_fq_to_int(expr, args, allow_rounding_error=False): @@ -586,6 +587,32 @@ def run_test_case(partial_func): run_test_case(lambda x: relay.op.min(x, axis=1)) +class TestIntegerTableLookupTable: + """Consists of tests testing functionality of creating lookup tables for integer operations.""" + + # def __init__(self) -> None: + # self.input = np.arange(start=0, stop=256, dtype="uint8") + + def fake_identity_func_numpy(self, arr: np.ndarray): + return arr.astype("float32") + + """ + def fake_identity_func_relay(self): + fake_quantization_to_integer.register_fake_quantization_to_integer.fu + """ + + def test_int8_to_int8(self): + relay_result = fake_quantization_to_integer.create_integer_lookup_table( + self.uint8_identity_func, + relay.const(1.0, dtype="float32"), + relay.const(0, dtype="int32"), + relay.const(1.0, dtype="float32"), + relay.const(0, dtype="int32"), + in_dtype="int8", + out_dtype="int8", + ) + + def test_fake_quantize_tanh(): x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") From 7b865e09c49f7805a9a9ef34422ee4346e48e3d8 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 10:15:41 -0800 Subject: [PATCH 41/72] basic casting tests --- .../transform/fake_quantization_to_integer.py | 39 ---------- .../test_pass_fake_quantization_to_integer.py | 74 ++++++++++++++++--- 2 files changed, 64 insertions(+), 49 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py 
b/python/tvm/relay/transform/fake_quantization_to_integer.py index eca3d12d7bcd..96f5faab9b90 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -230,45 +230,6 @@ def func(expr, type_map): out_dtype=out_dtype, ) return [result, type_map[expr]] - arg = expr.args[0] - - if ( - not isinstance(in_scale, relay.Constant) - or not isinstance(in_zero_point, relay.Constant) - or not isinstance(out_scale, relay.Constant) - or not isinstance(out_zero_point, relay.Constant) - ): - raise ValueError( - f"{op_name} requires input/output quantization params to be known at compile time!" - ) - - # TODO: handle multi-channel q - in_scale = in_scale.data.numpy().item() - in_zero_point = in_zero_point.data.numpy().item() - out_scale = out_scale.data.numpy().item() - out_zero_point = out_zero_point.data.numpy().item() - - lookup_table = create_integer_lookup_table( - floating_point_func, - relay.const(in_scale), - relay.const(in_zero_point, dtype="int32"), - relay.const(out_scale), - relay.const(out_zero_point, dtype="int32"), - in_axis=type_map[arg].axis, - in_dtype=type_map[arg].dtype, - out_axis=type_map[expr].axis, - out_dtype=type_map[expr].dtype, - ) - - in_dtype_info = np.iinfo(type_map[arg].dtype) - in_dtype_num_bits = in_dtype_info.bits - - lookup_table = relay.const(lookup_table) - index_tensor = relay.reshape(arg, [-1]) - index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") - result = relay.gather(lookup_table, -1, index_tensor) - result = relay.reshape_like(result, arg) - return [result, type_map[expr]] return register_fake_quantization_to_integer(op_name, func) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index a23a316ae50f..714dce4e6186 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -596,21 +596,75 @@ class TestIntegerTableLookupTable: def fake_identity_func_numpy(self, arr: np.ndarray): return arr.astype("float32") - """ - def fake_identity_func_relay(self): - fake_quantization_to_integer.register_fake_quantization_to_integer.fu - """ + def fake_identity_func_relay( + self, + input_arg=None, + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(0, dtype="int32"), + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(0, dtype="int32"), + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", + ): + if input_arg is None: + input_arg = relay.const(np.arange(0, 256, dtype="uint8").view(in_dtype)) + + return ( + fake_quantization_to_integer.create_integer_lookup_op( + input_arg=input_arg, + floating_point_func=self.fake_identity_func_numpy, + in_scale=in_scale, + in_zero_point=in_zero_point, + out_scale=out_scale, + out_zero_point=out_zero_point, + in_axis=in_axis, + out_axis=out_axis, + in_dtype=in_dtype, + out_dtype=out_dtype, + ), + input_arg.data.numpy(), + ) + + def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): + return (np_arr.astype("int32") - np_zero_point) * np_scale def test_int8_to_int8(self): - relay_result = fake_quantization_to_integer.create_integer_lookup_table( - self.uint8_identity_func, - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") + 
result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_uint8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_int8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(128, dtype="int32"), in_dtype="int8", + out_dtype="uint8", + ) + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg), + self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), + ) + + def test_uint8_to_int8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(128, dtype="int32"), + in_dtype="uint8", out_dtype="int8", ) + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), + self.dequantize_numpy(result), + ) def test_fake_quantize_tanh(): From f2934c015707cb883014b2b6637eb7b17d3b0f3d Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 10:18:26 -0800 Subject: [PATCH 42/72] explicitly say do not handle multi-channel lookups --- .../relay/transform/fake_quantization_to_integer.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 96f5faab9b90..f7278801fec2 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -167,12 +167,20 @@ def create_integer_lookup_op( """ TODO """ - # TODO: handle multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() out_scale = out_scale.data.numpy().item() out_zero_point = out_zero_point.data.numpy().item() + # TODO: handle multi-channel q + if ( + in_scale.size() > 1 + or out_scale.size() > 1 + or in_zero_point.size() > 1 + or out_zero_point.size() > 1 + ): + raise ValueError("Do no support multi-channel quantization for now") + lookup_table = create_integer_lookup_table( floating_point_func, relay.const(in_scale), From a16a35269364367c8b32888d9dcd4a3a45f80ba3 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 12:27:56 -0800 Subject: [PATCH 43/72] add example funcs --- .../transform/fake_quantization_to_integer.py | 6 ++- .../test_pass_fake_quantization_to_integer.py | 45 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index f7278801fec2..c595202ea541 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -169,6 +169,7 @@ def create_integer_lookup_op( """ in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() out_zero_point = out_zero_point.data.numpy().item() @@ -179,7 +180,7 @@ def create_integer_lookup_op( or in_zero_point.size() > 1 or out_zero_point.size() > 1 ): - raise 
ValueError("Do no support multi-channel quantization for now") + raise ValueError("Do not support multi-channel quantization for now") lookup_table = create_integer_lookup_table( floating_point_func, @@ -243,6 +244,9 @@ def func(expr, type_map): register_unary_elementwise_table_lookup_op("tanh", np.tanh) +register_unary_elementwise_table_lookup_op("erf", np.math.erf) +register_unary_elementwise_table_lookup_op("exp", np.math.exp) +register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) register_unary_identity("reshape") register_unary_identity("squeeze") diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 714dce4e6186..a7ab19bd059f 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -682,6 +682,51 @@ def test_fake_quantize_tanh(): compare_fq_to_int(op, [x_np]) +def test_fake_quantize_erf(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.erf(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_exp(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.exp(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_sigmoid(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.sigmoid(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + def test_fq_hard_fail(): @tvm.ir.register_op_attr("nn.conv2d", "FTVMFakeQuantizationToInteger", level=11) def conv2d(expr, type_map): # pylint: disable=unused-variable From b28a65e3b2704a59dda3193b74ae4168b086c339 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 12:34:53 -0800 Subject: [PATCH 44/72] fix silent fail --- .../relay/transform/fake_quantization_to_integer.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index c595202ea541..63dabf98088e 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -167,21 +167,13 @@ def create_integer_lookup_op( """ TODO """ + + # TODO: handle multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() - out_scale = out_scale.data.numpy().item() out_zero_point = out_zero_point.data.numpy().item() - # TODO: handle multi-channel q - if ( - in_scale.size() > 1 - or out_scale.size() > 1 - or in_zero_point.size() > 1 - or out_zero_point.size() > 1 - ): - raise ValueError("Do not support multi-channel quantization for now") - 
 lookup_table = create_integer_lookup_table(

From fb22ee30c9ad3df384b36f160199be540aec0806 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 19 Jan 2022 12:46:13 -0800
Subject: [PATCH 45/72] fix some bugs with floating point funcs not working

---
 python/tvm/relay/transform/fake_quantization_to_integer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 63dabf98088e..2e293d44fc7d 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -17,6 +17,7 @@
 """Relay functions for rewriting fake quantized ops."""
 import numpy as np
 import tvm
+from scipy import special
 from tvm import relay
 from tvm.ir import TensorAffineType, TupleAffineType
 from tvm.tir import bijective_layout
@@ -236,8 +236,8 @@ def func(expr, type_map):

 register_unary_elementwise_table_lookup_op("tanh", np.tanh)
-register_unary_elementwise_table_lookup_op("erf", np.math.erf)
-register_unary_elementwise_table_lookup_op("exp", np.math.exp)
+register_unary_elementwise_table_lookup_op("erf", special.erf)
+register_unary_elementwise_table_lookup_op("exp", np.exp)
 register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x)))

From 0a03d46b9fe2ae77ebcdc1e1b18fa6bfa639f07f Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 19 Jan 2022 12:48:56 -0800
Subject: [PATCH 46/72] add TODO

---
 python/tvm/relay/transform/fake_quantization_to_integer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 2e293d44fc7d..d6a9d534767a 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -235,6 +235,7 @@ def func(expr, type_map):
     return register_fake_quantization_to_integer(op_name, func)


+# TODO: better error messages if reference functions fail in FQ2I pass
 register_unary_elementwise_table_lookup_op("tanh", np.tanh)
 register_unary_elementwise_table_lookup_op("erf", special.erf)
 register_unary_elementwise_table_lookup_op("exp", np.exp)

From f8a5114042d3c239fd1903c511d10991932b060d Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Fri, 21 Jan 2022 16:20:38 -0800
Subject: [PATCH 47/72] add todo

---
 python/tvm/relay/transform/fake_quantization_to_integer.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index d6a9d534767a..432870dc98dc 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -111,6 +111,9 @@ def create_integer_lookup_table(
     in_dtype="uint8",
     out_dtype="uint8",
 ):
+    """
+    TODO
+    """
     if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype(
         np.dtype(out_dtype), np.integer
     ):

From cc2f5a9f845e6377cb0b9aa8114eb904c389c392 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Mon, 24 Jan 2022 12:50:19 -0800
Subject: [PATCH 48/72] canonicalizations

---
 python/tvm/relay/qnn/op/__init__.py          |  4 ++--
 python/tvm/relay/qnn/op/canonicalizations.py |  0
 python/tvm/relay/qnn/op/op.py                | 25 +++++++++++++++++++-
 3 files changed, 26 insertions(+), 3 deletions(-)
 create mode 100644 python/tvm/relay/qnn/op/canonicalizations.py

diff --git a/python/tvm/relay/qnn/op/__init__.py b/python/tvm/relay/qnn/op/__init__.py
index 848409360a9d..745050e286e8 100644
--- a/python/tvm/relay/qnn/op/__init__.py
+++ b/python/tvm/relay/qnn/op/__init__.py
@@ -18,5 +18,5 @@
 """QNN dialect related operators."""
 from __future__ import absolute_import as _abs
 from .qnn import *
-from .op import register_qnn_legalize
-from . import _qnn, legalizations, layout_conversions
+from .op import register_qnn_legalize, register_qnn_canonicalize
+from . import _qnn, legalizations, layout_conversions, canonicalizations
diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/python/tvm/relay/qnn/op/op.py b/python/tvm/relay/qnn/op/op.py
index 32a61229951c..c83a32e2ce6a 100644
--- a/python/tvm/relay/qnn/op/op.py
+++ b/python/tvm/relay/qnn/op/op.py
@@ -20,7 +20,10 @@


 def register_qnn_legalize(op_name, legal_op=None, level=10):
-    """Register legal transformation function for a QNN op
+    """Register legal transformation function for a QNN op.
+
+    This helps QNN match hardware intrinsics better and is run before
+    canonicalization.

     Parameters
     ----------
@@ -34,3 +37,23 @@ def register_qnn_legalize(op_name, legal_op=None, level=10):
     The priority level
     """
     return tvm.ir.register_op_attr(op_name, "FTVMQnnLegalize", legal_op, level)
+
+
+def register_qnn_canonicalize(op_name, legal_op=None, level=10):
+    """Register canonicalization function for a QNN op.
+
+    This transforms QNN ops to mainline Relay components.
+
+    Parameters
+    ----------
+    op_name : str
+        The name of the operator
+
+    legal_op: function (attrs: Attrs, args: List[Expr], arg_types: List[relay.Type]) -> new_expr: Expr
+        The function for transforming an expr to another expr.
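+        For example (an illustrative sketch, not prescribed by this patch), a
+        canonicalizer for qnn.rsqrt receives
+        args = [data, scale, zero_point, output_scale, output_zero_point]
+        and returns an equivalent expression built from mainline Relay ops.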
+ + level : int + The priority level + """ + + return tvm.ir.register_op_attr(op_name, "FTVMQnnCanonicalize", legal_op, level) From 16aad845e0f08d78cce75eed4f82e3c8e7059371 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:02:57 -0800 Subject: [PATCH 49/72] refactor integer lookup ops into own folder --- python/tvm/relay/qnn/op/canonicalizations.py | 121 ++++++++++++++ .../transform/fake_quantization_to_integer.py | 151 ------------------ .../relay/qnn/test_canonicalizations.py | 146 +++++++++++++++++ .../test_pass_fake_quantization_to_integer.py | 140 ---------------- 4 files changed, 267 insertions(+), 291 deletions(-) create mode 100644 tests/python/relay/qnn/test_canonicalizations.py diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index e69de29bb2d1..d419e3eb7a8e 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -0,0 +1,121 @@ +from typing import Callable + +import numpy as np +import tvm +from tvm import relay + + +# TODO: replace with constant folding +def run_const_expr(expr: "relay.Expr") -> np.ndarray: + mod = tvm.IRModule.from_expr(expr) + vm_exe = relay.create_executor("vm", mod=mod) + return vm_exe.evaluate()().asnumpy() + + +def create_integer_lookup_table( + floating_point_func: Callable[[np.ndarray], np.ndarray], + input_scale: "relay.Expr", + input_zero_point: "relay.Expr", + output_scale: "relay.Expr", + output_zero_point: "relay.Expr", + in_axis: int = -1, + out_axis: int = -1, + in_dtype: str = "uint8", + out_dtype: str = "uint8", +) -> np.ndarray: + """ + TODO + """ + if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( + np.dtype(out_dtype), np.integer + ): + raise ValueError( + f"Only integer dtypes allowed got {in_dtype} and {out_dtype} for in and out dtypes." + ) + + dtype_info = np.iinfo(in_dtype) + + num_bits = dtype_info.bits + + # Use TVMs quantization methods via relay to be consistent + # inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + + # First generate a list of all num_bit integer patterns + inputs_quantized = np.array(range(0, 2 ** num_bits), dtype=f"uint{num_bits}") + + # Reinterpret bits as the real datatype + # Note what we are doing here is a bit tricky, the canonical view of our lookup table + # is using the uintX version. When we run the lookup in the relay graph, we cast the + # bit pattern back into this form. 
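+    # Concretely (a worked example, assuming in_dtype="int8"): the bit patterns
+    # 0..255 are enumerated as uint8, and .view("int8") reinterprets pattern 0xFF
+    # (255) as -1, so table row 255 holds the output for quantized input -1. At
+    # lookup time the indices are reinterpreted back to uint8, mapping -1 to row
+    # 255 again.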
+ inputs_quantized = inputs_quantized.view(in_dtype) + inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) + inputs_dequantized = run_const_expr( + relay.qnn.op.dequantize( + inputs_quantized, + input_scale=input_scale, + input_zero_point=input_zero_point, + axis=in_axis, + ) + ) + + output_dequantized = relay.const(floating_point_func(inputs_dequantized)) + output_quantized = run_const_expr( + relay.qnn.op.quantize( + output_dequantized, output_scale, output_zero_point, out_axis, out_dtype + ) + ) + + return output_quantized + + +def create_integer_lookup_op( + input_arg: "relay.Expr", + floating_point_func: Callable[[np.array], np.array], + in_scale: "relay.Expr", + in_zero_point: "relay.Expr", + out_scale: "relay.Expr", + out_zero_point: "relay.Expr", + in_axis: int = -1, + out_axis: int = -1, + in_dtype: str = "uint8", + out_dtype: str = "uint8", +) -> "relay.Expr": + """ + TODO + """ + # TODO: handle multi-channel q + in_scale = in_scale.data.numpy().item() + in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() + out_zero_point = out_zero_point.data.numpy().item() + + lookup_table = create_integer_lookup_table( + floating_point_func, + relay.const(in_scale), + relay.const(in_zero_point, dtype="int32"), + relay.const(out_scale), + relay.const(out_zero_point, dtype="int32"), + in_axis=in_axis, + in_dtype=in_dtype, + out_axis=out_axis, + out_dtype=out_dtype, + ) + + in_dtype_info = np.iinfo(in_dtype) + in_dtype_num_bits = in_dtype_info.bits + + lookup_table = relay.const(lookup_table) + index_tensor = relay.reshape(input_arg, [-1]) + index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") + result = relay.gather(lookup_table, -1, index_tensor) + result = relay.reshape_like(result, input_arg) + return result + + +""" +# TODO: better error messages if reference functions fail in FQ2I pass +register_unary_elementwise_table_lookup_op("tanh", np.tanh) +register_unary_elementwise_table_lookup_op("erf", special.erf) +register_unary_elementwise_table_lookup_op("exp", np.exp) +register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) +""" diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 432870dc98dc..9fdc2186a397 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -93,157 +93,6 @@ def identity(expr, type_map): return register_fake_quantization_to_integer(op_name, identity) -# TODO: replace with constant folding -def run_const_expr(expr): - mod = tvm.IRModule.from_expr(expr) - vm_exe = relay.create_executor("vm", mod=mod) - return vm_exe.evaluate()().asnumpy() - - -def create_integer_lookup_table( - floating_point_func, - input_scale, - input_zero_point, - output_scale, - output_zero_point, - in_axis=-1, - out_axis=-1, - in_dtype="uint8", - out_dtype="uint8", -): - """ - TODO - """ - if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( - np.dtype(out_dtype), np.integer - ): - raise ValueError( - f"Only integer dtypes allowed got {in_dtype} and {out_dtype} for in and out dtypes." 
- ) - - dtype_info = np.iinfo(in_dtype) - - num_bits = dtype_info.bits - - # Use TVMs quantization methods via relay to be consistent - # inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) - - # First generate a list of all num_bit integer patterns - inputs_quantized = np.array(range(0, 2 ** num_bits), dtype=f"uint{num_bits}") - - # Reinterpret bits as the real datatype - # Note what we are doing here is a bit tricky, the canonical view of our lookup table - # is using the uintX version. When we run the lookup in the relay graph, we cast the - # bit pattern back into this form. - inputs_quantized = inputs_quantized.view(in_dtype) - inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) - inputs_dequantized = run_const_expr( - relay.qnn.op.dequantize( - inputs_quantized, - input_scale=input_scale, - input_zero_point=input_zero_point, - axis=in_axis, - ) - ) - - output_dequantized = relay.const(floating_point_func(inputs_dequantized)) - output_quantized = run_const_expr( - relay.qnn.op.quantize( - output_dequantized, output_scale, output_zero_point, out_axis, out_dtype - ) - ) - - return output_quantized - - -def create_integer_lookup_op( - input_arg, - floating_point_func, - in_scale, - in_zero_point, - out_scale, - out_zero_point, - in_axis=-1, - out_axis=-1, - in_dtype="uint8", - out_dtype="uint8", -): - """ - TODO - """ - # TODO: handle multi-channel q - in_scale = in_scale.data.numpy().item() - in_zero_point = in_zero_point.data.numpy().item() - out_scale = out_scale.data.numpy().item() - out_zero_point = out_zero_point.data.numpy().item() - - lookup_table = create_integer_lookup_table( - floating_point_func, - relay.const(in_scale), - relay.const(in_zero_point, dtype="int32"), - relay.const(out_scale), - relay.const(out_zero_point, dtype="int32"), - in_axis=in_axis, - in_dtype=in_dtype, - out_axis=out_axis, - out_dtype=out_dtype, - ) - - in_dtype_info = np.iinfo(in_dtype) - in_dtype_num_bits = in_dtype_info.bits - - lookup_table = relay.const(lookup_table) - index_tensor = relay.reshape(input_arg, [-1]) - index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") - result = relay.gather(lookup_table, -1, index_tensor) - result = relay.reshape_like(result, input_arg) - return result - - -def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): - """Implement an operator in quantized space via table lookup operations (e.g. via gather). - - op_name: str - The name of the operator to register for FQ2I. - - example_func: Callable[[np.ndarray], np.ndarray] - The FP32 version of the function to quantize operating on numpy arrays. - """ - - def func(expr, type_map): - assert len(expr.args) == 1, "only support elemwise ops for now!" 
- arg = expr.args[0] - in_scale = fold_constant(type_map[arg].scale) - in_zero_point = fold_constant(type_map[arg].zero_point) - out_scale = fold_constant(type_map[expr].scale) - out_zero_point = fold_constant(type_map[expr].zero_point) - in_axis = type_map[arg].axis - in_dtype = type_map[arg].dtype - out_axis = type_map[expr].axis - out_dtype = type_map[expr].dtype - result = create_integer_lookup_op( - input_arg=arg, - floating_point_func=floating_point_func, - in_scale=in_scale, - in_zero_point=in_zero_point, - out_scale=out_scale, - out_zero_point=out_zero_point, - in_axis=in_axis, - in_dtype=in_dtype, - out_axis=out_axis, - out_dtype=out_dtype, - ) - return [result, type_map[expr]] - - return register_fake_quantization_to_integer(op_name, func) - - -# TODO: better error messages if reference functions fail in FQ2I pass -register_unary_elementwise_table_lookup_op("tanh", np.tanh) -register_unary_elementwise_table_lookup_op("erf", special.erf) -register_unary_elementwise_table_lookup_op("exp", np.exp) -register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) - register_unary_identity("reshape") register_unary_identity("squeeze") register_unary_identity("strided_slice") diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py new file mode 100644 index 000000000000..aad6bb3ede19 --- /dev/null +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -0,0 +1,146 @@ +import numpy as np +import tvm +from tvm import relay +from tvm.relay.qnn.op import canonicalizations + + +class TestIntegerTableLookupTable: + """Consists of tests testing functionality of creating lookup tables for integer operations.""" + + # def __init__(self) -> None: + # self.input = np.arange(start=0, stop=256, dtype="uint8") + + def fake_identity_func_numpy(self, arr: np.ndarray): + return arr.astype("float32") + + def fake_identity_func_relay( + self, + input_arg=None, + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(0, dtype="int32"), + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(0, dtype="int32"), + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", + ): + if input_arg is None: + input_arg = relay.const(np.arange(0, 256, dtype="uint8").view(in_dtype)) + + return ( + canonicalizations.create_integer_lookup_op( + input_arg=input_arg, + floating_point_func=self.fake_identity_func_numpy, + in_scale=in_scale, + in_zero_point=in_zero_point, + out_scale=out_scale, + out_zero_point=out_zero_point, + in_axis=in_axis, + out_axis=out_axis, + in_dtype=in_dtype, + out_dtype=out_dtype, + ), + input_arg.data.numpy(), + ) + + def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): + return (np_arr.astype("int32") - np_zero_point) * np_scale + + def test_int8_to_int8(self): + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_uint8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_int8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(128, dtype="int32"), + 
in_dtype="int8", + out_dtype="uint8", + ) + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg), + self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), + ) + + def test_uint8_to_int8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(128, dtype="int32"), + in_dtype="uint8", + out_dtype="int8", + ) + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), + self.dequantize_numpy(result), + ) + + +""" +def test_fake_quantize_tanh(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.tanh(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_erf(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.erf(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_exp(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.exp(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_sigmoid(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.sigmoid(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) +""" diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index a7ab19bd059f..28166bb8be72 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -587,146 +587,6 @@ def run_test_case(partial_func): run_test_case(lambda x: relay.op.min(x, axis=1)) -class TestIntegerTableLookupTable: - """Consists of tests testing functionality of creating lookup tables for integer operations.""" - - # def __init__(self) -> None: - # self.input = np.arange(start=0, stop=256, dtype="uint8") - - def fake_identity_func_numpy(self, arr: np.ndarray): - return arr.astype("float32") - - def fake_identity_func_relay( - self, - input_arg=None, - in_scale=relay.const(1.0, dtype="float32"), - in_zero_point=relay.const(0, dtype="int32"), - out_scale=relay.const(1.0, dtype="float32"), - out_zero_point=relay.const(0, dtype="int32"), - in_axis=-1, - out_axis=-1, - in_dtype="uint8", - out_dtype="uint8", - ): - if input_arg is None: - input_arg = relay.const(np.arange(0, 256, dtype="uint8").view(in_dtype)) - - return ( - 
fake_quantization_to_integer.create_integer_lookup_op( - input_arg=input_arg, - floating_point_func=self.fake_identity_func_numpy, - in_scale=in_scale, - in_zero_point=in_zero_point, - out_scale=out_scale, - out_zero_point=out_zero_point, - in_axis=in_axis, - out_axis=out_axis, - in_dtype=in_dtype, - out_dtype=out_dtype, - ), - input_arg.data.numpy(), - ) - - def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): - return (np_arr.astype("int32") - np_zero_point) * np_scale - - def test_int8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) - - def test_uint8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) - - def test_int8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - out_scale=relay.const(1.0, dtype="float32"), - out_zero_point=relay.const(128, dtype="int32"), - in_dtype="int8", - out_dtype="uint8", - ) - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg), - self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), - ) - - def test_uint8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - in_scale=relay.const(1.0, dtype="float32"), - in_zero_point=relay.const(128, dtype="int32"), - in_dtype="uint8", - out_dtype="int8", - ) - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), - self.dequantize_numpy(result), - ) - - -def test_fake_quantize_tanh(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.tanh(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_erf(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.erf(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_exp(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.exp(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_sigmoid(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.sigmoid(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = 
np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - def test_fq_hard_fail(): @tvm.ir.register_op_attr("nn.conv2d", "FTVMFakeQuantizationToInteger", level=11) def conv2d(expr, type_map): # pylint: disable=unused-variable From eacf38368fb0975b0d4e3fee91673cae2f421011 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:09:33 -0800 Subject: [PATCH 50/72] fq2i stuff --- python/tvm/relay/transform/fake_quantization_to_integer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 9fdc2186a397..3337c7cfb894 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -17,9 +17,11 @@ """Relay functions for rewriting fake quantized ops.""" import numpy as np import tvm -from scipy import special from tvm import relay from tvm.ir import TensorAffineType, TupleAffineType + +# import to register canonicalization funcs for fq2i +from tvm.relay.qnn.op import canonicalizations from tvm.tir import bijective_layout from ..op import register_fake_quantization_to_integer From f1753c96fc4844bd9ebed743c6a1a3d7a7261161 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:33:35 -0800 Subject: [PATCH 51/72] clean up existing tests --- python/tvm/relay/qnn/op/canonicalizations.py | 2 +- .../relay/qnn/test_canonicalizations.py | 90 ++++++++++++++----- 2 files changed, 67 insertions(+), 25 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index d419e3eb7a8e..334b824f8b50 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -5,8 +5,8 @@ from tvm import relay -# TODO: replace with constant folding def run_const_expr(expr: "relay.Expr") -> np.ndarray: + """Run a const expression, receiving result as np array.""" mod = tvm.IRModule.from_expr(expr) vm_exe = relay.create_executor("vm", mod=mod) return vm_exe.evaluate()().asnumpy() diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index aad6bb3ede19..0f9939f1cbd8 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -7,9 +7,6 @@ class TestIntegerTableLookupTable: """Consists of tests testing functionality of creating lookup tables for integer operations.""" - # def __init__(self) -> None: - # self.input = np.arange(start=0, stop=256, dtype="uint8") - def fake_identity_func_numpy(self, arr: np.ndarray): return arr.astype("float32") @@ -47,40 +44,85 @@ def fake_identity_func_relay( def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): return (np_arr.astype("int32") - np_zero_point) * np_scale - def test_int8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") + def run_identity_function_test( + self, + in_scale: float, + in_zero_point: int, + out_scale: float, + out_zero_point: int, + in_dtype: str, + out_dtype: str, + rtol=1e-7, + atol=0, + ): + relay_lookup, input_arg = self.fake_identity_func_relay( + in_scale=relay.const(in_scale, "float32"), + in_zero_point=relay.const(in_zero_point, "int32"), + out_scale=relay.const(out_scale, "float32"), + out_zero_point=relay.const(out_zero_point, "int32"), + in_dtype=in_dtype, + out_dtype=out_dtype, + ) result = 
canonicalizations.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + np.testing.assert_allclose( + self.dequantize_numpy(input_arg, np_scale=in_scale, np_zero_point=in_zero_point), + self.dequantize_numpy(result, np_scale=out_scale, np_zero_point=out_zero_point), + atol=atol, + rtol=rtol, + ) + + def test_int8_to_int8(self): + """Test int8 input to int8 output mapping workings""" + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=0, + out_scale=1.0, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + ) def test_uint8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") - result = canonicalizations.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=128, + out_scale=1.0, + out_zero_point=128, + in_dtype="uint8", + out_dtype="uint8", + ) def test_int8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - out_scale=relay.const(1.0, dtype="float32"), - out_zero_point=relay.const(128, dtype="int32"), + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=0, + out_scale=1.0, + out_zero_point=128, in_dtype="int8", out_dtype="uint8", ) - result = canonicalizations.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg), - self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), - ) def test_uint8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - in_scale=relay.const(1.0, dtype="float32"), - in_zero_point=relay.const(128, dtype="int32"), + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=128, + out_scale=1.0, + out_zero_point=0, in_dtype="uint8", out_dtype="int8", ) - result = canonicalizations.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), - self.dequantize_numpy(result), + + def test_different_in_out_qparams(self): + """Test mapping with different in/out qparams works.""" + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=128, + out_scale=1.0, + out_zero_point=128, + in_dtype="uint8", + out_dtype="uint8", + atol=1, # numbers range from -128 -> 128 so not that big error + rtol=0, ) From 76cef1bcc450a4d3229676b64e009a66544c5d98 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:34:08 -0800 Subject: [PATCH 52/72] flesh out todo --- python/tvm/relay/qnn/op/canonicalizations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 334b824f8b50..19ee1b131908 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -83,7 +83,7 @@ def create_integer_lookup_op( """ TODO """ - # TODO: handle multi-channel q + # TODO: handle multi-channel q, if below fails it's probably that in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() out_scale = out_scale.data.numpy().item() From e996279eada5d2292db3d9772c16366eb9a03947 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 14:14:13 -0800 Subject: [PATCH 53/72] more tests --- .../relay/qnn/test_canonicalizations.py | 121 ++++++++---------- 1 file changed, 52 insertions(+), 69 deletions(-) diff --git a/tests/python/relay/qnn/test_canonicalizations.py 
b/tests/python/relay/qnn/test_canonicalizations.py index 0f9939f1cbd8..e13d96885051 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -1,6 +1,9 @@ +from typing import Callable + import numpy as np import tvm from tvm import relay +from tvm.relay.op.transform import arange from tvm.relay.qnn.op import canonicalizations @@ -13,6 +16,7 @@ def fake_identity_func_numpy(self, arr: np.ndarray): def fake_identity_func_relay( self, input_arg=None, + floating_point_func: Callable[[np.ndarray], np.ndarray] = fake_identity_func_numpy, in_scale=relay.const(1.0, dtype="float32"), in_zero_point=relay.const(0, dtype="int32"), out_scale=relay.const(1.0, dtype="float32"), @@ -28,7 +32,7 @@ def fake_identity_func_relay( return ( canonicalizations.create_integer_lookup_op( input_arg=input_arg, - floating_point_func=self.fake_identity_func_numpy, + floating_point_func=floating_point_func, in_scale=in_scale, in_zero_point=in_zero_point, out_scale=out_scale, @@ -44,7 +48,7 @@ def fake_identity_func_relay( def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): return (np_arr.astype("int32") - np_zero_point) * np_scale - def run_identity_function_test( + def run_function_test( self, in_scale: float, in_zero_point: int, @@ -52,10 +56,14 @@ def run_identity_function_test( out_zero_point: int, in_dtype: str, out_dtype: str, + floating_point_func: Callable[[np.ndarray], np.ndarray] = fake_identity_func_numpy, + input_arg: relay.Expr = None, rtol=1e-7, atol=0, ): relay_lookup, input_arg = self.fake_identity_func_relay( + input_arg=input_arg, + floating_point_func=floating_point_func, in_scale=relay.const(in_scale, "float32"), in_zero_point=relay.const(in_zero_point, "int32"), out_scale=relay.const(out_scale, "float32"), @@ -65,15 +73,18 @@ def run_identity_function_test( ) result = canonicalizations.run_const_expr(relay_lookup) np.testing.assert_allclose( - self.dequantize_numpy(input_arg, np_scale=in_scale, np_zero_point=in_zero_point), + floating_point_func( + self.dequantize_numpy(input_arg, np_scale=in_scale, np_zero_point=in_zero_point) + ), self.dequantize_numpy(result, np_scale=out_scale, np_zero_point=out_zero_point), atol=atol, rtol=rtol, ) + """Test mapping between different input/output dtypes""" + def test_int8_to_int8(self): - """Test int8 input to int8 output mapping workings""" - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=0, out_scale=1.0, @@ -83,7 +94,7 @@ def test_int8_to_int8(self): ) def test_uint8_to_uint8(self): - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=128, out_scale=1.0, @@ -93,7 +104,7 @@ def test_uint8_to_uint8(self): ) def test_int8_to_uint8(self): - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=0, out_scale=1.0, @@ -103,7 +114,7 @@ def test_int8_to_uint8(self): ) def test_uint8_to_int8(self): - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=128, out_scale=1.0, @@ -112,9 +123,10 @@ def test_uint8_to_int8(self): out_dtype="int8", ) + """Test mapping with different in/out qparams works.""" + def test_different_in_out_qparams(self): - """Test mapping with different in/out qparams works.""" - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=128, out_scale=1.0, @@ -125,64 +137,35 @@ def test_different_in_out_qparams(self): rtol=0, ) + """Test some simple functions""" -""" -def test_fake_quantize_tanh(): - x = 
relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.tanh(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_erf(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.erf(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_exp(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.exp(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_sigmoid(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.sigmoid(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + def test_tanh(self): + # 1 / 64 in scale -- input range is ~ (-2, 2), tanh(+-2) ~= +-1 + # 1 / 128 out_scale -- output range is ~(-1, 1) + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8")), + in_scale=1 / 64, + in_zero_point=0, + out_scale=1 / 128, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=np.tanh, + atol=0.01, + rtol=0.01, + ) - compare_fq_to_int(op, [x_np]) -""" + def test_exp(self): + # input in floating point ~[-2, 2], final output ~[0, 8] + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8")), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=np.exp, + atol=0.03, + rtol=0.01, + ) From 1ff3adcbc42c736d9627c211a071381550fbbe4a Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 14:41:35 -0800 Subject: [PATCH 54/72] test on keeping shape good --- .../relay/qnn/test_canonicalizations.py | 50 ++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index e13d96885051..334c628e5e54 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -15,8 +15,8 @@ def fake_identity_func_numpy(self, arr: np.ndarray): def fake_identity_func_relay( self, + floating_point_func: Callable[[np.ndarray], np.ndarray], input_arg=None, - floating_point_func: Callable[[np.ndarray], np.ndarray] = fake_identity_func_numpy, in_scale=relay.const(1.0, dtype="float32"), in_zero_point=relay.const(0, dtype="int32"), out_scale=relay.const(1.0, dtype="float32"), @@ -56,7 +56,7 @@ def run_function_test( out_zero_point: int, in_dtype: str, out_dtype: str, - floating_point_func: Callable[[np.ndarray], np.ndarray] 
= fake_identity_func_numpy, + floating_point_func: Callable[[np.ndarray], np.ndarray], input_arg: relay.Expr = None, rtol=1e-7, atol=0, @@ -91,6 +91,7 @@ def test_int8_to_int8(self): out_zero_point=0, in_dtype="int8", out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, ) def test_uint8_to_uint8(self): @@ -101,6 +102,7 @@ def test_uint8_to_uint8(self): out_zero_point=128, in_dtype="uint8", out_dtype="uint8", + floating_point_func=self.fake_identity_func_numpy, ) def test_int8_to_uint8(self): @@ -111,6 +113,7 @@ def test_int8_to_uint8(self): out_zero_point=128, in_dtype="int8", out_dtype="uint8", + floating_point_func=self.fake_identity_func_numpy, ) def test_uint8_to_int8(self): @@ -121,6 +124,48 @@ def test_uint8_to_int8(self): out_zero_point=0, in_dtype="uint8", out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + ) + + """Test different input shapes""" + + def test_keep_input_shapes(self): + # input in floating point ~[-2, 2], final output ~[0, 8] + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8").reshape([2, 2, 8, 8])), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + atol=0.03, + rtol=0.01, + ) + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8").reshape([2, 2, 64])), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + atol=0.03, + rtol=0.01, + ) + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8").reshape([2, 128])), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + atol=0.03, + rtol=0.01, ) """Test mapping with different in/out qparams works.""" @@ -133,6 +178,7 @@ def test_different_in_out_qparams(self): out_zero_point=128, in_dtype="uint8", out_dtype="uint8", + floating_point_func=self.fake_identity_func_numpy, atol=1, # numbers range from -128 -> 128 so not that big error rtol=0, ) From eabd40a55b8a879775aa048a422a7d24a3788ef9 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 14:42:00 -0800 Subject: [PATCH 55/72] lookup table fix --- tests/python/relay/qnn/test_canonicalizations.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index 334c628e5e54..84e01976f4f4 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -1,9 +1,7 @@ from typing import Callable import numpy as np -import tvm from tvm import relay -from tvm.relay.op.transform import arange from tvm.relay.qnn.op import canonicalizations From efe7b1aa6a25f0117162466f3ba08109e4675d90 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 15:04:48 -0800 Subject: [PATCH 56/72] replace canonicalization for rsqrt --- python/tvm/relay/qnn/op/canonicalizations.py | 23 +++++++++++++------- src/relay/qnn/op/rsqrt.cc | 4 ++-- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 19ee1b131908..b0ac0b1c15c1 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -3,6 +3,7 @@ import 
numpy as np
 import tvm
 from tvm import relay
+from tvm.relay.qnn.op.op import register_qnn_canonicalize


 def run_const_expr(expr: "relay.Expr") -> np.ndarray:
@@ -83,7 +84,7 @@ def create_integer_lookup_op(
     """
     TODO
     """
-    # TODO: handle multi-channel q, if below fails it's probably that
+    # TODO: handle multi-channel q, below will fail with multi-channel q
     in_scale = in_scale.data.numpy().item()
     in_zero_point = in_zero_point.data.numpy().item()
     out_scale = out_scale.data.numpy().item()
     out_zero_point = out_zero_point.data.numpy().item()
@@ -112,10 +113,16 @@ def create_integer_lookup_op(
     return result


-"""
-# TODO: better error messages if reference functions fail in FQ2I pass
-register_unary_elementwise_table_lookup_op("tanh", np.tanh)
-register_unary_elementwise_table_lookup_op("erf", special.erf)
-register_unary_elementwise_table_lookup_op("exp", np.exp)
-register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x)))
-"""
+@register_qnn_canonicalize("qnn.rsqrt")
+def canonicalize_rsqrt(attrs, args, arg_types):
+    """Canonicalization for rsqrt"""
+    return create_integer_lookup_op(
+        args[0],
+        lambda arr: 1 / np.sqrt(arr),
+        args[1],
+        args[2],
+        args[3],
+        args[4],
+        in_dtype=arg_types[0].dtype,
+        out_dtype=arg_types[0].dtype,
+    )
diff --git a/src/relay/qnn/op/rsqrt.cc b/src/relay/qnn/op/rsqrt.cc
index 55814dff422b..2bd73d758144 100644
--- a/src/relay/qnn/op/rsqrt.cc
+++ b/src/relay/qnn/op/rsqrt.cc
@@ -105,6 +105,7 @@ Expr QnnRsqrtCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   return Quantize(output, args.output_scale, args.output_zero_point, input_type.dtype, types, -1);
 }

+// Canonicalization set in python/tvm/relay/qnn/op/canonicalizations.py
 RELAY_REGISTER_OP("qnn.rsqrt")
     .describe("Elementwise rsqrt for quantized tensors.")
     .set_num_inputs(5)
@@ -116,8 +117,7 @@ RELAY_REGISTER_OP("qnn.rsqrt")
        "The quantization zero_point of the output tensor.")
     .set_support_level(11)
     .add_type_rel("QRsqrt", QnnRsqrtRel)
-    .set_attr<TNonComputational>("TNonComputational", true)
-    .set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnRsqrtCanonicalize);
+    .set_attr<TNonComputational>("TNonComputational", true);

 TVM_REGISTER_GLOBAL("relay.qnn.op._make.rsqrt").set_body_typed(MakeQuantizedRsqrt);

From 3b000801acc01f40d6ca53f06964a18f03a21262 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Mon, 24 Jan 2022 15:05:47 -0800
Subject: [PATCH 57/72] remove canonicalization of rsqrt

---
 src/relay/qnn/op/rsqrt.cc | 36 ------------------------------------
 1 file changed, 36 deletions(-)

diff --git a/src/relay/qnn/op/rsqrt.cc b/src/relay/qnn/op/rsqrt.cc
index 2bd73d758144..6d37b1aa8d5d 100644
--- a/src/relay/qnn/op/rsqrt.cc
+++ b/src/relay/qnn/op/rsqrt.cc
@@ -69,42 +69,6 @@ Expr MakeQuantizedRsqrt(Expr x, Expr scale, Expr zero_point, Expr output_scale,
   return Call(op, {x, scale, zero_point, output_scale, output_zero_point}, Attrs(), {});
 }

-/*
- * \brief Canonicalizes the QNN rsqrt op.
- * \param attrs The empty attribute.
- * \param new_args The new mutated args to the call node.
- * \param arg_types The types of input and output.
- * \return The sequence of Relay ops for add op.
- */
-Expr QnnRsqrtCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
-                          const Array<tvm::relay::Type>& arg_types) {
-  // At this time, due to the complexity of implementing this op in int8 or uint8,
-  // we dequantize the input, run the op in float, and then quantize the output (as below).
-  // This acts as a placeholder for future hardware enablement, where more hardware specific
-  // canonicalization can be provided.
-
-  // Get the args.
-  QnnUnaryOpArguments args(new_args);
-
-  // Get the input dtype and shape.
- QnnUnaryOpTensorType input_type(arg_types, 0); - - // Get the types for dequantize/quantize. - Array types; - for (size_t i = 1; i < 5; ++i) { - types.push_back(arg_types[i]); - } - - // Dequantize input. - auto dequantized_arg = Dequantize(args.x, args.scale, args.zero_point, types, -1); - - // Compute Rsqrt(Q_x') - auto output = Rsqrt(dequantized_arg); - - // Quantize output. - return Quantize(output, args.output_scale, args.output_zero_point, input_type.dtype, types, -1); -} - // Canonicalization set in python/tvm/relay/qnn/op/canonicalizations.py RELAY_REGISTER_OP("qnn.rsqrt") .describe("Elementwise rsqrt for quantized tensors.") From 3adcb9e9292492dcd2cce63eea7d7a11b6d6fcac Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 20:55:19 -0800 Subject: [PATCH 58/72] add asf headers --- python/tvm/relay/qnn/op/canonicalizations.py | 16 ++++++++++++++++ tests/python/relay/qnn/test_canonicalizations.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index b0ac0b1c15c1..23c5e37f7ebe 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. from typing import Callable import numpy as np diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index 84e01976f4f4..0505a88c07bd 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from typing import Callable import numpy as np From 79289576be05edcda346fb0d031104037b5da552 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 13:51:14 -0800 Subject: [PATCH 59/72] gather supports unsigned integer tests --- tests/python/relay/test_op_level3.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 34f33240f5ac..327575fb8955 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -1296,7 +1296,6 @@ def verify_gather(data, axis, indices, ref_res, indices_dtype="int32"): verify_gather(data, axis, indices, ref_res) verify_gather(data, axis, indices, ref_res, indices_dtype="uint32") - verify_gather(data, axis, indices, ref_res) def test_gather_nd(target, dev, executor_kind): From 3b5759badc4c584470330e7f73a3e1342afc3883 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 14:10:24 -0800 Subject: [PATCH 60/72] fix things --- python/tvm/relay/qnn/op/canonicalizations.py | 51 ++++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 23c5e37f7ebe..db8b71e68998 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -23,7 +23,7 @@ def run_const_expr(expr: "relay.Expr") -> np.ndarray: - """Run a const expression, receiving result as np array.""" + """Evaluate a const expression, receiving result as np array.""" mod = tvm.IRModule.from_expr(expr) vm_exe = relay.create_executor("vm", mod=mod) return vm_exe.evaluate()().asnumpy() @@ -41,7 +41,24 @@ def create_integer_lookup_table( out_dtype: str = "uint8", ) -> np.ndarray: """ - TODO + Return a table where each input indexes to the quantized output approximating the given function. + + Note this also supports mapping unsigned and signed integers to each other. + + Args: + floating_point_func: The numpy function which this table is to approximate + input_scale: The scale of the quantized input tensor. + input_zero_point: The zero point of the quantized input tensor. + output_scale: The scale of the quantized output tensor. + output_zero_point: The zero point of the quantized output tensor. + in_axis: The axis for multi-channel quantization of the input if applicable. + out_axis: The axis for multi-channel quantization of the output if applicable. + in_dtype: The dtype of the input tensor. + out_dtype: The wanted dtype of the output tensor. + + Returns: + A numpy array where values in quantized space will index to the output in quantized space + approximating the given function. """ if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( np.dtype(out_dtype), np.integer @@ -98,8 +115,24 @@ def create_integer_lookup_op( out_dtype: str = "uint8", ) -> "relay.Expr": """ - TODO + Create a quantized version of the given floating point unary operation using table lookup. + + Args: + input_arg: The quantized input to the final function. + floating_point_func: The numpy function which this table is to approximate + in_scale: The scale of the quantized input tensor. + in_zero_point: The zero point of the quantized input tensor. + out_scale: The scale of the quantized output tensor. + out_zero_point: The zero point of the quantized output tensor. + in_axis: The axis for multi-channel quantization of the input if applicable. + out_axis: The axis for multi-channel quantization of the output if applicable. 
+ in_dtype: The dtype of the input tensor. + out_dtype: The wanted dtype of the output tensor. + + Returns: + A Relay expression representing a quantized version of the given function. """ + # TODO: handle multi-channel q, below will fail with multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() @@ -133,12 +166,12 @@ def create_integer_lookup_op( def canonicalize_rsqrt(attrs, args, arg_types): """Canonicalization for rsqrt""" return create_integer_lookup_op( - args[0], - lambda arr: 1 / np.sqrt(arr), - args[1], - args[2], - args[3], - args[4], + input_arg=args[0], + floating_point_func=lambda arr: 1 / np.sqrt(arr), + in_scale=args[1], + in_zero_point=args[2], + out_scale=args[3], + out_zero_point=args[4], in_dtype=arg_types[0].dtype, out_dtype=arg_types[0].dtype, ) From a2f4c5e54119f8d5daff2c8673a0d5fcd1065a22 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 14:37:40 -0800 Subject: [PATCH 61/72] move to legalization --- python/tvm/relay/qnn/op/canonicalizations.py | 15 --------------- python/tvm/relay/qnn/op/legalizations.py | 20 ++++++++++++++++++-- src/relay/qnn/op/rsqrt.cc | 4 +++- tests/python/relay/test_op_qnn_rsqrt.py | 4 +++- 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index db8b71e68998..bd0108d72e64 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -160,18 +160,3 @@ def create_integer_lookup_op( result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, input_arg) return result - - -@register_qnn_canonicalize("qnn.rsqrt") -def canonicalize_rsqrt(attrs, args, arg_types): - """Canonicalization for rsqrt""" - return create_integer_lookup_op( - input_arg=args[0], - floating_point_func=lambda arr: 1 / np.sqrt(arr), - in_scale=args[1], - in_zero_point=args[2], - out_scale=args[3], - out_zero_point=args[4], - in_dtype=arg_types[0].dtype, - out_dtype=arg_types[0].dtype, - ) diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py index 52fe6c8ebe2f..947e9d823134 100644 --- a/python/tvm/relay/qnn/op/legalizations.py +++ b/python/tvm/relay/qnn/op/legalizations.py @@ -17,12 +17,13 @@ # pylint: disable=invalid-name, unused-argument """Backend QNN related feature registration""" import numpy as np - import tvm from tvm import relay from tvm._ffi.base import TVMError -from .. import op as reg +from tvm.relay.qnn.op.canonicalizations import create_integer_lookup_op + from ....topi.x86.utils import target_has_sse42 +from .. import op as reg ################################################# # Register the functions for different operators. @@ -46,6 +47,21 @@ def legalize_qnn_dense(attrs, inputs, types): return qnn_dense_legalize(attrs, inputs, types) +# Registering QNN dense legalization function. +@reg.register_qnn_legalize("qnn.rsqrt") +def legalize_qnn_dense(attrs, inputs, types): + return create_integer_lookup_op( + input_arg=inputs[0], + floating_point_func=lambda arr: 1 / np.sqrt(arr), + in_scale=inputs[1], + in_zero_point=inputs[2], + out_scale=inputs[3], + out_zero_point=inputs[4], + in_dtype=types[0].dtype, + out_dtype=types[0].dtype, + ) + + # Default to None. If overridden by target, this will not be run. # Generic QNN Conv2D legalization function. 
@tvm.target.generic_func
diff --git a/src/relay/qnn/op/rsqrt.cc b/src/relay/qnn/op/rsqrt.cc
index 6d37b1aa8d5d..93baa308a796 100644
--- a/src/relay/qnn/op/rsqrt.cc
+++ b/src/relay/qnn/op/rsqrt.cc
@@ -69,7 +69,9 @@ Expr MakeQuantizedRsqrt(Expr x, Expr scale, Expr zero_point, Expr output_scale,
   return Call(op, {x, scale, zero_point, output_scale, output_zero_point}, Attrs(), {});
 }
 
-// Canonicalization set in python/tvm/relay/qnn/op/canonicalizations.py
+// Translation to relay is done via canonicalization/legalization functions in python
+// e.g. python/tvm/relay/qnn/op/canonicalizations.py or
+// python/tvm/relay/qnn/op/legalizations.py
 RELAY_REGISTER_OP("qnn.rsqrt")
     .describe("Elementwise rsqrt for quantized tensors.")
     .set_num_inputs(5)
diff --git a/tests/python/relay/test_op_qnn_rsqrt.py b/tests/python/relay/test_op_qnn_rsqrt.py
index 1eb9b64057ca..0e40768343bd 100644
--- a/tests/python/relay/test_op_qnn_rsqrt.py
+++ b/tests/python/relay/test_op_qnn_rsqrt.py
@@ -15,8 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import tvm
 import numpy as np
+import tvm
 from tvm import relay
 
 
@@ -51,6 +51,7 @@ def test_saturation():
     func = relay.Function([x], y)
     mod = tvm.IRModule.from_expr(func)
     mod = relay.transform.InferType()(mod)
+    mod = relay.qnn.transform.Legalize()(mod)
     mod = relay.qnn.transform.CanonicalizeOps()(mod)
     func = mod["main"]
 
@@ -77,6 +78,7 @@ def test_saturation():
     func = relay.Function([x], y)
     mod = tvm.IRModule.from_expr(func)
     mod = relay.transform.InferType()(mod)
+    mod = relay.qnn.transform.Legalize()(mod)
     mod = relay.qnn.transform.CanonicalizeOps()(mod)
     func = mod["main"]
 
From b5ec138bd2e5365584ac02a977259bacf909fb7f Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 26 Jan 2022 11:06:40 -0800
Subject: [PATCH 62/72] jostle ci

From fe54fa3f19af5a1cca02d004c3460377e5ee3c59 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 26 Jan 2022 16:32:17 -0800
Subject: [PATCH 63/72] linting

---
 python/tvm/relay/qnn/op/canonicalizations.py               | 4 ++--
 python/tvm/relay/qnn/op/legalizations.py                   | 2 +-
 python/tvm/relay/qnn/op/op.py                              | 2 +-
 python/tvm/relay/transform/fake_quantization_to_integer.py | 1 +
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py
index bd0108d72e64..05b68e731239 100644
--- a/python/tvm/relay/qnn/op/canonicalizations.py
+++ b/python/tvm/relay/qnn/op/canonicalizations.py
@@ -14,12 +14,12 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
+"""Consists of utilities and methods for lowering QNN into mainline relay."""
 from typing import Callable
 
 import numpy as np
 import tvm
 from tvm import relay
-from tvm.relay.qnn.op.op import register_qnn_canonicalize
 
 
 def run_const_expr(expr: "relay.Expr") -> np.ndarray:
@@ -41,7 +41,7 @@ def create_integer_lookup_table(
 ) -> np.ndarray:
     """
-    Return a table where each input indexes to the quantized output approximating the given function.
+    Return a table where each input indexes to the output quantizing the given function.
 
     Note this also supports mapping unsigned and signed integers to each other.
diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py index 947e9d823134..fd835d72fc09 100644 --- a/python/tvm/relay/qnn/op/legalizations.py +++ b/python/tvm/relay/qnn/op/legalizations.py @@ -49,7 +49,7 @@ def legalize_qnn_dense(attrs, inputs, types): # Registering QNN dense legalization function. @reg.register_qnn_legalize("qnn.rsqrt") -def legalize_qnn_dense(attrs, inputs, types): +def legalize_qnn_rsqrt(attrs, inputs, types): return create_integer_lookup_op( input_arg=inputs[0], floating_point_func=lambda arr: 1 / np.sqrt(arr), diff --git a/python/tvm/relay/qnn/op/op.py b/python/tvm/relay/qnn/op/op.py index c83a32e2ce6a..335947b9f7ce 100644 --- a/python/tvm/relay/qnn/op/op.py +++ b/python/tvm/relay/qnn/op/op.py @@ -49,7 +49,7 @@ def register_qnn_canonicalize(op_name, legal_op=None, level=10): op_name : str The name of the operator - legal_op: function (attrs: Attrs, args: List[Expr], List[relay.Type]: arg_types) -> new_expr: Expr + legal_op: function (Attrs, List[Expr], List[relay.Type]) -> Expr The function for transforming an expr to another expr. level : int diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 3337c7cfb894..7398dc98c83d 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -21,6 +21,7 @@ from tvm.ir import TensorAffineType, TupleAffineType # import to register canonicalization funcs for fq2i +# pylint: disable=unused-import from tvm.relay.qnn.op import canonicalizations from tvm.tir import bijective_layout From 804e9fbe0643187973c4fafa1654004164b2891a Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:07:55 -0800 Subject: [PATCH 64/72] use take instead of gather --- python/tvm/relay/qnn/op/canonicalizations.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 05b68e731239..95e0cb60368d 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -155,8 +155,6 @@ def create_integer_lookup_op( in_dtype_num_bits = in_dtype_info.bits lookup_table = relay.const(lookup_table) - index_tensor = relay.reshape(input_arg, [-1]) - index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") - result = relay.gather(lookup_table, -1, index_tensor) - result = relay.reshape_like(result, input_arg) + index_tensor = relay.reinterpret(input_arg, f"uint{in_dtype_num_bits}") + result = relay.take(lookup_table, index_tensor, axis=0, mode="fast") return result From 9a22774d96279c59f017d24e6a7ab4b8e22613d9 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:10:46 -0800 Subject: [PATCH 65/72] remove gather changes --- include/tvm/topi/transform.h | 2 +- src/relay/op/tensor/transform.cc | 3 +-- tests/python/relay/test_op_level3.py | 7 +++---- tests/python/topi/python/test_topi_transform.py | 10 ---------- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/include/tvm/topi/transform.h b/include/tvm/topi/transform.h index acff301f4f07..5a0ec49773f7 100644 --- a/include/tvm/topi/transform.h +++ b/include/tvm/topi/transform.h @@ -1321,7 +1321,7 @@ inline Tensor gather(const Tensor& data, int axis, const Tensor& indices, size_t indices_dim_i = static_cast(GetConstInt(indices->shape[axis])); ICHECK_GE(indices_dim_i, 1); } - ICHECK(indices->dtype.is_int() || 
indices->dtype.is_uint()); + ICHECK(indices->dtype.is_int()); Array out_shape; for (size_t i = 0; i < ndim_i; ++i) { diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 19f6cdf85574..4ae265df3a75 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -3322,8 +3322,7 @@ bool GatherRel(const Array& types, int num_inputs, const Attrs& attrs, << "Gather: expect indices type to be TensorType but get " << types[1]; return false; } - ICHECK(indices->dtype.is_int() || indices->dtype.is_uint()) - << "indices of gather must be tensor of integer"; + ICHECK(indices->dtype.is_int()) << "indices of take must be tensor of integer"; const auto param = attrs.as(); ICHECK(param != nullptr); ICHECK(param->axis.defined()); diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 327575fb8955..1bfaabf4bfce 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -1278,12 +1278,12 @@ def test_scatter_add(self, target, dev, ref_data, dshape, ishape, axis, dtype, i ], ) def test_gather(target, dev, executor_kind, data, axis, indices, ref_res): - def verify_gather(data, axis, indices, ref_res, indices_dtype="int32"): + def verify_gather(data, axis, indices, ref_res): data = np.asarray(data, dtype="float32") - indices = np.asarray(indices, dtype=indices_dtype) + indices = np.asarray(indices, dtype="int32") ref_res = np.asarray(ref_res) d = relay.var("x", relay.TensorType(data.shape, "float32")) - i = relay.var("y", relay.TensorType(indices.shape, indices_dtype)) + i = relay.var("y", relay.TensorType(indices.shape, "int32")) z = relay.gather(d, axis, i) func = relay.Function([d, i], z) @@ -1294,7 +1294,6 @@ def verify_gather(data, axis, indices, ref_res, indices_dtype="int32"): tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) verify_gather(data, axis, indices, ref_res) - verify_gather(data, axis, indices, ref_res, indices_dtype="uint32") diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index ddec14b16d01..50441699e95b 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -1011,16 +1011,6 @@ def test_gather(): verify_gather(np.random.randn(4, 7, 5), 1, np.random.randint(low=0, high=7, size=(4, 10, 5))) verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 2))) verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 10))) - verify_gather( - np.random.randn(4, 7, 5), - 2, - np.random.randint(low=0, high=5, size=(4, 7, 10)).astype("uint32"), - ) - verify_gather( - np.random.randn(4, 7, 5), - 2, - np.random.randint(low=0, high=5, size=(4, 7, 10)).astype("uint8"), - ) @tvm.testing.uses_gpu From a148ff1c14121a4fd36f4fed51de46d025e5b114 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:12:15 -0800 Subject: [PATCH 66/72] undo changes --- tests/python/topi/python/test_topi_transform.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index 50441699e95b..28e7c738e242 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -18,11 +18,13 @@ import numpy as np import pytest import tvm -import tvm.testing +from tvm import te +from tvm import topi +from tvm import relay import 
tvm.topi.testing -from tvm import relay, te, topi from tvm.contrib.nvcc import have_fp16 +import tvm.testing def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): A = te.placeholder(shape=in_shape, name="A") From a75ea9f5d0ebd8161965c6084f76e9cfb9c6354d Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:12:35 -0800 Subject: [PATCH 67/72] undo changes --- tests/python/topi/python/test_topi_transform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index 28e7c738e242..730d22cba16a 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -26,6 +26,7 @@ import tvm.testing + def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): A = te.placeholder(shape=in_shape, name="A") B = topi.expand_dims(A, axis, num_newaxis) From 3b3c68584864f301d84e9a4b61c3a53c343d7850 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:13:14 -0800 Subject: [PATCH 68/72] undo changes --- tests/python/relay/test_op_level3.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 1bfaabf4bfce..bc0b354fe05e 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -21,8 +21,10 @@ import numpy as np import pytest + import tvm import tvm.testing + from tvm import relay, te from tvm.error import TVMError from tvm.relay import create_executor, transform @@ -30,6 +32,7 @@ from utils import ref_funcs + executor_kind = tvm.testing.parameter("graph", "debug") From b609d6381dd23a6b94f042c052088d894ecd41bd Mon Sep 17 00:00:00 2001 From: "andrewzhaoluo (generated by with_the_same_user script)" Date: Fri, 28 Jan 2022 19:34:45 +0000 Subject: [PATCH 69/72] move thing in range --- .../relay/test_pass_fake_quantization_to_integer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 28166bb8be72..9cc359d472fd 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -305,14 +305,14 @@ def test_fake_quantize_global_avg_pool(): def test_fake_quantize_rsqrt(): - x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") - zero = relay.const(0) + x = relay.var("x", shape=[1, 3, 3, 3], dtype="int8") + mid_point = relay.const(-128) - x = relay.qnn.op.dequantize(x, relay.const(2.0), zero) + x = relay.qnn.op.dequantize(x, relay.const(0.125), mid_point) op = relay.rsqrt(x) - op = relay.qnn.op.quantize(op, relay.const(2.0), zero) + op = relay.qnn.op.quantize(op, relay.const(0.125), mid_point) - x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") + x_np = np.random.randint(-128, 127, size=[1, 3, 3, 3], dtype="int8") compare_fq_to_int(op, [x_np], True) From b0b7676f0f57d45d6e92c246f511bb0ad6b2fc0d Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 31 Jan 2022 14:44:15 -0800 Subject: [PATCH 70/72] lint --- tests/python/relay/test_op_level3.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index bc0b354fe05e..e58ceabd1879 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -21,10 +21,8 @@ import numpy as np import pytest - import tvm import 
tvm.testing - from tvm import relay, te from tvm.error import TVMError from tvm.relay import create_executor, transform @@ -32,7 +30,6 @@ from utils import ref_funcs - executor_kind = tvm.testing.parameter("graph", "debug") @@ -1299,7 +1296,6 @@ def verify_gather(data, axis, indices, ref_res): verify_gather(data, axis, indices, ref_res) - def test_gather_nd(target, dev, executor_kind): def verify_gather_nd(xshape, yshape, y_data, batch_dims=0, indices_dtype="int32"): x = relay.var("x", relay.TensorType(xshape, "float32")) From 5b919f117b0d6a591baf784ede18f23fab9a3973 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 7 Feb 2022 09:17:34 -0800 Subject: [PATCH 71/72] remove unneeded line --- include/tvm/topi/transform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/tvm/topi/transform.h b/include/tvm/topi/transform.h index 5a0ec49773f7..acff301f4f07 100644 --- a/include/tvm/topi/transform.h +++ b/include/tvm/topi/transform.h @@ -1321,7 +1321,7 @@ inline Tensor gather(const Tensor& data, int axis, const Tensor& indices, size_t indices_dim_i = static_cast(GetConstInt(indices->shape[axis])); ICHECK_GE(indices_dim_i, 1); } - ICHECK(indices->dtype.is_int()); + ICHECK(indices->dtype.is_int() || indices->dtype.is_uint()); Array out_shape; for (size_t i = 0; i < ndim_i; ++i) { From 3240c8653e111c84d5d71fd4fac9f5e61e5d8984 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 7 Feb 2022 09:19:15 -0800 Subject: [PATCH 72/72] jostle
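
Note on the final design: the patches above converge on one mechanism. The dequantize -> float op -> quantize chain for a unary operator is folded, at compile time, into a table with one entry per input bit pattern, and the runtime graph simply indexes that table. The following self-contained NumPy sketch shows the idea; the helper names are illustrative rather than TVM API, and the table here is built over raw bit patterns (0..255) so that its order lines up with the reinterpret-to-uint indexing paired with take(..., mode="fast") from patch 64 onward.

import numpy as np


def build_lookup_table(fp_func, in_scale, in_zero_point, out_scale, out_zero_point,
                       in_dtype="int8", out_dtype="int8"):
    # Enumerate every bit pattern of the input dtype, then view the patterns
    # as the real dtype so that table[raw_bits] is the correct entry.
    num_bits = np.iinfo(in_dtype).bits
    inputs_q = np.arange(0, 2 ** num_bits, dtype="uint%d" % num_bits).view(in_dtype)
    # Dequantize, apply the fp32 reference function, then requantize and saturate.
    inputs_fp = in_scale * (inputs_q.astype("float64") - in_zero_point)
    with np.errstate(divide="ignore", invalid="ignore"):
        outputs_fp = fp_func(inputs_fp)
    outputs_q = np.round(outputs_fp / out_scale) + out_zero_point
    info = np.iinfo(out_dtype)
    return np.clip(outputs_q, info.min, info.max).astype(out_dtype)


def apply_lookup_table(table, x):
    # Stand-in for relay.take(table, relay.reinterpret(x, "uint8"), axis=0, mode="fast"):
    # the signed input is reinterpreted bit-for-bit as unsigned, so every index is in range.
    return table[x.view("uint%d" % np.iinfo(x.dtype).bits)]


# qparams as in test_fake_quantize_rsqrt: scale 0.125, zero point -128, so the
# dequantized inputs land in [0, 31.875], where rsqrt is well defined.
table = build_lookup_table(lambda a: 1.0 / np.sqrt(a), 0.125, -128, 0.125, -128)
x_np = np.random.randint(-128, 127, size=[1, 3, 3, 3], dtype="int8")
y_np = apply_lookup_table(table, x_np)

Because both sets of qparams are baked into the table at compile time, the op costs a single gather-style take at runtime, and mode="fast" is safe here since a reinterpreted uint8 index can never fall outside a 256-entry table.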