From 29e5e8a9b8e1701cf344a51bebe50b65d6655577 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 12 Jan 2022 16:31:31 -0800 Subject: [PATCH 01/72] initial tanh impl --- .../transform/fake_quantization_to_integer.py | 104 ++++++++++++++++++ .../test_pass_fake_quantization_to_integer.py | 16 +++ 2 files changed, 120 insertions(+) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index db46c2cbfd58..19e7cee6a9dc 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -92,6 +92,110 @@ def identity(expr, type_map): return register_fake_quantization_to_integer(op_name, identity) +# TODO: replace with constant folding +def run_const_expr(expr): + mod = tvm.IRModule.from_expr(expr) + vm_exe = relay.create_executor("vm", mod=mod) + return vm_exe.evaluate()().asnumpy() + + +def create_integer_lookup_table( + floating_point_func, + input_scale, + input_zero_point, + output_scale, + output_zero_point, + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", +): + if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( + np.dtype(out_dtype), np.integer + ): + raise ValueError( + f"Only integer dtypes allowed got {in_dtype} and {out_dtype} for in and out dtypes." + ) + + dtype_info = np.iinfo(in_dtype) + + # Use TVMs quantization methods via relay to be consistent + inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) + inputs_dequantized = run_const_expr( + relay.qnn.op.dequantize( + inputs_quantized, + input_scale=input_scale, + input_zero_point=input_zero_point, + axis=in_axis, + ) + ) + + output_dequantized = relay.const(floating_point_func(inputs_dequantized)) + output_quantized = run_const_expr( + relay.qnn.op.quantize( + output_dequantized, output_scale, output_zero_point, out_axis, out_dtype + ) + ) + + return output_quantized + + +def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): + """Implement an operator in quantized space via table lookup operations (e.g. via gather). + + op_name: str + The name of the operator to register for FQ2I. + + example_func: Callable[[np.ndarray], np.ndarray] + The FP32 version of the function to quantize operating on numpy arrays. + """ + + def func(expr, type_map): + assert len(expr.args) == 1 + arg = expr.args[0] + in_scale = fold_constant(type_map[arg].scale) + in_zero_point = fold_constant(type_map[arg].zero_point) + out_scale = fold_constant(type_map[expr].scale) + out_zero_point = fold_constant(type_map[expr].zero_point) + if ( + not isinstance(in_scale, relay.Constant) + or not isinstance(in_zero_point, relay.Constant) + or not isinstance(out_scale, relay.Constant) + or not isinstance(out_zero_point, relay.Constant) + ): + raise ValueError( + f"{op_name} requires input/output quantization params to be known at compile time!" 
+            )
+
+        # TODO: handle multi-channel q
+        in_scale = in_scale.data.numpy().item()
+        in_zero_point = in_zero_point.data.numpy().item()
+        out_scale = out_scale.data.numpy().item()
+        out_zero_point = out_zero_point.data.numpy().item()
+
+        lookup_table = create_integer_lookup_table(
+            floating_point_func,
+            relay.const(in_scale),
+            relay.const(in_zero_point, dtype="int32"),
+            relay.const(out_scale),
+            relay.const(out_zero_point, dtype="int32"),
+            in_axis=type_map[arg].axis,
+            in_dtype=type_map[arg].dtype,
+            out_axis=type_map[expr].axis,
+            out_dtype=type_map[expr].dtype,
+        )
+        lookup_table = relay.const(lookup_table)
+        index_tensor = relay.reshape(arg, [-1])
+        result = relay.gather(lookup_table, -1, index_tensor)
+        result = relay.reshape_like(result, arg)
+        return [result, type_map[expr]]
+
+    return register_fake_quantization_to_integer(op_name, func)
+
+
+register_unary_elementwise_table_lookup_op("tanh", np.tanh)
+
 register_unary_identity("reshape")
 register_unary_identity("squeeze")
 register_unary_identity("strided_slice")
diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py
index aee2741782fd..9a2384705982 100644
--- a/tests/python/relay/test_pass_fake_quantization_to_integer.py
+++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py
@@ -26,6 +26,7 @@ def compare_fq_to_int(expr, args, allow_rounding_error=False):
     mod = tvm.relay.transform.InferType()(mod)
     mod_int = tvm.relay.transform.FakeQuantizationToInteger()(mod)
+    breakpoint()
     assert not tvm.ir.structural_equal(mod, mod_int)
 
     result = (
@@ -586,6 +587,21 @@ def run_test_case(partial_func):
     run_test_case(lambda x: relay.op.min(x, axis=1))
 
 
+def test_fake_quantize_tanh():
+    x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8")
+
+    zero = relay.const(0)
+    x = relay.qnn.op.dequantize(x, relay.const(2.0), zero)
+    op = relay.op.tanh(x)
+
+    # Have difference scales for input/output to test if can handle
+    op = relay.qnn.op.quantize(op, relay.const(1.0), zero)
+
+    x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8")
+
+    compare_fq_to_int(op, [x_np])
+
+
 def test_fq_hard_fail():
     @tvm.ir.register_op_attr("nn.conv2d", "FTVMFakeQuantizationToInteger", level=11)
     def conv2d(expr, type_map):  # pylint: disable=unused-variable

From b414aeba93655559d5c6ec936e51f700bbf2b8f7 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 12 Jan 2022 16:42:28 -0800
Subject: [PATCH 02/72] fix small errors

---
 python/tvm/relay/transform/fake_quantization_to_integer.py   | 1 +
 tests/python/relay/test_pass_fake_quantization_to_integer.py | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 19e7cee6a9dc..84c22f8f9a94 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -189,6 +189,7 @@ def func(expr, type_map):
         index_tensor = relay.reshape(arg, [-1])
         result = relay.gather(lookup_table, -1, index_tensor)
         result = relay.reshape_like(result, arg)
+        breakpoint()
         return [result, type_map[expr]]
 
     return register_fake_quantization_to_integer(op_name, func)
diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py
index 9a2384705982..113f1f21759b 100644
--- a/tests/python/relay/test_pass_fake_quantization_to_integer.py
+++
b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -591,11 +591,11 @@ def test_fake_quantize_tanh(): x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(2.0), zero) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) op = relay.op.tanh(x) # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(1.0), zero) + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") From 8f7a4f6744369d965be14ae9e83c963b2529018c Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 18 Jan 2022 13:48:16 -0800 Subject: [PATCH 03/72] support uint and int lookup into tables --- .../transform/fake_quantization_to_integer.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 84c22f8f9a94..24907dc0c2ec 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -119,8 +119,20 @@ def create_integer_lookup_table( dtype_info = np.iinfo(in_dtype) + num_bits = dtype_info.bits + # Use TVMs quantization methods via relay to be consistent - inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + # inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + + # First generate a list of all num_bit integer patterns + inputs_quantized = np.array(range(0, 2 ** num_bits), dtype=f"uint{num_bits}") + + # Reinterpret bits as the real datatype + # Note what we are doing here is a bit tricky, the canonical view of our lookup table + # is using the uintX version. When we run the lookup in the relay graph, we note + # that the "gather" operation used supports negative indices which make the mapping + # valid! 
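# ---------------------------------------------------------------------------
# Editor's aside (illustrative sketch, not part of the patch series): the
# bit-pattern trick described in the comment above can be checked with plain
# numpy. Build every 8-bit pattern as uint8, then view the same bytes as
# int8; the table entry at uint index 255 is the one a gather keyed on the
# int8 value -1 (raw bits 0xFF, i.e. a negative index) must hit.
import numpy as np

_codes = np.arange(0, 2 ** 8, dtype="uint8")  # all 256 bit patterns, 0..255
_as_int8 = _codes.view("int8")                # same bytes reinterpreted as int8
assert _as_int8[255] == -1 and _as_int8[128] == -128
# ---------------------------------------------------------------------------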
+ inputs_quantized = inputs_quantized.view(in_dtype) inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) inputs_dequantized = run_const_expr( relay.qnn.op.dequantize( @@ -185,11 +197,11 @@ def func(expr, type_map): out_axis=type_map[expr].axis, out_dtype=type_map[expr].dtype, ) + lookup_table = relay.const(lookup_table) index_tensor = relay.reshape(arg, [-1]) result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, arg) - breakpoint() return [result, type_map[expr]] return register_fake_quantization_to_integer(op_name, func) From b8a54ee61587512de25f9b6502ec8cbcb0bb25ce Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 18 Jan 2022 15:40:08 -0800 Subject: [PATCH 04/72] reinterpret cast, working tanh tests --- include/tvm/topi/transform.h | 2 +- .../relay/transform/fake_quantization_to_integer.py | 11 +++++++---- src/relay/op/tensor/transform.cc | 3 ++- .../relay/test_pass_fake_quantization_to_integer.py | 5 ++--- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/tvm/topi/transform.h b/include/tvm/topi/transform.h index 1ad9d7da72ba..83efd1c5825d 100644 --- a/include/tvm/topi/transform.h +++ b/include/tvm/topi/transform.h @@ -1321,7 +1321,7 @@ inline Tensor gather(const Tensor& data, int axis, const Tensor& indices, size_t indices_dim_i = static_cast(GetConstInt(indices->shape[axis])); ICHECK_GE(indices_dim_i, 1); } - ICHECK(indices->dtype.is_int()); + ICHECK(indices->dtype.is_int() || indices->dtype.is_uint()); Array out_shape; for (size_t i = 0; i < ndim_i; ++i) { diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 24907dc0c2ec..e15398c93e06 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -129,9 +129,8 @@ def create_integer_lookup_table( # Reinterpret bits as the real datatype # Note what we are doing here is a bit tricky, the canonical view of our lookup table - # is using the uintX version. When we run the lookup in the relay graph, we note - # that the "gather" operation used supports negative indices which make the mapping - # valid! + # is using the uintX version. When we run the lookup in the relay graph, we cast the + # bit pattern back into this form. 
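# ---------------------------------------------------------------------------
# Editor's aside (illustrative sketch, not part of the patch series): a plain
# numpy model of what the rewritten graph built further down in this patch
# does at runtime. The helper name `_lookup` is invented; the steps mirror
# relay.reshape -> relay.reinterpret -> relay.gather -> relay.reshape_like.
def _lookup(x, table):  # x, table: numpy arrays; table has 2**bits entries
    bits = x.dtype.itemsize * 8
    flat = x.reshape(-1)                # relay.reshape(arg, [-1])
    idx = flat.view(f"uint{bits}")      # relay.reinterpret(..., f"uint{bits}")
    return table[idx].reshape(x.shape)  # relay.gather + relay.reshape_like
# ---------------------------------------------------------------------------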
inputs_quantized = inputs_quantized.view(in_dtype) inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) inputs_dequantized = run_const_expr( @@ -197,9 +196,13 @@ def func(expr, type_map): out_axis=type_map[expr].axis, out_dtype=type_map[expr].dtype, ) - + + in_dtype_info = np.iinfo(type_map[arg].dtype) + in_dtype_num_bits = in_dtype_info.bits + lookup_table = relay.const(lookup_table) index_tensor = relay.reshape(arg, [-1]) + index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, arg) return [result, type_map[expr]] diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 9e469f373131..ebf8a0653b70 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -3318,7 +3318,8 @@ bool GatherRel(const Array& types, int num_inputs, const Attrs& attrs, << "Gather: expect indices type to be TensorType but get " << types[1]; return false; } - ICHECK(indices->dtype.is_int()) << "indices of take must be tensor of integer"; + ICHECK(indices->dtype.is_int() || indices->dtype.is_uint()) + << "indices of gather must be tensor of integer"; const auto param = attrs.as(); ICHECK(param != nullptr); ICHECK(param->axis.defined()); diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 113f1f21759b..8c016e4c8976 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -26,7 +26,6 @@ def compare_fq_to_int(expr, args, allow_rounding_error=False): mod = tvm.relay.transform.InferType()(mod) mod_int = tvm.relay.transform.FakeQuantizationToInteger()(mod) - breakpoint() assert not tvm.ir.structural_equal(mod, mod_int) result = ( @@ -588,7 +587,7 @@ def run_test_case(partial_func): def test_fake_quantize_tanh(): - x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") zero = relay.const(0) x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) @@ -597,7 +596,7 @@ def test_fake_quantize_tanh(): # Have difference scales for input/output to test if can handle op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") compare_fq_to_int(op, [x_np]) From cf3eb4ea1cb736246e5309debb4d5270c69126b5 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 09:47:56 -0800 Subject: [PATCH 05/72] refactor relay func creation --- .../transform/fake_quantization_to_integer.py | 65 ++++++++++++++++++- .../test_pass_fake_quantization_to_integer.py | 27 ++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index e15398c93e06..eca3d12d7bcd 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -152,6 +152,50 @@ def create_integer_lookup_table( return output_quantized +def create_integer_lookup_op( + input_arg, + floating_point_func, + in_scale, + in_zero_point, + out_scale, + out_zero_point, + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", +): + """ + TODO + """ + # TODO: handle multi-channel q + in_scale = in_scale.data.numpy().item() + 
in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() + out_zero_point = out_zero_point.data.numpy().item() + + lookup_table = create_integer_lookup_table( + floating_point_func, + relay.const(in_scale), + relay.const(in_zero_point, dtype="int32"), + relay.const(out_scale), + relay.const(out_zero_point, dtype="int32"), + in_axis=in_axis, + in_dtype=in_dtype, + out_axis=out_axis, + out_dtype=out_dtype, + ) + + in_dtype_info = np.iinfo(in_dtype) + in_dtype_num_bits = in_dtype_info.bits + + lookup_table = relay.const(lookup_table) + index_tensor = relay.reshape(input_arg, [-1]) + index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") + result = relay.gather(lookup_table, -1, index_tensor) + result = relay.reshape_like(result, input_arg) + return result + + def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): """Implement an operator in quantized space via table lookup operations (e.g. via gather). @@ -163,12 +207,31 @@ def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): """ def func(expr, type_map): - assert len(expr.args) == 1 + assert len(expr.args) == 1, "only support elemwise ops for now!" arg = expr.args[0] in_scale = fold_constant(type_map[arg].scale) in_zero_point = fold_constant(type_map[arg].zero_point) out_scale = fold_constant(type_map[expr].scale) out_zero_point = fold_constant(type_map[expr].zero_point) + in_axis = type_map[arg].axis + in_dtype = type_map[arg].dtype + out_axis = type_map[expr].axis + out_dtype = type_map[expr].dtype + result = create_integer_lookup_op( + input_arg=arg, + floating_point_func=floating_point_func, + in_scale=in_scale, + in_zero_point=in_zero_point, + out_scale=out_scale, + out_zero_point=out_zero_point, + in_axis=in_axis, + in_dtype=in_dtype, + out_axis=out_axis, + out_dtype=out_dtype, + ) + return [result, type_map[expr]] + arg = expr.args[0] + if ( not isinstance(in_scale, relay.Constant) or not isinstance(in_zero_point, relay.Constant) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 8c016e4c8976..a23a316ae50f 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -19,6 +19,7 @@ import pytest import tvm from tvm import relay +from tvm.relay.transform import fake_quantization_to_integer def compare_fq_to_int(expr, args, allow_rounding_error=False): @@ -586,6 +587,32 @@ def run_test_case(partial_func): run_test_case(lambda x: relay.op.min(x, axis=1)) +class TestIntegerTableLookupTable: + """Consists of tests testing functionality of creating lookup tables for integer operations.""" + + # def __init__(self) -> None: + # self.input = np.arange(start=0, stop=256, dtype="uint8") + + def fake_identity_func_numpy(self, arr: np.ndarray): + return arr.astype("float32") + + """ + def fake_identity_func_relay(self): + fake_quantization_to_integer.register_fake_quantization_to_integer.fu + """ + + def test_int8_to_int8(self): + relay_result = fake_quantization_to_integer.create_integer_lookup_table( + self.uint8_identity_func, + relay.const(1.0, dtype="float32"), + relay.const(0, dtype="int32"), + relay.const(1.0, dtype="float32"), + relay.const(0, dtype="int32"), + in_dtype="int8", + out_dtype="int8", + ) + + def test_fake_quantize_tanh(): x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") From 0c1a71d8ffab8bf59ac11f4c32f3037302924cff Mon Sep 17 00:00:00 2001 From: 
Andrew Luo Date: Wed, 19 Jan 2022 10:15:41 -0800 Subject: [PATCH 06/72] basic casting tests --- .../transform/fake_quantization_to_integer.py | 39 ---------- .../test_pass_fake_quantization_to_integer.py | 74 ++++++++++++++++--- 2 files changed, 64 insertions(+), 49 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index eca3d12d7bcd..96f5faab9b90 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -230,45 +230,6 @@ def func(expr, type_map): out_dtype=out_dtype, ) return [result, type_map[expr]] - arg = expr.args[0] - - if ( - not isinstance(in_scale, relay.Constant) - or not isinstance(in_zero_point, relay.Constant) - or not isinstance(out_scale, relay.Constant) - or not isinstance(out_zero_point, relay.Constant) - ): - raise ValueError( - f"{op_name} requires input/output quantization params to be known at compile time!" - ) - - # TODO: handle multi-channel q - in_scale = in_scale.data.numpy().item() - in_zero_point = in_zero_point.data.numpy().item() - out_scale = out_scale.data.numpy().item() - out_zero_point = out_zero_point.data.numpy().item() - - lookup_table = create_integer_lookup_table( - floating_point_func, - relay.const(in_scale), - relay.const(in_zero_point, dtype="int32"), - relay.const(out_scale), - relay.const(out_zero_point, dtype="int32"), - in_axis=type_map[arg].axis, - in_dtype=type_map[arg].dtype, - out_axis=type_map[expr].axis, - out_dtype=type_map[expr].dtype, - ) - - in_dtype_info = np.iinfo(type_map[arg].dtype) - in_dtype_num_bits = in_dtype_info.bits - - lookup_table = relay.const(lookup_table) - index_tensor = relay.reshape(arg, [-1]) - index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") - result = relay.gather(lookup_table, -1, index_tensor) - result = relay.reshape_like(result, arg) - return [result, type_map[expr]] return register_fake_quantization_to_integer(op_name, func) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index a23a316ae50f..714dce4e6186 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -596,21 +596,75 @@ class TestIntegerTableLookupTable: def fake_identity_func_numpy(self, arr: np.ndarray): return arr.astype("float32") - """ - def fake_identity_func_relay(self): - fake_quantization_to_integer.register_fake_quantization_to_integer.fu - """ + def fake_identity_func_relay( + self, + input_arg=None, + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(0, dtype="int32"), + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(0, dtype="int32"), + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", + ): + if input_arg is None: + input_arg = relay.const(np.arange(0, 256, dtype="uint8").view(in_dtype)) + + return ( + fake_quantization_to_integer.create_integer_lookup_op( + input_arg=input_arg, + floating_point_func=self.fake_identity_func_numpy, + in_scale=in_scale, + in_zero_point=in_zero_point, + out_scale=out_scale, + out_zero_point=out_zero_point, + in_axis=in_axis, + out_axis=out_axis, + in_dtype=in_dtype, + out_dtype=out_dtype, + ), + input_arg.data.numpy(), + ) + + def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): + return (np_arr.astype("int32") - np_zero_point) * np_scale def 
test_int8_to_int8(self): - relay_result = fake_quantization_to_integer.create_integer_lookup_table( - self.uint8_identity_func, - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_uint8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_int8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(128, dtype="int32"), in_dtype="int8", + out_dtype="uint8", + ) + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg), + self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), + ) + + def test_uint8_to_int8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(128, dtype="int32"), + in_dtype="uint8", out_dtype="int8", ) + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), + self.dequantize_numpy(result), + ) def test_fake_quantize_tanh(): From c943ff19c2356e3ca0fd8c06ed69a61c5efc22b6 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 10:18:26 -0800 Subject: [PATCH 07/72] explicitly say do not handle multi-channel lookups --- .../relay/transform/fake_quantization_to_integer.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 96f5faab9b90..f7278801fec2 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -167,12 +167,20 @@ def create_integer_lookup_op( """ TODO """ - # TODO: handle multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() out_scale = out_scale.data.numpy().item() out_zero_point = out_zero_point.data.numpy().item() + # TODO: handle multi-channel q + if ( + in_scale.size() > 1 + or out_scale.size() > 1 + or in_zero_point.size() > 1 + or out_zero_point.size() > 1 + ): + raise ValueError("Do no support multi-channel quantization for now") + lookup_table = create_integer_lookup_table( floating_point_func, relay.const(in_scale), From 20737402cba0a4c5a6ab75bb557273d12296c64c Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 12:27:56 -0800 Subject: [PATCH 08/72] add example funcs --- .../transform/fake_quantization_to_integer.py | 6 ++- .../test_pass_fake_quantization_to_integer.py | 45 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index f7278801fec2..c595202ea541 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py 
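Aside (editor's illustration, not part of the patch series): for scalar qparams, the table construction that these patches delegate to relay's qnn.quantize/dequantize reduces to the following numpy arithmetic; `_make_lut` and its arguments are invented names for this sketch.

import numpy as np

def _make_lut(fp_func, in_scale, in_zp, out_scale, out_zp, in_dtype="int8", out_dtype="int8"):
    bits = np.iinfo(in_dtype).bits
    codes = np.arange(0, 2 ** bits, dtype=f"uint{bits}").view(in_dtype)  # all bit patterns
    real = (codes.astype("float32") - in_zp) * in_scale                  # dequantize
    quantized = np.round(fp_func(real) / out_scale) + out_zp             # quantize fp result
    info = np.iinfo(out_dtype)
    return np.clip(quantized, info.min, info.max).astype(out_dtype)

# e.g. the tanh table with the scales used by the tests in these patches:
_tanh_lut = _make_lut(np.tanh, in_scale=0.03, in_zp=0, out_scale=0.01, out_zp=0)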
@@ -169,6 +169,7 @@ def create_integer_lookup_op( """ in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() out_zero_point = out_zero_point.data.numpy().item() @@ -179,7 +180,7 @@ def create_integer_lookup_op( or in_zero_point.size() > 1 or out_zero_point.size() > 1 ): - raise ValueError("Do no support multi-channel quantization for now") + raise ValueError("Do not support multi-channel quantization for now") lookup_table = create_integer_lookup_table( floating_point_func, @@ -243,6 +244,9 @@ def func(expr, type_map): register_unary_elementwise_table_lookup_op("tanh", np.tanh) +register_unary_elementwise_table_lookup_op("erf", np.math.erf) +register_unary_elementwise_table_lookup_op("exp", np.math.exp) +register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) register_unary_identity("reshape") register_unary_identity("squeeze") diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 714dce4e6186..a7ab19bd059f 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -682,6 +682,51 @@ def test_fake_quantize_tanh(): compare_fq_to_int(op, [x_np]) +def test_fake_quantize_erf(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.erf(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_exp(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.exp(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_sigmoid(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.sigmoid(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + def test_fq_hard_fail(): @tvm.ir.register_op_attr("nn.conv2d", "FTVMFakeQuantizationToInteger", level=11) def conv2d(expr, type_map): # pylint: disable=unused-variable From 11674d30d6071800723a0713c9f9c2b679c68100 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 12:34:53 -0800 Subject: [PATCH 09/72] fix silent fail --- .../relay/transform/fake_quantization_to_integer.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index c595202ea541..63dabf98088e 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -167,21 +167,13 @@ def create_integer_lookup_op( """ TODO """ + + # TODO: handle multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = 
in_zero_point.data.numpy().item()
-
     out_scale = out_scale.data.numpy().item()
     out_zero_point = out_zero_point.data.numpy().item()
 
-    # TODO: handle multi-channel q
-    if (
-        in_scale.size() > 1
-        or out_scale.size() > 1
-        or in_zero_point.size() > 1
-        or out_zero_point.size() > 1
-    ):
-        raise ValueError("Do not support multi-channel quantization for now")
-
     lookup_table = create_integer_lookup_table(
         floating_point_func,
         relay.const(in_scale),

From 67baa39944a63ba9648a59805f2e35aedd083934 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 19 Jan 2022 12:46:13 -0800
Subject: [PATCH 10/72] fix some bugs with floating point funcs not working

---
 python/tvm/relay/transform/fake_quantization_to_integer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 63dabf98088e..2e293d44fc7d 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -17,6 +17,7 @@
 """Relay functions for rewriting fake quantized ops."""
 import numpy as np
 import tvm
+from scipy import special
 from tvm import relay
 from tvm.ir import TensorAffineType, TupleAffineType
 from tvm.tir import bijective_layout
@@ -167,7 +168,6 @@ def create_integer_lookup_op(
     """
     TODO
     """
-
     # TODO: handle multi-channel q
     in_scale = in_scale.data.numpy().item()
     in_zero_point = in_zero_point.data.numpy().item()
@@ -236,8 +236,8 @@ def func(expr, type_map):
 register_unary_elementwise_table_lookup_op("tanh", np.tanh)
-register_unary_elementwise_table_lookup_op("erf", np.math.erf)
-register_unary_elementwise_table_lookup_op("exp", np.math.exp)
+register_unary_elementwise_table_lookup_op("erf", special.erf)
+register_unary_elementwise_table_lookup_op("exp", np.exp)
 register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x)))

From 47e4b5ce91d0a475687d08914fc2bce8e8de9ea8 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 19 Jan 2022 12:48:56 -0800
Subject: [PATCH 11/72] add TODO

---
 python/tvm/relay/transform/fake_quantization_to_integer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 2e293d44fc7d..d6a9d534767a 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -235,6 +235,7 @@ def func(expr, type_map):
     return register_fake_quantization_to_integer(op_name, func)
 
+# TODO: better error messages if reference functions fail in FQ2I pass
 register_unary_elementwise_table_lookup_op("tanh", np.tanh)
 register_unary_elementwise_table_lookup_op("erf", special.erf)
 register_unary_elementwise_table_lookup_op("exp", np.exp)

From 446e25abd27ddaef7edb85f5380ee3f03e16299f Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Fri, 21 Jan 2022 16:20:38 -0800
Subject: [PATCH 12/72] add todo

---
 python/tvm/relay/transform/fake_quantization_to_integer.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index d6a9d534767a..432870dc98dc 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -111,6 +111,9 @@ def create_integer_lookup_table(
     in_dtype="uint8",
     out_dtype="uint8",
): + """ + TODO + """ if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( np.dtype(out_dtype), np.integer ): From 87e265c54efd119d94233de00ed2912ee29a19a0 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 12:50:19 -0800 Subject: [PATCH 13/72] canonicalizations --- python/tvm/relay/qnn/op/__init__.py | 4 ++-- python/tvm/relay/qnn/op/canonicalizations.py | 0 python/tvm/relay/qnn/op/op.py | 25 +++++++++++++++++++- 3 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 python/tvm/relay/qnn/op/canonicalizations.py diff --git a/python/tvm/relay/qnn/op/__init__.py b/python/tvm/relay/qnn/op/__init__.py index 848409360a9d..745050e286e8 100644 --- a/python/tvm/relay/qnn/op/__init__.py +++ b/python/tvm/relay/qnn/op/__init__.py @@ -18,5 +18,5 @@ """QNN dialect related operators.""" from __future__ import absolute_import as _abs from .qnn import * -from .op import register_qnn_legalize -from . import _qnn, legalizations, layout_conversions +from .op import register_qnn_legalize, register_qnn_canonicalize +from . import _qnn, legalizations, layout_conversions, canonicalizations diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/tvm/relay/qnn/op/op.py b/python/tvm/relay/qnn/op/op.py index 32a61229951c..c83a32e2ce6a 100644 --- a/python/tvm/relay/qnn/op/op.py +++ b/python/tvm/relay/qnn/op/op.py @@ -20,7 +20,10 @@ def register_qnn_legalize(op_name, legal_op=None, level=10): - """Register legal transformation function for a QNN op + """Register legal transformation function for a QNN op. + + This helps QNN match hardware intrinsics better and is run before + canonicalization. Parameters ---------- @@ -34,3 +37,23 @@ def register_qnn_legalize(op_name, legal_op=None, level=10): The priority level """ return tvm.ir.register_op_attr(op_name, "FTVMQnnLegalize", legal_op, level) + + +def register_qnn_canonicalize(op_name, legal_op=None, level=10): + """Register canonicalization function for a QNN op. + + This transforms QNN ops to mainline Relay components. + + Parameters + ---------- + op_name : str + The name of the operator + + legal_op: function (attrs: Attrs, args: List[Expr], List[relay.Type]: arg_types) -> new_expr: Expr + The function for transforming an expr to another expr. 
+ + level : int + The priority level + """ + + return tvm.ir.register_op_attr(op_name, "FTVMQnnCanonicalize", legal_op, level) From 400880cd09cb1decf494b1b2ea754b2b1566e584 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:02:57 -0800 Subject: [PATCH 14/72] refactor integer lookup ops into own folder --- python/tvm/relay/qnn/op/canonicalizations.py | 121 ++++++++++++++ .../transform/fake_quantization_to_integer.py | 151 ------------------ .../relay/qnn/test_canonicalizations.py | 146 +++++++++++++++++ .../test_pass_fake_quantization_to_integer.py | 140 ---------------- 4 files changed, 267 insertions(+), 291 deletions(-) create mode 100644 tests/python/relay/qnn/test_canonicalizations.py diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index e69de29bb2d1..d419e3eb7a8e 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -0,0 +1,121 @@ +from typing import Callable + +import numpy as np +import tvm +from tvm import relay + + +# TODO: replace with constant folding +def run_const_expr(expr: "relay.Expr") -> np.ndarray: + mod = tvm.IRModule.from_expr(expr) + vm_exe = relay.create_executor("vm", mod=mod) + return vm_exe.evaluate()().asnumpy() + + +def create_integer_lookup_table( + floating_point_func: Callable[[np.ndarray], np.ndarray], + input_scale: "relay.Expr", + input_zero_point: "relay.Expr", + output_scale: "relay.Expr", + output_zero_point: "relay.Expr", + in_axis: int = -1, + out_axis: int = -1, + in_dtype: str = "uint8", + out_dtype: str = "uint8", +) -> np.ndarray: + """ + TODO + """ + if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( + np.dtype(out_dtype), np.integer + ): + raise ValueError( + f"Only integer dtypes allowed got {in_dtype} and {out_dtype} for in and out dtypes." + ) + + dtype_info = np.iinfo(in_dtype) + + num_bits = dtype_info.bits + + # Use TVMs quantization methods via relay to be consistent + # inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + + # First generate a list of all num_bit integer patterns + inputs_quantized = np.array(range(0, 2 ** num_bits), dtype=f"uint{num_bits}") + + # Reinterpret bits as the real datatype + # Note what we are doing here is a bit tricky, the canonical view of our lookup table + # is using the uintX version. When we run the lookup in the relay graph, we cast the + # bit pattern back into this form. 
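# ---------------------------------------------------------------------------
# Editor's aside (illustrative, not part of the patch): how the op-level
# helper `create_integer_lookup_op` defined later in this file is meant to
# be called, e.g. to lower tanh over int8 data. The qparams must be relay
# constants, since that helper reads them via `.data.numpy()`.
#
#   x = relay.var("x", shape=[3, 3], dtype="int8")
#   y = create_integer_lookup_op(
#       input_arg=x,
#       floating_point_func=np.tanh,
#       in_scale=relay.const(0.03), in_zero_point=relay.const(0, "int32"),
#       out_scale=relay.const(0.01), out_zero_point=relay.const(0, "int32"),
#       in_dtype="int8", out_dtype="int8",
#   )
# ---------------------------------------------------------------------------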
+ inputs_quantized = inputs_quantized.view(in_dtype) + inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) + inputs_dequantized = run_const_expr( + relay.qnn.op.dequantize( + inputs_quantized, + input_scale=input_scale, + input_zero_point=input_zero_point, + axis=in_axis, + ) + ) + + output_dequantized = relay.const(floating_point_func(inputs_dequantized)) + output_quantized = run_const_expr( + relay.qnn.op.quantize( + output_dequantized, output_scale, output_zero_point, out_axis, out_dtype + ) + ) + + return output_quantized + + +def create_integer_lookup_op( + input_arg: "relay.Expr", + floating_point_func: Callable[[np.array], np.array], + in_scale: "relay.Expr", + in_zero_point: "relay.Expr", + out_scale: "relay.Expr", + out_zero_point: "relay.Expr", + in_axis: int = -1, + out_axis: int = -1, + in_dtype: str = "uint8", + out_dtype: str = "uint8", +) -> "relay.Expr": + """ + TODO + """ + # TODO: handle multi-channel q + in_scale = in_scale.data.numpy().item() + in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() + out_zero_point = out_zero_point.data.numpy().item() + + lookup_table = create_integer_lookup_table( + floating_point_func, + relay.const(in_scale), + relay.const(in_zero_point, dtype="int32"), + relay.const(out_scale), + relay.const(out_zero_point, dtype="int32"), + in_axis=in_axis, + in_dtype=in_dtype, + out_axis=out_axis, + out_dtype=out_dtype, + ) + + in_dtype_info = np.iinfo(in_dtype) + in_dtype_num_bits = in_dtype_info.bits + + lookup_table = relay.const(lookup_table) + index_tensor = relay.reshape(input_arg, [-1]) + index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") + result = relay.gather(lookup_table, -1, index_tensor) + result = relay.reshape_like(result, input_arg) + return result + + +""" +# TODO: better error messages if reference functions fail in FQ2I pass +register_unary_elementwise_table_lookup_op("tanh", np.tanh) +register_unary_elementwise_table_lookup_op("erf", special.erf) +register_unary_elementwise_table_lookup_op("exp", np.exp) +register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) +""" diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 432870dc98dc..9fdc2186a397 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -93,157 +93,6 @@ def identity(expr, type_map): return register_fake_quantization_to_integer(op_name, identity) -# TODO: replace with constant folding -def run_const_expr(expr): - mod = tvm.IRModule.from_expr(expr) - vm_exe = relay.create_executor("vm", mod=mod) - return vm_exe.evaluate()().asnumpy() - - -def create_integer_lookup_table( - floating_point_func, - input_scale, - input_zero_point, - output_scale, - output_zero_point, - in_axis=-1, - out_axis=-1, - in_dtype="uint8", - out_dtype="uint8", -): - """ - TODO - """ - if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( - np.dtype(out_dtype), np.integer - ): - raise ValueError( - f"Only integer dtypes allowed got {in_dtype} and {out_dtype} for in and out dtypes." 
- ) - - dtype_info = np.iinfo(in_dtype) - - num_bits = dtype_info.bits - - # Use TVMs quantization methods via relay to be consistent - # inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) - - # First generate a list of all num_bit integer patterns - inputs_quantized = np.array(range(0, 2 ** num_bits), dtype=f"uint{num_bits}") - - # Reinterpret bits as the real datatype - # Note what we are doing here is a bit tricky, the canonical view of our lookup table - # is using the uintX version. When we run the lookup in the relay graph, we cast the - # bit pattern back into this form. - inputs_quantized = inputs_quantized.view(in_dtype) - inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) - inputs_dequantized = run_const_expr( - relay.qnn.op.dequantize( - inputs_quantized, - input_scale=input_scale, - input_zero_point=input_zero_point, - axis=in_axis, - ) - ) - - output_dequantized = relay.const(floating_point_func(inputs_dequantized)) - output_quantized = run_const_expr( - relay.qnn.op.quantize( - output_dequantized, output_scale, output_zero_point, out_axis, out_dtype - ) - ) - - return output_quantized - - -def create_integer_lookup_op( - input_arg, - floating_point_func, - in_scale, - in_zero_point, - out_scale, - out_zero_point, - in_axis=-1, - out_axis=-1, - in_dtype="uint8", - out_dtype="uint8", -): - """ - TODO - """ - # TODO: handle multi-channel q - in_scale = in_scale.data.numpy().item() - in_zero_point = in_zero_point.data.numpy().item() - out_scale = out_scale.data.numpy().item() - out_zero_point = out_zero_point.data.numpy().item() - - lookup_table = create_integer_lookup_table( - floating_point_func, - relay.const(in_scale), - relay.const(in_zero_point, dtype="int32"), - relay.const(out_scale), - relay.const(out_zero_point, dtype="int32"), - in_axis=in_axis, - in_dtype=in_dtype, - out_axis=out_axis, - out_dtype=out_dtype, - ) - - in_dtype_info = np.iinfo(in_dtype) - in_dtype_num_bits = in_dtype_info.bits - - lookup_table = relay.const(lookup_table) - index_tensor = relay.reshape(input_arg, [-1]) - index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") - result = relay.gather(lookup_table, -1, index_tensor) - result = relay.reshape_like(result, input_arg) - return result - - -def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): - """Implement an operator in quantized space via table lookup operations (e.g. via gather). - - op_name: str - The name of the operator to register for FQ2I. - - example_func: Callable[[np.ndarray], np.ndarray] - The FP32 version of the function to quantize operating on numpy arrays. - """ - - def func(expr, type_map): - assert len(expr.args) == 1, "only support elemwise ops for now!" 
- arg = expr.args[0] - in_scale = fold_constant(type_map[arg].scale) - in_zero_point = fold_constant(type_map[arg].zero_point) - out_scale = fold_constant(type_map[expr].scale) - out_zero_point = fold_constant(type_map[expr].zero_point) - in_axis = type_map[arg].axis - in_dtype = type_map[arg].dtype - out_axis = type_map[expr].axis - out_dtype = type_map[expr].dtype - result = create_integer_lookup_op( - input_arg=arg, - floating_point_func=floating_point_func, - in_scale=in_scale, - in_zero_point=in_zero_point, - out_scale=out_scale, - out_zero_point=out_zero_point, - in_axis=in_axis, - in_dtype=in_dtype, - out_axis=out_axis, - out_dtype=out_dtype, - ) - return [result, type_map[expr]] - - return register_fake_quantization_to_integer(op_name, func) - - -# TODO: better error messages if reference functions fail in FQ2I pass -register_unary_elementwise_table_lookup_op("tanh", np.tanh) -register_unary_elementwise_table_lookup_op("erf", special.erf) -register_unary_elementwise_table_lookup_op("exp", np.exp) -register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) - register_unary_identity("reshape") register_unary_identity("squeeze") register_unary_identity("strided_slice") diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py new file mode 100644 index 000000000000..aad6bb3ede19 --- /dev/null +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -0,0 +1,146 @@ +import numpy as np +import tvm +from tvm import relay +from tvm.relay.qnn.op import canonicalizations + + +class TestIntegerTableLookupTable: + """Consists of tests testing functionality of creating lookup tables for integer operations.""" + + # def __init__(self) -> None: + # self.input = np.arange(start=0, stop=256, dtype="uint8") + + def fake_identity_func_numpy(self, arr: np.ndarray): + return arr.astype("float32") + + def fake_identity_func_relay( + self, + input_arg=None, + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(0, dtype="int32"), + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(0, dtype="int32"), + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", + ): + if input_arg is None: + input_arg = relay.const(np.arange(0, 256, dtype="uint8").view(in_dtype)) + + return ( + canonicalizations.create_integer_lookup_op( + input_arg=input_arg, + floating_point_func=self.fake_identity_func_numpy, + in_scale=in_scale, + in_zero_point=in_zero_point, + out_scale=out_scale, + out_zero_point=out_zero_point, + in_axis=in_axis, + out_axis=out_axis, + in_dtype=in_dtype, + out_dtype=out_dtype, + ), + input_arg.data.numpy(), + ) + + def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): + return (np_arr.astype("int32") - np_zero_point) * np_scale + + def test_int8_to_int8(self): + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_uint8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_int8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(128, dtype="int32"), + 
in_dtype="int8", + out_dtype="uint8", + ) + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg), + self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), + ) + + def test_uint8_to_int8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(128, dtype="int32"), + in_dtype="uint8", + out_dtype="int8", + ) + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), + self.dequantize_numpy(result), + ) + + +""" +def test_fake_quantize_tanh(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.tanh(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_erf(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.erf(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_exp(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.exp(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_sigmoid(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.sigmoid(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) +""" diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index a7ab19bd059f..28166bb8be72 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -587,146 +587,6 @@ def run_test_case(partial_func): run_test_case(lambda x: relay.op.min(x, axis=1)) -class TestIntegerTableLookupTable: - """Consists of tests testing functionality of creating lookup tables for integer operations.""" - - # def __init__(self) -> None: - # self.input = np.arange(start=0, stop=256, dtype="uint8") - - def fake_identity_func_numpy(self, arr: np.ndarray): - return arr.astype("float32") - - def fake_identity_func_relay( - self, - input_arg=None, - in_scale=relay.const(1.0, dtype="float32"), - in_zero_point=relay.const(0, dtype="int32"), - out_scale=relay.const(1.0, dtype="float32"), - out_zero_point=relay.const(0, dtype="int32"), - in_axis=-1, - out_axis=-1, - in_dtype="uint8", - out_dtype="uint8", - ): - if input_arg is None: - input_arg = relay.const(np.arange(0, 256, dtype="uint8").view(in_dtype)) - - return ( - 
fake_quantization_to_integer.create_integer_lookup_op( - input_arg=input_arg, - floating_point_func=self.fake_identity_func_numpy, - in_scale=in_scale, - in_zero_point=in_zero_point, - out_scale=out_scale, - out_zero_point=out_zero_point, - in_axis=in_axis, - out_axis=out_axis, - in_dtype=in_dtype, - out_dtype=out_dtype, - ), - input_arg.data.numpy(), - ) - - def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): - return (np_arr.astype("int32") - np_zero_point) * np_scale - - def test_int8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) - - def test_uint8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) - - def test_int8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - out_scale=relay.const(1.0, dtype="float32"), - out_zero_point=relay.const(128, dtype="int32"), - in_dtype="int8", - out_dtype="uint8", - ) - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg), - self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), - ) - - def test_uint8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - in_scale=relay.const(1.0, dtype="float32"), - in_zero_point=relay.const(128, dtype="int32"), - in_dtype="uint8", - out_dtype="int8", - ) - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), - self.dequantize_numpy(result), - ) - - -def test_fake_quantize_tanh(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.tanh(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_erf(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.erf(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_exp(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.exp(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_sigmoid(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.sigmoid(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = 
np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - def test_fq_hard_fail(): @tvm.ir.register_op_attr("nn.conv2d", "FTVMFakeQuantizationToInteger", level=11) def conv2d(expr, type_map): # pylint: disable=unused-variable From 3d26528df0877c42dabfdd7aac96627320dc8d06 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:09:33 -0800 Subject: [PATCH 15/72] fq2i stuff --- python/tvm/relay/transform/fake_quantization_to_integer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 9fdc2186a397..3337c7cfb894 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -17,9 +17,11 @@ """Relay functions for rewriting fake quantized ops.""" import numpy as np import tvm -from scipy import special from tvm import relay from tvm.ir import TensorAffineType, TupleAffineType + +# import to register canonicalization funcs for fq2i +from tvm.relay.qnn.op import canonicalizations from tvm.tir import bijective_layout from ..op import register_fake_quantization_to_integer From e60f2b4dd418697b15a44d73ebcfd24e5b7d916a Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:33:35 -0800 Subject: [PATCH 16/72] clean up existing tests --- python/tvm/relay/qnn/op/canonicalizations.py | 2 +- .../relay/qnn/test_canonicalizations.py | 90 ++++++++++++++----- 2 files changed, 67 insertions(+), 25 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index d419e3eb7a8e..334b824f8b50 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -5,8 +5,8 @@ from tvm import relay -# TODO: replace with constant folding def run_const_expr(expr: "relay.Expr") -> np.ndarray: + """Run a const expression, receiving result as np array.""" mod = tvm.IRModule.from_expr(expr) vm_exe = relay.create_executor("vm", mod=mod) return vm_exe.evaluate()().asnumpy() diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index aad6bb3ede19..0f9939f1cbd8 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -7,9 +7,6 @@ class TestIntegerTableLookupTable: """Consists of tests testing functionality of creating lookup tables for integer operations.""" - # def __init__(self) -> None: - # self.input = np.arange(start=0, stop=256, dtype="uint8") - def fake_identity_func_numpy(self, arr: np.ndarray): return arr.astype("float32") @@ -47,40 +44,85 @@ def fake_identity_func_relay( def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): return (np_arr.astype("int32") - np_zero_point) * np_scale - def test_int8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") + def run_identity_function_test( + self, + in_scale: float, + in_zero_point: int, + out_scale: float, + out_zero_point: int, + in_dtype: str, + out_dtype: str, + rtol=1e-7, + atol=0, + ): + relay_lookup, input_arg = self.fake_identity_func_relay( + in_scale=relay.const(in_scale, "float32"), + in_zero_point=relay.const(in_zero_point, "int32"), + out_scale=relay.const(out_scale, "float32"), + out_zero_point=relay.const(out_zero_point, "int32"), + in_dtype=in_dtype, + out_dtype=out_dtype, + ) result = 
canonicalizations.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + np.testing.assert_allclose( + self.dequantize_numpy(input_arg, np_scale=in_scale, np_zero_point=in_zero_point), + self.dequantize_numpy(result, np_scale=out_scale, np_zero_point=out_zero_point), + atol=atol, + rtol=rtol, + ) + + def test_int8_to_int8(self): + """Test int8 input to int8 output mapping workings""" + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=0, + out_scale=1.0, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + ) def test_uint8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") - result = canonicalizations.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=128, + out_scale=1.0, + out_zero_point=128, + in_dtype="uint8", + out_dtype="uint8", + ) def test_int8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - out_scale=relay.const(1.0, dtype="float32"), - out_zero_point=relay.const(128, dtype="int32"), + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=0, + out_scale=1.0, + out_zero_point=128, in_dtype="int8", out_dtype="uint8", ) - result = canonicalizations.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg), - self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), - ) def test_uint8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - in_scale=relay.const(1.0, dtype="float32"), - in_zero_point=relay.const(128, dtype="int32"), + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=128, + out_scale=1.0, + out_zero_point=0, in_dtype="uint8", out_dtype="int8", ) - result = canonicalizations.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), - self.dequantize_numpy(result), + + def test_different_in_out_qparams(self): + """Test mapping with different in/out qparams works.""" + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=128, + out_scale=1.0, + out_zero_point=128, + in_dtype="uint8", + out_dtype="uint8", + atol=1, # numbers range from -128 -> 128 so not that big error + rtol=0, ) From 8bd0b44d44b26c470076e5919c460e9250609851 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:34:08 -0800 Subject: [PATCH 17/72] flesh out todo --- python/tvm/relay/qnn/op/canonicalizations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 334b824f8b50..19ee1b131908 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -83,7 +83,7 @@ def create_integer_lookup_op( """ TODO """ - # TODO: handle multi-channel q + # TODO: handle multi-channel q, if below fails it's probably that in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() out_scale = out_scale.data.numpy().item() From daef1501809db3709305b662f50c7580f6d2993c Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 14:14:13 -0800 Subject: [PATCH 18/72] more tests --- .../relay/qnn/test_canonicalizations.py | 121 ++++++++---------- 1 file changed, 52 insertions(+), 69 deletions(-) diff --git a/tests/python/relay/qnn/test_canonicalizations.py 
b/tests/python/relay/qnn/test_canonicalizations.py index 0f9939f1cbd8..e13d96885051 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -1,6 +1,9 @@ +from typing import Callable + import numpy as np import tvm from tvm import relay +from tvm.relay.op.transform import arange from tvm.relay.qnn.op import canonicalizations @@ -13,6 +16,7 @@ def fake_identity_func_numpy(self, arr: np.ndarray): def fake_identity_func_relay( self, input_arg=None, + floating_point_func: Callable[[np.ndarray], np.ndarray] = fake_identity_func_numpy, in_scale=relay.const(1.0, dtype="float32"), in_zero_point=relay.const(0, dtype="int32"), out_scale=relay.const(1.0, dtype="float32"), @@ -28,7 +32,7 @@ def fake_identity_func_relay( return ( canonicalizations.create_integer_lookup_op( input_arg=input_arg, - floating_point_func=self.fake_identity_func_numpy, + floating_point_func=floating_point_func, in_scale=in_scale, in_zero_point=in_zero_point, out_scale=out_scale, @@ -44,7 +48,7 @@ def fake_identity_func_relay( def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): return (np_arr.astype("int32") - np_zero_point) * np_scale - def run_identity_function_test( + def run_function_test( self, in_scale: float, in_zero_point: int, @@ -52,10 +56,14 @@ def run_identity_function_test( out_zero_point: int, in_dtype: str, out_dtype: str, + floating_point_func: Callable[[np.ndarray], np.ndarray] = fake_identity_func_numpy, + input_arg: relay.Expr = None, rtol=1e-7, atol=0, ): relay_lookup, input_arg = self.fake_identity_func_relay( + input_arg=input_arg, + floating_point_func=floating_point_func, in_scale=relay.const(in_scale, "float32"), in_zero_point=relay.const(in_zero_point, "int32"), out_scale=relay.const(out_scale, "float32"), @@ -65,15 +73,18 @@ def run_identity_function_test( ) result = canonicalizations.run_const_expr(relay_lookup) np.testing.assert_allclose( - self.dequantize_numpy(input_arg, np_scale=in_scale, np_zero_point=in_zero_point), + floating_point_func( + self.dequantize_numpy(input_arg, np_scale=in_scale, np_zero_point=in_zero_point) + ), self.dequantize_numpy(result, np_scale=out_scale, np_zero_point=out_zero_point), atol=atol, rtol=rtol, ) + """Test mapping between different input/output dtypes""" + def test_int8_to_int8(self): - """Test int8 input to int8 output mapping workings""" - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=0, out_scale=1.0, @@ -83,7 +94,7 @@ def test_int8_to_int8(self): ) def test_uint8_to_uint8(self): - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=128, out_scale=1.0, @@ -93,7 +104,7 @@ def test_uint8_to_uint8(self): ) def test_int8_to_uint8(self): - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=0, out_scale=1.0, @@ -103,7 +114,7 @@ def test_int8_to_uint8(self): ) def test_uint8_to_int8(self): - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=128, out_scale=1.0, @@ -112,9 +123,10 @@ def test_uint8_to_int8(self): out_dtype="int8", ) + """Test mapping with different in/out qparams works.""" + def test_different_in_out_qparams(self): - """Test mapping with different in/out qparams works.""" - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=128, out_scale=1.0, @@ -125,64 +137,35 @@ def test_different_in_out_qparams(self): rtol=0, ) + """Test some simple functions""" -""" -def test_fake_quantize_tanh(): - x = 
relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.tanh(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_erf(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.erf(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_exp(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.exp(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_sigmoid(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.sigmoid(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + def test_tanh(self): + # 1 / 64 in scale -- input range is ~ (-2, 2), tanh(+-2) ~= +-1 + # 1 / 128 out_scale -- output range is ~(-1, 1) + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8")), + in_scale=1 / 64, + in_zero_point=0, + out_scale=1 / 128, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=np.tanh, + atol=0.01, + rtol=0.01, + ) - compare_fq_to_int(op, [x_np]) -""" + def test_exp(self): + # input in floating point ~[-2, 2], final output ~[0, 8] + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8")), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=np.exp, + atol=0.03, + rtol=0.01, + ) From 173e25193d0812500e11bf61bd045a01661faca0 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 14:41:35 -0800 Subject: [PATCH 19/72] test on keeping shape good --- .../relay/qnn/test_canonicalizations.py | 50 ++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index e13d96885051..334c628e5e54 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -15,8 +15,8 @@ def fake_identity_func_numpy(self, arr: np.ndarray): def fake_identity_func_relay( self, + floating_point_func: Callable[[np.ndarray], np.ndarray], input_arg=None, - floating_point_func: Callable[[np.ndarray], np.ndarray] = fake_identity_func_numpy, in_scale=relay.const(1.0, dtype="float32"), in_zero_point=relay.const(0, dtype="int32"), out_scale=relay.const(1.0, dtype="float32"), @@ -56,7 +56,7 @@ def run_function_test( out_zero_point: int, in_dtype: str, out_dtype: str, - floating_point_func: Callable[[np.ndarray], np.ndarray] 
= fake_identity_func_numpy, + floating_point_func: Callable[[np.ndarray], np.ndarray], input_arg: relay.Expr = None, rtol=1e-7, atol=0, @@ -91,6 +91,7 @@ def test_int8_to_int8(self): out_zero_point=0, in_dtype="int8", out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, ) def test_uint8_to_uint8(self): @@ -101,6 +102,7 @@ def test_uint8_to_uint8(self): out_zero_point=128, in_dtype="uint8", out_dtype="uint8", + floating_point_func=self.fake_identity_func_numpy, ) def test_int8_to_uint8(self): @@ -111,6 +113,7 @@ def test_int8_to_uint8(self): out_zero_point=128, in_dtype="int8", out_dtype="uint8", + floating_point_func=self.fake_identity_func_numpy, ) def test_uint8_to_int8(self): @@ -121,6 +124,48 @@ def test_uint8_to_int8(self): out_zero_point=0, in_dtype="uint8", out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + ) + + """Test different input shapes""" + + def test_keep_input_shapes(self): + # input in floating point ~[-2, 2], final output ~[0, 8] + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8").reshape([2, 2, 8, 8])), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + atol=0.03, + rtol=0.01, + ) + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8").reshape([2, 2, 64])), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + atol=0.03, + rtol=0.01, + ) + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8").reshape([2, 128])), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + atol=0.03, + rtol=0.01, ) """Test mapping with different in/out qparams works.""" @@ -133,6 +178,7 @@ def test_different_in_out_qparams(self): out_zero_point=128, in_dtype="uint8", out_dtype="uint8", + floating_point_func=self.fake_identity_func_numpy, atol=1, # numbers range from -128 -> 128 so not that big error rtol=0, ) From c4efbfb59f8a6b78d976db7da1242ae68d8f814e Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 14:42:00 -0800 Subject: [PATCH 20/72] lookup table fix --- tests/python/relay/qnn/test_canonicalizations.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index 334c628e5e54..84e01976f4f4 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -1,9 +1,7 @@ from typing import Callable import numpy as np -import tvm from tvm import relay -from tvm.relay.op.transform import arange from tvm.relay.qnn.op import canonicalizations From ddd8dd528808823a1ba75693096ba380c35ba3e1 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 15:04:48 -0800 Subject: [PATCH 21/72] replace canonicalization for rsqrt --- python/tvm/relay/qnn/op/canonicalizations.py | 23 +++++++++++++------- src/relay/qnn/op/rsqrt.cc | 4 ++-- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 19ee1b131908..b0ac0b1c15c1 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -3,6 +3,7 @@ import 
numpy as np import tvm from tvm import relay +from tvm.relay.qnn.op.op import register_qnn_canonicalize def run_const_expr(expr: "relay.Expr") -> np.ndarray: @@ -83,7 +84,7 @@ def create_integer_lookup_op( """ TODO """ - # TODO: handle multi-channel q, if below fails it's probably that + # TODO: handle multi-channel q, below will fail with multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() out_scale = out_scale.data.numpy().item() @@ -112,10 +113,16 @@ def create_integer_lookup_op( return result -""" -# TODO: better error messages if reference functions fail in FQ2I pass -register_unary_elementwise_table_lookup_op("tanh", np.tanh) -register_unary_elementwise_table_lookup_op("erf", special.erf) -register_unary_elementwise_table_lookup_op("exp", np.exp) -register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) -""" +@register_qnn_canonicalize("qnn.rsqrt") +def canonicalize_rsqrt(attrs, args, arg_types): + """Canonicalization for rsqrt""" + return create_integer_lookup_op( + args[0], + lambda arr: 1 / np.sqrt(arr), + args[1], + args[2], + args[3], + args[4], + in_dtype=arg_types[0].dtype, + out_dtype=arg_types[0].dtype, + ) diff --git a/src/relay/qnn/op/rsqrt.cc b/src/relay/qnn/op/rsqrt.cc index 55814dff422b..2bd73d758144 100644 --- a/src/relay/qnn/op/rsqrt.cc +++ b/src/relay/qnn/op/rsqrt.cc @@ -105,6 +105,7 @@ Expr QnnRsqrtCanonicalize(const Attrs& attrs, const Array& new_args, return Quantize(output, args.output_scale, args.output_zero_point, input_type.dtype, types, -1); } +// Canonicalization set in python/tvm/relay/qnn/op/canonicalizations.py RELAY_REGISTER_OP("qnn.rsqrt") .describe("Elementwise rsqrt for quantized tensors.") .set_num_inputs(5) @@ -116,8 +117,7 @@ RELAY_REGISTER_OP("qnn.rsqrt") "The quantization zero_point of the output tensor.") .set_support_level(11) .add_type_rel("QRsqrt", QnnRsqrtRel) - .set_attr("TNonComputational", true) - .set_attr("FTVMQnnCanonicalize", QnnRsqrtCanonicalize); + .set_attr("TNonComputational", true); TVM_REGISTER_GLOBAL("relay.qnn.op._make.rsqrt").set_body_typed(MakeQuantizedRsqrt); From f65583ae814ac96049b1a6cf61e6c1d20b8940a6 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 15:05:47 -0800 Subject: [PATCH 22/72] remove canonicalization of rsqrt --- src/relay/qnn/op/rsqrt.cc | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/src/relay/qnn/op/rsqrt.cc b/src/relay/qnn/op/rsqrt.cc index 2bd73d758144..6d37b1aa8d5d 100644 --- a/src/relay/qnn/op/rsqrt.cc +++ b/src/relay/qnn/op/rsqrt.cc @@ -69,42 +69,6 @@ Expr MakeQuantizedRsqrt(Expr x, Expr scale, Expr zero_point, Expr output_scale, return Call(op, {x, scale, zero_point, output_scale, output_zero_point}, Attrs(), {}); } -/* - * \brief Canonicalizes the QNN rsqrt op. - * \param attrs The empty attribute. - * \param new_args The new mutated args to the call node. - * \param arg_types The types of input and output. - * \return The sequence of Relay ops for add op. - */ -Expr QnnRsqrtCanonicalize(const Attrs& attrs, const Array& new_args, - const Array& arg_types) { - // At this time, due to the complexity of implementing this op in int8 or uint8, - // we dequantize the input, run the op in float, and then quantize the output (as below). - // This acts as a placeholder for future hardware enablement, where more hardware specific - // canonicalization can be provided. - - // Get the args. - QnnUnaryOpArguments args(new_args); - - // Get the input dtype and shape. 
- QnnUnaryOpTensorType input_type(arg_types, 0); - - // Get the types for dequantize/quantize. - Array types; - for (size_t i = 1; i < 5; ++i) { - types.push_back(arg_types[i]); - } - - // Dequantize input. - auto dequantized_arg = Dequantize(args.x, args.scale, args.zero_point, types, -1); - - // Compute Rsqrt(Q_x') - auto output = Rsqrt(dequantized_arg); - - // Quantize output. - return Quantize(output, args.output_scale, args.output_zero_point, input_type.dtype, types, -1); -} - // Canonicalization set in python/tvm/relay/qnn/op/canonicalizations.py RELAY_REGISTER_OP("qnn.rsqrt") .describe("Elementwise rsqrt for quantized tensors.") From 0b8dc75a6bd1fca355e137ebeee668c5102c4862 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 20:55:19 -0800 Subject: [PATCH 23/72] add asf headers --- python/tvm/relay/qnn/op/canonicalizations.py | 16 ++++++++++++++++ tests/python/relay/qnn/test_canonicalizations.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index b0ac0b1c15c1..23c5e37f7ebe 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. from typing import Callable import numpy as np diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index 84e01976f4f4..0505a88c07bd 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from typing import Callable import numpy as np From 3c29f6b92f8904697226df3fea4f8c4ed2744a52 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 13:45:04 -0800 Subject: [PATCH 24/72] topi tests --- tests/python/topi/python/test_topi_transform.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index 42d2463b8952..622d6946e913 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -18,14 +18,11 @@ import numpy as np import pytest import tvm -from tvm import te -from tvm import topi -from tvm import relay +import tvm.testing import tvm.topi.testing +from tvm import relay, te, topi from tvm.contrib.nvcc import have_fp16 -import tvm.testing - def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): A = te.placeholder(shape=in_shape, name="A") @@ -1010,6 +1007,16 @@ def test_gather(): verify_gather(np.random.randn(4, 7, 5), 1, np.random.randint(low=0, high=7, size=(4, 10, 5))) verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 2))) verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 10))) + verify_gather( + np.random.randn(4, 7, 5), + 2, + np.random.randint(low=0, high=5, size=(4, 7, 10)).astype("uint32"), + ) + verify_gather( + np.random.randn(4, 7, 5), + 2, + np.random.randint(low=0, high=5, size=(4, 7, 10)).astype("uint8"), + ) @tvm.testing.uses_gpu From eda9f19172426a184e2be845d3957390b0bdc3c7 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 13:51:14 -0800 Subject: [PATCH 25/72] gather supports unsigned integer tests --- tests/python/relay/test_op_level3.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 6d6a2a9b65ed..8c76dff0a0f5 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -21,10 +21,8 @@ import numpy as np import pytest - import tvm import tvm.testing - from tvm import relay, te from tvm.error import TVMError from tvm.relay import create_executor, transform @@ -32,7 +30,6 @@ from utils import ref_funcs - executor_kind = tvm.testing.parameter("graph", "debug") @@ -1267,12 +1264,12 @@ def test_scatter_add(self, target, dev, ref_data, dshape, ishape, axis, dtype): ], ) def test_gather(target, dev, executor_kind, data, axis, indices, ref_res): - def verify_gather(data, axis, indices, ref_res): + def verify_gather(data, axis, indices, ref_res, indices_dtype="int32"): data = np.asarray(data, dtype="float32") - indices = np.asarray(indices, dtype="int32") + indices = np.asarray(indices, dtype=indices_dtype) ref_res = np.asarray(ref_res) d = relay.var("x", relay.TensorType(data.shape, "float32")) - i = relay.var("y", relay.TensorType(indices.shape, "int32")) + i = relay.var("y", relay.TensorType(indices.shape, indices_dtype)) z = relay.gather(d, axis, i) func = relay.Function([d, i], z) @@ -1283,6 +1280,7 @@ def verify_gather(data, axis, indices, ref_res): tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) verify_gather(data, axis, indices, ref_res) + verify_gather(data, axis, indices, ref_res, indices_dtype="uint32") def test_gather_nd(target, dev, executor_kind): From ab25dc0f55fab88792bf6d3d2d22ca88589cca7e Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 14:10:24 -0800 Subject: [PATCH 26/72] fix things 
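This change replaces the TODO docstrings below with real documentation for the
lookup-table helpers. As a rough sketch of how the table builder is meant to be
driven (numpy-level; the tanh choice and the 1/64 and 1/128 scales are taken from
the tests earlier in this series, everything else here is illustrative only):

    import numpy as np
    from tvm import relay
    from tvm.relay.qnn.op import canonicalizations

    # Build a 256-entry int8 -> int8 table approximating tanh. An input scale
    # of 1/64 covers roughly (-2, 2); tanh saturates to (-1, 1), so an output
    # scale of 1/128 covers the full output range.
    table = canonicalizations.create_integer_lookup_table(
        floating_point_func=np.tanh,
        input_scale=relay.const(1.0 / 64.0),
        input_zero_point=relay.const(0, dtype="int32"),
        output_scale=relay.const(1.0 / 128.0),
        output_zero_point=relay.const(0, dtype="int32"),
        in_dtype="int8",
        out_dtype="int8",
    )
    assert table.shape == (256,)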
--- python/tvm/relay/qnn/op/canonicalizations.py | 51 ++++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 23c5e37f7ebe..db8b71e68998 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -23,7 +23,7 @@ def run_const_expr(expr: "relay.Expr") -> np.ndarray: - """Run a const expression, receiving result as np array.""" + """Evaluate a const expression, receiving result as np array.""" mod = tvm.IRModule.from_expr(expr) vm_exe = relay.create_executor("vm", mod=mod) return vm_exe.evaluate()().asnumpy() @@ -41,7 +41,24 @@ def create_integer_lookup_table( out_dtype: str = "uint8", ) -> np.ndarray: """ - TODO + Return a table where each input indexes to the quantized output approximating the given function. + + Note this also supports mapping unsigned and signed integers to each other. + + Args: + floating_point_func: The numpy function which this table is to approximate + input_scale: The scale of the quantized input tensor. + input_zero_point: The zero point of the quantized input tensor. + output_scale: The scale of the quantized output tensor. + output_zero_point: The zero point of the quantized output tensor. + in_axis: The axis for multi-channel quantization of the input if applicable. + out_axis: The axis for multi-channel quantization of the output if applicable. + in_dtype: The dtype of the input tensor. + out_dtype: The wanted dtype of the output tensor. + + Returns: + A numpy array where values in quantized space will index to the output in quantized space + approximating the given function. """ if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( np.dtype(out_dtype), np.integer @@ -98,8 +115,24 @@ def create_integer_lookup_op( out_dtype: str = "uint8", ) -> "relay.Expr": """ - TODO + Create a quantized version of the given floating point unary operation using table lookup. + + Args: + input_arg: The quantized input to the final function. + floating_point_func: The numpy function which this table is to approximate + in_scale: The scale of the quantized input tensor. + in_zero_point: The zero point of the quantized input tensor. + out_scale: The scale of the quantized output tensor. + out_zero_point: The zero point of the quantized output tensor. + in_axis: The axis for multi-channel quantization of the input if applicable. + out_axis: The axis for multi-channel quantization of the output if applicable. + in_dtype: The dtype of the input tensor. + out_dtype: The wanted dtype of the output tensor. + + Returns: + A Relay expression representing a quantized version of the given function. 
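+
+    Example (an illustrative sketch only; the scales and zero points below are
+    made up, and `quantized_input` stands in for any quantized relay expression):
+        op = create_integer_lookup_op(
+            input_arg=quantized_input,
+            floating_point_func=lambda arr: 1 / np.sqrt(arr),
+            in_scale=relay.const(0.05),
+            in_zero_point=relay.const(0, dtype="int32"),
+            out_scale=relay.const(0.1),
+            out_zero_point=relay.const(0, dtype="int32"),
+            in_dtype="uint8",
+            out_dtype="uint8",
+        )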
""" + # TODO: handle multi-channel q, below will fail with multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() @@ -133,12 +166,12 @@ def create_integer_lookup_op( def canonicalize_rsqrt(attrs, args, arg_types): """Canonicalization for rsqrt""" return create_integer_lookup_op( - args[0], - lambda arr: 1 / np.sqrt(arr), - args[1], - args[2], - args[3], - args[4], + input_arg=args[0], + floating_point_func=lambda arr: 1 / np.sqrt(arr), + in_scale=args[1], + in_zero_point=args[2], + out_scale=args[3], + out_zero_point=args[4], in_dtype=arg_types[0].dtype, out_dtype=arg_types[0].dtype, ) From fcc83131feb2b515450301a59809a9a4d74894c1 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 14:37:40 -0800 Subject: [PATCH 27/72] move to legalization --- python/tvm/relay/qnn/op/canonicalizations.py | 15 --------------- python/tvm/relay/qnn/op/legalizations.py | 20 ++++++++++++++++++-- src/relay/qnn/op/rsqrt.cc | 4 +++- tests/python/relay/test_op_qnn_rsqrt.py | 4 +++- 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index db8b71e68998..bd0108d72e64 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -160,18 +160,3 @@ def create_integer_lookup_op( result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, input_arg) return result - - -@register_qnn_canonicalize("qnn.rsqrt") -def canonicalize_rsqrt(attrs, args, arg_types): - """Canonicalization for rsqrt""" - return create_integer_lookup_op( - input_arg=args[0], - floating_point_func=lambda arr: 1 / np.sqrt(arr), - in_scale=args[1], - in_zero_point=args[2], - out_scale=args[3], - out_zero_point=args[4], - in_dtype=arg_types[0].dtype, - out_dtype=arg_types[0].dtype, - ) diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py index 52fe6c8ebe2f..947e9d823134 100644 --- a/python/tvm/relay/qnn/op/legalizations.py +++ b/python/tvm/relay/qnn/op/legalizations.py @@ -17,12 +17,13 @@ # pylint: disable=invalid-name, unused-argument """Backend QNN related feature registration""" import numpy as np - import tvm from tvm import relay from tvm._ffi.base import TVMError -from .. import op as reg +from tvm.relay.qnn.op.canonicalizations import create_integer_lookup_op + from ....topi.x86.utils import target_has_sse42 +from .. import op as reg ################################################# # Register the functions for different operators. @@ -46,6 +47,21 @@ def legalize_qnn_dense(attrs, inputs, types): return qnn_dense_legalize(attrs, inputs, types) +# Registering QNN dense legalization function. +@reg.register_qnn_legalize("qnn.rsqrt") +def legalize_qnn_dense(attrs, inputs, types): + return create_integer_lookup_op( + input_arg=inputs[0], + floating_point_func=lambda arr: 1 / np.sqrt(arr), + in_scale=inputs[1], + in_zero_point=inputs[2], + out_scale=inputs[3], + out_zero_point=inputs[4], + in_dtype=types[0].dtype, + out_dtype=types[0].dtype, + ) + + # Default to None. If overridden by target, this will not be run. # Generic QNN Conv2D legalization function. 
@tvm.target.generic_func diff --git a/src/relay/qnn/op/rsqrt.cc b/src/relay/qnn/op/rsqrt.cc index 6d37b1aa8d5d..93baa308a796 100644 --- a/src/relay/qnn/op/rsqrt.cc +++ b/src/relay/qnn/op/rsqrt.cc @@ -69,7 +69,9 @@ Expr MakeQuantizedRsqrt(Expr x, Expr scale, Expr zero_point, Expr output_scale, return Call(op, {x, scale, zero_point, output_scale, output_zero_point}, Attrs(), {}); } -// Canonicalization set in python/tvm/relay/qnn/op/canonicalizations.py +// Translation to relay is done via canonicalization/legalization functions in python +// e.g. python/tvm/relay/qnn/op/canonicalizations.py or +// python/tvm/relay/qnn/op/legalizations.py RELAY_REGISTER_OP("qnn.rsqrt") .describe("Elementwise rsqrt for quantized tensors.") .set_num_inputs(5) diff --git a/tests/python/relay/test_op_qnn_rsqrt.py b/tests/python/relay/test_op_qnn_rsqrt.py index 1eb9b64057ca..0e40768343bd 100644 --- a/tests/python/relay/test_op_qnn_rsqrt.py +++ b/tests/python/relay/test_op_qnn_rsqrt.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -import tvm import numpy as np +import tvm from tvm import relay @@ -51,6 +51,7 @@ def test_saturation(): func = relay.Function([x], y) mod = tvm.IRModule.from_expr(func) mod = relay.transform.InferType()(mod) + mod = relay.qnn.transform.Legalize()(mod) mod = relay.qnn.transform.CanonicalizeOps()(mod) func = mod["main"] @@ -77,6 +78,7 @@ def test_saturation(): func = relay.Function([x], y) mod = tvm.IRModule.from_expr(func) mod = relay.transform.InferType()(mod) + mod = relay.qnn.transform.Legalize()(mod) mod = relay.qnn.transform.CanonicalizeOps()(mod) func = mod["main"] From 72e150fb685a67700e12dc439eacd23567949f1b Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 11:06:40 -0800 Subject: [PATCH 28/72] jostle ci From 19de289f7ee901dd2d519261158b5ed1ccc28e32 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 16:32:17 -0800 Subject: [PATCH 29/72] linting --- python/tvm/relay/qnn/op/canonicalizations.py | 4 ++-- python/tvm/relay/qnn/op/legalizations.py | 2 +- python/tvm/relay/qnn/op/op.py | 2 +- python/tvm/relay/transform/fake_quantization_to_integer.py | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index bd0108d72e64..05b68e731239 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -14,12 +14,12 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +"""Consist of utilities and methods for lowering QNN into mainline relay.""" from typing import Callable import numpy as np import tvm from tvm import relay -from tvm.relay.qnn.op.op import register_qnn_canonicalize def run_const_expr(expr: "relay.Expr") -> np.ndarray: @@ -41,7 +41,7 @@ def create_integer_lookup_table( out_dtype: str = "uint8", ) -> np.ndarray: """ - Return a table where each input indexes to the quantized output approximating the given function. + Return a table where each input indexes to the output quantizing the given function. Note this also supports mapping unsigned and signed integers to each other. 
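The signed/unsigned note above works by reinterpreting bit patterns rather than
converting values, which is also what the reinterpret/take rewrite in
[PATCH 30/72] below relies on. A minimal numpy sketch of the trick (illustrative
only, not part of any patch in this series):

    import numpy as np

    # The table is canonically indexed by the unsigned view of the input's bit
    # pattern: int8 value -1 is the pattern 0xFF, i.e. entry 255 of the table.
    patterns = np.arange(0, 256, dtype="uint8")
    as_int8 = patterns.view("int8")
    assert as_int8[255] == -1  # 0xFF reinterprets to -1
    assert as_int8[127] == 127  # non-negative patterns are unchanged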
diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py
index 947e9d823134..fd835d72fc09 100644
--- a/python/tvm/relay/qnn/op/legalizations.py
+++ b/python/tvm/relay/qnn/op/legalizations.py
@@ -49,7 +49,7 @@ def legalize_qnn_dense(attrs, inputs, types):

 # Registering QNN rsqrt legalization function.
 @reg.register_qnn_legalize("qnn.rsqrt")
-def legalize_qnn_dense(attrs, inputs, types):
+def legalize_qnn_rsqrt(attrs, inputs, types):
     return create_integer_lookup_op(
         input_arg=inputs[0],
         floating_point_func=lambda arr: 1 / np.sqrt(arr),
diff --git a/python/tvm/relay/qnn/op/op.py b/python/tvm/relay/qnn/op/op.py
index c83a32e2ce6a..335947b9f7ce 100644
--- a/python/tvm/relay/qnn/op/op.py
+++ b/python/tvm/relay/qnn/op/op.py
@@ -49,7 +49,7 @@ def register_qnn_canonicalize(op_name, legal_op=None, level=10):
     op_name : str
         The name of the operator

-    legal_op: function (attrs: Attrs, args: List[Expr], List[relay.Type]: arg_types) -> new_expr: Expr
+    legal_op: function (Attrs, List[Expr], List[relay.Type]) -> Expr
         The function for transforming an expr to another expr.

     level : int
diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 3337c7cfb894..7398dc98c83d 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -21,6 +21,7 @@ from tvm.ir import TensorAffineType, TupleAffineType

 # import to register canonicalization funcs for fq2i
+# pylint: disable=unused-import
 from tvm.relay.qnn.op import canonicalizations
 from tvm.tir import bijective_layout

From 76fb6bcb6a30193de55d275af39b9a52c3b3f7dd Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 26 Jan 2022 19:07:55 -0800
Subject: [PATCH 30/72] use take instead of gather
---
 python/tvm/relay/qnn/op/canonicalizations.py | 6 ++----
 src/relay/op/tensor/transform.cc             | 3 ++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py
index 05b68e731239..95e0cb60368d 100644
--- a/python/tvm/relay/qnn/op/canonicalizations.py
+++ b/python/tvm/relay/qnn/op/canonicalizations.py
@@ -155,8 +155,6 @@ def create_integer_lookup_op(
     in_dtype_num_bits = in_dtype_info.bits

     lookup_table = relay.const(lookup_table)
-    index_tensor = relay.reshape(input_arg, [-1])
-    index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}")
-    result = relay.gather(lookup_table, -1, index_tensor)
-    result = relay.reshape_like(result, input_arg)
+    index_tensor = relay.reinterpret(input_arg, f"uint{in_dtype_num_bits}")
+    result = relay.take(lookup_table, index_tensor, axis=0, mode="fast")
     return result
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc
index ebf8a0653b70..a5b9f343d664 100644
--- a/src/relay/op/tensor/transform.cc
+++ b/src/relay/op/tensor/transform.cc
@@ -1276,7 +1276,8 @@ bool TakeRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
   if (indices == nullptr) {
     return false;
   }
-  ICHECK(indices->dtype.is_int()) << "indices of take must be tensor of integer";
+  ICHECK(indices->dtype.is_int() || indices->dtype.is_uint())
+      << "indices of take must be tensor of integer";

   const auto param = attrs.as<TakeAttrs>();
   ICHECK(param != nullptr);
From f65583ae814ac96049b1a6cf61e6c1d20b8940a6 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 26 Jan 2022 19:10:46 -0800
Subject: [PATCH 31/72] remove gather changes
---
 include/tvm/topi/transform.h | 2 +-
src/relay/op/tensor/transform.cc | 3 +-- tests/python/relay/test_op_level3.py | 7 +++---- tests/python/topi/python/test_topi_transform.py | 10 ---------- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/include/tvm/topi/transform.h b/include/tvm/topi/transform.h index 83efd1c5825d..1ad9d7da72ba 100644 --- a/include/tvm/topi/transform.h +++ b/include/tvm/topi/transform.h @@ -1321,7 +1321,7 @@ inline Tensor gather(const Tensor& data, int axis, const Tensor& indices, size_t indices_dim_i = static_cast(GetConstInt(indices->shape[axis])); ICHECK_GE(indices_dim_i, 1); } - ICHECK(indices->dtype.is_int() || indices->dtype.is_uint()); + ICHECK(indices->dtype.is_int()); Array out_shape; for (size_t i = 0; i < ndim_i; ++i) { diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index a5b9f343d664..d56150d8aa78 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -3319,8 +3319,7 @@ bool GatherRel(const Array& types, int num_inputs, const Attrs& attrs, << "Gather: expect indices type to be TensorType but get " << types[1]; return false; } - ICHECK(indices->dtype.is_int() || indices->dtype.is_uint()) - << "indices of gather must be tensor of integer"; + ICHECK(indices->dtype.is_int()) << "indices of take must be tensor of integer"; const auto param = attrs.as(); ICHECK(param != nullptr); ICHECK(param->axis.defined()); diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 8c76dff0a0f5..3f345e04d8f0 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -1264,12 +1264,12 @@ def test_scatter_add(self, target, dev, ref_data, dshape, ishape, axis, dtype): ], ) def test_gather(target, dev, executor_kind, data, axis, indices, ref_res): - def verify_gather(data, axis, indices, ref_res, indices_dtype="int32"): + def verify_gather(data, axis, indices, ref_res): data = np.asarray(data, dtype="float32") - indices = np.asarray(indices, dtype=indices_dtype) + indices = np.asarray(indices, dtype="int32") ref_res = np.asarray(ref_res) d = relay.var("x", relay.TensorType(data.shape, "float32")) - i = relay.var("y", relay.TensorType(indices.shape, indices_dtype)) + i = relay.var("y", relay.TensorType(indices.shape, "int32")) z = relay.gather(d, axis, i) func = relay.Function([d, i], z) @@ -1280,7 +1280,6 @@ def verify_gather(data, axis, indices, ref_res, indices_dtype="int32"): tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) verify_gather(data, axis, indices, ref_res) - verify_gather(data, axis, indices, ref_res, indices_dtype="uint32") def test_gather_nd(target, dev, executor_kind): diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index 622d6946e913..c0f595e8b5db 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -1007,16 +1007,6 @@ def test_gather(): verify_gather(np.random.randn(4, 7, 5), 1, np.random.randint(low=0, high=7, size=(4, 10, 5))) verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 2))) verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 10))) - verify_gather( - np.random.randn(4, 7, 5), - 2, - np.random.randint(low=0, high=5, size=(4, 7, 10)).astype("uint32"), - ) - verify_gather( - np.random.randn(4, 7, 5), - 2, - np.random.randint(low=0, high=5, size=(4, 7, 10)).astype("uint8"), - ) @tvm.testing.uses_gpu From 
520f4f1f1c42406730d53f066e6ccf4e7b8e3b1b Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:12:15 -0800 Subject: [PATCH 32/72] undo changes --- tests/python/topi/python/test_topi_transform.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index c0f595e8b5db..cf2431bba982 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -18,11 +18,13 @@ import numpy as np import pytest import tvm -import tvm.testing +from tvm import te +from tvm import topi +from tvm import relay import tvm.topi.testing -from tvm import relay, te, topi from tvm.contrib.nvcc import have_fp16 +import tvm.testing def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): A = te.placeholder(shape=in_shape, name="A") From 7a0f43be4fec5b80c78a9e903ed5d48a5e12b2f4 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:12:35 -0800 Subject: [PATCH 33/72] undo changes --- tests/python/topi/python/test_topi_transform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index cf2431bba982..42d2463b8952 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -26,6 +26,7 @@ import tvm.testing + def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): A = te.placeholder(shape=in_shape, name="A") B = topi.expand_dims(A, axis, num_newaxis) From 6f8f34a3271f27bfe9aae70cd72c6cb382b6522e Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:13:14 -0800 Subject: [PATCH 34/72] undo changes --- tests/python/relay/test_op_level3.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 3f345e04d8f0..6d6a2a9b65ed 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -21,8 +21,10 @@ import numpy as np import pytest + import tvm import tvm.testing + from tvm import relay, te from tvm.error import TVMError from tvm.relay import create_executor, transform @@ -30,6 +32,7 @@ from utils import ref_funcs + executor_kind = tvm.testing.parameter("graph", "debug") From 4e7b96aaa8b11a31694530b37ce0eaa5356a8880 Mon Sep 17 00:00:00 2001 From: "andrewzhaoluo (generated by with_the_same_user script)" Date: Fri, 28 Jan 2022 19:34:45 +0000 Subject: [PATCH 35/72] move thing in range --- .../relay/test_pass_fake_quantization_to_integer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 28166bb8be72..9cc359d472fd 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -305,14 +305,14 @@ def test_fake_quantize_global_avg_pool(): def test_fake_quantize_rsqrt(): - x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") - zero = relay.const(0) + x = relay.var("x", shape=[1, 3, 3, 3], dtype="int8") + mid_point = relay.const(-128) - x = relay.qnn.op.dequantize(x, relay.const(2.0), zero) + x = relay.qnn.op.dequantize(x, relay.const(0.125), mid_point) op = relay.rsqrt(x) - op = relay.qnn.op.quantize(op, relay.const(2.0), zero) + op = relay.qnn.op.quantize(op, relay.const(0.125), mid_point) - x_np = 
np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") + x_np = np.random.randint(-128, 127, size=[1, 3, 3, 3], dtype="int8") compare_fq_to_int(op, [x_np], True) From 40d5a287e52f2d5ec70c69ed6336f37beaf3659d Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 12 Jan 2022 16:31:31 -0800 Subject: [PATCH 36/72] initial tanh impl --- .../transform/fake_quantization_to_integer.py | 104 ++++++++++++++++++ .../test_pass_fake_quantization_to_integer.py | 16 +++ 2 files changed, 120 insertions(+) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index db46c2cbfd58..19e7cee6a9dc 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -92,6 +92,110 @@ def identity(expr, type_map): return register_fake_quantization_to_integer(op_name, identity) +# TODO: replace with constant folding +def run_const_expr(expr): + mod = tvm.IRModule.from_expr(expr) + vm_exe = relay.create_executor("vm", mod=mod) + return vm_exe.evaluate()().asnumpy() + + +def create_integer_lookup_table( + floating_point_func, + input_scale, + input_zero_point, + output_scale, + output_zero_point, + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", +): + if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( + np.dtype(out_dtype), np.integer + ): + raise ValueError( + f"Only integer dtypes allowed got {in_dtype} and {out_dtype} for in and out dtypes." + ) + + dtype_info = np.iinfo(in_dtype) + + # Use TVMs quantization methods via relay to be consistent + inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) + inputs_dequantized = run_const_expr( + relay.qnn.op.dequantize( + inputs_quantized, + input_scale=input_scale, + input_zero_point=input_zero_point, + axis=in_axis, + ) + ) + + output_dequantized = relay.const(floating_point_func(inputs_dequantized)) + output_quantized = run_const_expr( + relay.qnn.op.quantize( + output_dequantized, output_scale, output_zero_point, out_axis, out_dtype + ) + ) + + return output_quantized + + +def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): + """Implement an operator in quantized space via table lookup operations (e.g. via gather). + + op_name: str + The name of the operator to register for FQ2I. + + example_func: Callable[[np.ndarray], np.ndarray] + The FP32 version of the function to quantize operating on numpy arrays. + """ + + def func(expr, type_map): + assert len(expr.args) == 1 + arg = expr.args[0] + in_scale = fold_constant(type_map[arg].scale) + in_zero_point = fold_constant(type_map[arg].zero_point) + out_scale = fold_constant(type_map[expr].scale) + out_zero_point = fold_constant(type_map[expr].zero_point) + if ( + not isinstance(in_scale, relay.Constant) + or not isinstance(in_zero_point, relay.Constant) + or not isinstance(out_scale, relay.Constant) + or not isinstance(out_zero_point, relay.Constant) + ): + raise ValueError( + f"{op_name} requires input/output quantization params to be known at compile time!" 
+ ) + + # TODO: handle multi-channel q + in_scale = in_scale.data.numpy().item() + in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() + out_zero_point = out_zero_point.data.numpy().item() + + lookup_table = create_integer_lookup_table( + floating_point_func, + relay.const(in_scale), + relay.const(in_zero_point, dtype="int32"), + relay.const(out_scale), + relay.const(out_zero_point, dtype="int32"), + in_axis=type_map[arg].axis, + in_dtype=type_map[arg].dtype, + out_axis=type_map[expr].axis, + out_dtype=type_map[expr].dtype, + ) + lookup_table = relay.const(lookup_table) + index_tensor = relay.reshape(arg, [-1]) + result = relay.gather(lookup_table, -1, index_tensor) + result = relay.reshape_like(result, arg) + return [result, type_map[expr]] + + return register_fake_quantization_to_integer(op_name, func) + + +register_unary_elementwise_table_lookup_op("tanh", np.tanh) + register_unary_identity("reshape") register_unary_identity("squeeze") register_unary_identity("strided_slice") diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index aee2741782fd..9a2384705982 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -26,6 +26,7 @@ def compare_fq_to_int(expr, args, allow_rounding_error=False): mod = tvm.relay.transform.InferType()(mod) mod_int = tvm.relay.transform.FakeQuantizationToInteger()(mod) + breakpoint() assert not tvm.ir.structural_equal(mod, mod_int) result = ( @@ -586,6 +587,21 @@ def run_test_case(partial_func): run_test_case(lambda x: relay.op.min(x, axis=1)) +def test_fake_quantize_tanh(): + x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(2.0), zero) + op = relay.op.tanh(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(1.0), zero) + + x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + def test_fq_hard_fail(): @tvm.ir.register_op_attr("nn.conv2d", "FTVMFakeQuantizationToInteger", level=11) def conv2d(expr, type_map): # pylint: disable=unused-variable From 95537afed588596359a6be041ed0a42b2124b4b6 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 12 Jan 2022 16:42:28 -0800 Subject: [PATCH 37/72] smalls error --- python/tvm/relay/transform/fake_quantization_to_integer.py | 1 + tests/python/relay/test_pass_fake_quantization_to_integer.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 19e7cee6a9dc..84c22f8f9a94 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -189,6 +189,7 @@ def func(expr, type_map): index_tensor = relay.reshape(arg, [-1]) result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, arg) + breakpoint() return [result, type_map[expr]] return register_fake_quantization_to_integer(op_name, func) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 9a2384705982..113f1f21759b 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ 
b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -591,11 +591,11 @@ def test_fake_quantize_tanh(): x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(2.0), zero) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) op = relay.op.tanh(x) # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(1.0), zero) + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") From 496c2509b4b7ef3a375070b71aa0d698486bb4eb Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 18 Jan 2022 13:48:16 -0800 Subject: [PATCH 38/72] support uint and int lookup into tables --- .../transform/fake_quantization_to_integer.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 84c22f8f9a94..24907dc0c2ec 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -119,8 +119,20 @@ def create_integer_lookup_table( dtype_info = np.iinfo(in_dtype) + num_bits = dtype_info.bits + # Use TVMs quantization methods via relay to be consistent - inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + # inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + + # First generate a list of all num_bit integer patterns + inputs_quantized = np.array(range(0, 2 ** num_bits), dtype=f"uint{num_bits}") + + # Reinterpret bits as the real datatype + # Note what we are doing here is a bit tricky, the canonical view of our lookup table + # is using the uintX version. When we run the lookup in the relay graph, we note + # that the "gather" operation used supports negative indices which make the mapping + # valid! + inputs_quantized = inputs_quantized.view(in_dtype) inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) inputs_dequantized = run_const_expr( relay.qnn.op.dequantize( @@ -185,11 +197,11 @@ def func(expr, type_map): out_axis=type_map[expr].axis, out_dtype=type_map[expr].dtype, ) + lookup_table = relay.const(lookup_table) index_tensor = relay.reshape(arg, [-1]) result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, arg) - breakpoint() return [result, type_map[expr]] return register_fake_quantization_to_integer(op_name, func) From 2334e1c364a79ea2b4cea7ed4f26459a660b0e41 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 18 Jan 2022 15:40:08 -0800 Subject: [PATCH 39/72] reinterpret cast, working tanh tests --- .../relay/transform/fake_quantization_to_integer.py | 11 +++++++---- .../relay/test_pass_fake_quantization_to_integer.py | 5 ++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 24907dc0c2ec..e15398c93e06 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -129,9 +129,8 @@ def create_integer_lookup_table( # Reinterpret bits as the real datatype # Note what we are doing here is a bit tricky, the canonical view of our lookup table - # is using the uintX version. 
When we run the lookup in the relay graph, we note - # that the "gather" operation used supports negative indices which make the mapping - # valid! + # is using the uintX version. When we run the lookup in the relay graph, we cast the + # bit pattern back into this form. inputs_quantized = inputs_quantized.view(in_dtype) inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) inputs_dequantized = run_const_expr( @@ -197,9 +196,13 @@ def func(expr, type_map): out_axis=type_map[expr].axis, out_dtype=type_map[expr].dtype, ) - + + in_dtype_info = np.iinfo(type_map[arg].dtype) + in_dtype_num_bits = in_dtype_info.bits + lookup_table = relay.const(lookup_table) index_tensor = relay.reshape(arg, [-1]) + index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, arg) return [result, type_map[expr]] diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 113f1f21759b..8c016e4c8976 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -26,7 +26,6 @@ def compare_fq_to_int(expr, args, allow_rounding_error=False): mod = tvm.relay.transform.InferType()(mod) mod_int = tvm.relay.transform.FakeQuantizationToInteger()(mod) - breakpoint() assert not tvm.ir.structural_equal(mod, mod_int) result = ( @@ -588,7 +587,7 @@ def run_test_case(partial_func): def test_fake_quantize_tanh(): - x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") zero = relay.const(0) x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) @@ -597,7 +596,7 @@ def test_fake_quantize_tanh(): # Have difference scales for input/output to test if can handle op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") compare_fq_to_int(op, [x_np]) From 5c65eb17f648fc4fe5ecfcb55bcad32e63147b9b Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 09:47:56 -0800 Subject: [PATCH 40/72] refactor relay func creation --- .../transform/fake_quantization_to_integer.py | 65 ++++++++++++++++++- .../test_pass_fake_quantization_to_integer.py | 27 ++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index e15398c93e06..eca3d12d7bcd 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -152,6 +152,50 @@ def create_integer_lookup_table( return output_quantized +def create_integer_lookup_op( + input_arg, + floating_point_func, + in_scale, + in_zero_point, + out_scale, + out_zero_point, + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", +): + """ + TODO + """ + # TODO: handle multi-channel q + in_scale = in_scale.data.numpy().item() + in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() + out_zero_point = out_zero_point.data.numpy().item() + + lookup_table = create_integer_lookup_table( + floating_point_func, + relay.const(in_scale), + relay.const(in_zero_point, dtype="int32"), + relay.const(out_scale), + relay.const(out_zero_point, dtype="int32"), + in_axis=in_axis, + in_dtype=in_dtype, + 
out_axis=out_axis, + out_dtype=out_dtype, + ) + + in_dtype_info = np.iinfo(in_dtype) + in_dtype_num_bits = in_dtype_info.bits + + lookup_table = relay.const(lookup_table) + index_tensor = relay.reshape(input_arg, [-1]) + index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") + result = relay.gather(lookup_table, -1, index_tensor) + result = relay.reshape_like(result, input_arg) + return result + + def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): """Implement an operator in quantized space via table lookup operations (e.g. via gather). @@ -163,12 +207,31 @@ def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): """ def func(expr, type_map): - assert len(expr.args) == 1 + assert len(expr.args) == 1, "only support elemwise ops for now!" arg = expr.args[0] in_scale = fold_constant(type_map[arg].scale) in_zero_point = fold_constant(type_map[arg].zero_point) out_scale = fold_constant(type_map[expr].scale) out_zero_point = fold_constant(type_map[expr].zero_point) + in_axis = type_map[arg].axis + in_dtype = type_map[arg].dtype + out_axis = type_map[expr].axis + out_dtype = type_map[expr].dtype + result = create_integer_lookup_op( + input_arg=arg, + floating_point_func=floating_point_func, + in_scale=in_scale, + in_zero_point=in_zero_point, + out_scale=out_scale, + out_zero_point=out_zero_point, + in_axis=in_axis, + in_dtype=in_dtype, + out_axis=out_axis, + out_dtype=out_dtype, + ) + return [result, type_map[expr]] + arg = expr.args[0] + if ( not isinstance(in_scale, relay.Constant) or not isinstance(in_zero_point, relay.Constant) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 8c016e4c8976..a23a316ae50f 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -19,6 +19,7 @@ import pytest import tvm from tvm import relay +from tvm.relay.transform import fake_quantization_to_integer def compare_fq_to_int(expr, args, allow_rounding_error=False): @@ -586,6 +587,32 @@ def run_test_case(partial_func): run_test_case(lambda x: relay.op.min(x, axis=1)) +class TestIntegerTableLookupTable: + """Consists of tests testing functionality of creating lookup tables for integer operations.""" + + # def __init__(self) -> None: + # self.input = np.arange(start=0, stop=256, dtype="uint8") + + def fake_identity_func_numpy(self, arr: np.ndarray): + return arr.astype("float32") + + """ + def fake_identity_func_relay(self): + fake_quantization_to_integer.register_fake_quantization_to_integer.fu + """ + + def test_int8_to_int8(self): + relay_result = fake_quantization_to_integer.create_integer_lookup_table( + self.uint8_identity_func, + relay.const(1.0, dtype="float32"), + relay.const(0, dtype="int32"), + relay.const(1.0, dtype="float32"), + relay.const(0, dtype="int32"), + in_dtype="int8", + out_dtype="int8", + ) + + def test_fake_quantize_tanh(): x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") From 7b865e09c49f7805a9a9ef34422ee4346e48e3d8 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 10:15:41 -0800 Subject: [PATCH 41/72] basic casting tests --- .../transform/fake_quantization_to_integer.py | 39 ---------- .../test_pass_fake_quantization_to_integer.py | 74 ++++++++++++++++--- 2 files changed, 64 insertions(+), 49 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py 
b/python/tvm/relay/transform/fake_quantization_to_integer.py index eca3d12d7bcd..96f5faab9b90 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -230,45 +230,6 @@ def func(expr, type_map): out_dtype=out_dtype, ) return [result, type_map[expr]] - arg = expr.args[0] - - if ( - not isinstance(in_scale, relay.Constant) - or not isinstance(in_zero_point, relay.Constant) - or not isinstance(out_scale, relay.Constant) - or not isinstance(out_zero_point, relay.Constant) - ): - raise ValueError( - f"{op_name} requires input/output quantization params to be known at compile time!" - ) - - # TODO: handle multi-channel q - in_scale = in_scale.data.numpy().item() - in_zero_point = in_zero_point.data.numpy().item() - out_scale = out_scale.data.numpy().item() - out_zero_point = out_zero_point.data.numpy().item() - - lookup_table = create_integer_lookup_table( - floating_point_func, - relay.const(in_scale), - relay.const(in_zero_point, dtype="int32"), - relay.const(out_scale), - relay.const(out_zero_point, dtype="int32"), - in_axis=type_map[arg].axis, - in_dtype=type_map[arg].dtype, - out_axis=type_map[expr].axis, - out_dtype=type_map[expr].dtype, - ) - - in_dtype_info = np.iinfo(type_map[arg].dtype) - in_dtype_num_bits = in_dtype_info.bits - - lookup_table = relay.const(lookup_table) - index_tensor = relay.reshape(arg, [-1]) - index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") - result = relay.gather(lookup_table, -1, index_tensor) - result = relay.reshape_like(result, arg) - return [result, type_map[expr]] return register_fake_quantization_to_integer(op_name, func) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index a23a316ae50f..714dce4e6186 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -596,21 +596,75 @@ class TestIntegerTableLookupTable: def fake_identity_func_numpy(self, arr: np.ndarray): return arr.astype("float32") - """ - def fake_identity_func_relay(self): - fake_quantization_to_integer.register_fake_quantization_to_integer.fu - """ + def fake_identity_func_relay( + self, + input_arg=None, + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(0, dtype="int32"), + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(0, dtype="int32"), + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", + ): + if input_arg is None: + input_arg = relay.const(np.arange(0, 256, dtype="uint8").view(in_dtype)) + + return ( + fake_quantization_to_integer.create_integer_lookup_op( + input_arg=input_arg, + floating_point_func=self.fake_identity_func_numpy, + in_scale=in_scale, + in_zero_point=in_zero_point, + out_scale=out_scale, + out_zero_point=out_zero_point, + in_axis=in_axis, + out_axis=out_axis, + in_dtype=in_dtype, + out_dtype=out_dtype, + ), + input_arg.data.numpy(), + ) + + def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): + return (np_arr.astype("int32") - np_zero_point) * np_scale def test_int8_to_int8(self): - relay_result = fake_quantization_to_integer.create_integer_lookup_table( - self.uint8_identity_func, - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), - relay.const(1.0, dtype="float32"), - relay.const(0, dtype="int32"), + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") + 
result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_uint8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_int8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(128, dtype="int32"), in_dtype="int8", + out_dtype="uint8", + ) + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg), + self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), + ) + + def test_uint8_to_int8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(128, dtype="int32"), + in_dtype="uint8", out_dtype="int8", ) + result = fake_quantization_to_integer.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), + self.dequantize_numpy(result), + ) def test_fake_quantize_tanh(): From f2934c015707cb883014b2b6637eb7b17d3b0f3d Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 10:18:26 -0800 Subject: [PATCH 42/72] explicitly say do not handle multi-channel lookups --- .../relay/transform/fake_quantization_to_integer.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 96f5faab9b90..f7278801fec2 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -167,12 +167,20 @@ def create_integer_lookup_op( """ TODO """ - # TODO: handle multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() out_scale = out_scale.data.numpy().item() out_zero_point = out_zero_point.data.numpy().item() + # TODO: handle multi-channel q + if ( + in_scale.size() > 1 + or out_scale.size() > 1 + or in_zero_point.size() > 1 + or out_zero_point.size() > 1 + ): + raise ValueError("Do no support multi-channel quantization for now") + lookup_table = create_integer_lookup_table( floating_point_func, relay.const(in_scale), From a16a35269364367c8b32888d9dcd4a3a45f80ba3 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 12:27:56 -0800 Subject: [PATCH 43/72] add example funcs --- .../transform/fake_quantization_to_integer.py | 6 ++- .../test_pass_fake_quantization_to_integer.py | 45 +++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index f7278801fec2..c595202ea541 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -169,6 +169,7 @@ def create_integer_lookup_op( """ in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() out_zero_point = out_zero_point.data.numpy().item() @@ -179,7 +180,7 @@ def create_integer_lookup_op( or in_zero_point.size() > 1 or out_zero_point.size() > 1 ): - raise 
ValueError("Do no support multi-channel quantization for now") + raise ValueError("Do not support multi-channel quantization for now") lookup_table = create_integer_lookup_table( floating_point_func, @@ -243,6 +244,9 @@ def func(expr, type_map): register_unary_elementwise_table_lookup_op("tanh", np.tanh) +register_unary_elementwise_table_lookup_op("erf", np.math.erf) +register_unary_elementwise_table_lookup_op("exp", np.math.exp) +register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) register_unary_identity("reshape") register_unary_identity("squeeze") diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 714dce4e6186..a7ab19bd059f 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -682,6 +682,51 @@ def test_fake_quantize_tanh(): compare_fq_to_int(op, [x_np]) +def test_fake_quantize_erf(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.erf(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_exp(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.exp(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_sigmoid(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.sigmoid(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + def test_fq_hard_fail(): @tvm.ir.register_op_attr("nn.conv2d", "FTVMFakeQuantizationToInteger", level=11) def conv2d(expr, type_map): # pylint: disable=unused-variable From b28a65e3b2704a59dda3193b74ae4168b086c339 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 19 Jan 2022 12:34:53 -0800 Subject: [PATCH 44/72] fix silent fail --- .../relay/transform/fake_quantization_to_integer.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index c595202ea541..63dabf98088e 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -167,21 +167,13 @@ def create_integer_lookup_op( """ TODO """ + + # TODO: handle multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() - out_scale = out_scale.data.numpy().item() out_zero_point = out_zero_point.data.numpy().item() - # TODO: handle multi-channel q - if ( - in_scale.size() > 1 - or out_scale.size() > 1 - or in_zero_point.size() > 1 - or out_zero_point.size() > 1 - ): - raise ValueError("Do not support multi-channel quantization for now") - 
 lookup_table = create_integer_lookup_table(

From fb22ee30c9ad3df384b36f160199be540aec0806 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 19 Jan 2022 12:46:13 -0800
Subject: [PATCH 45/72] fix some bugs with floating point funcs not working

---
 python/tvm/relay/transform/fake_quantization_to_integer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 63dabf98088e..2e293d44fc7d 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -17,6 +17,7 @@
 """Relay functions for rewriting fake quantized ops."""
 import numpy as np
 import tvm
+from scipy import special
 from tvm import relay
 from tvm.ir import TensorAffineType, TupleAffineType
 from tvm.tir import bijective_layout
@@ -236,8 +236,8 @@ def func(expr, type_map):

 register_unary_elementwise_table_lookup_op("tanh", np.tanh)
-register_unary_elementwise_table_lookup_op("erf", np.math.erf)
-register_unary_elementwise_table_lookup_op("exp", np.math.exp)
+register_unary_elementwise_table_lookup_op("erf", special.erf)
+register_unary_elementwise_table_lookup_op("exp", np.exp)
 register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x)))

From 0a03d46b9fe2ae77ebcdc1e1b18fa6bfa639f07f Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 19 Jan 2022 12:48:56 -0800
Subject: [PATCH 46/72] add TODO

---
 python/tvm/relay/transform/fake_quantization_to_integer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index 2e293d44fc7d..d6a9d534767a 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -235,6 +235,7 @@ def func(expr, type_map):
     return register_fake_quantization_to_integer(op_name, func)


+# TODO: better error messages if reference functions fail in FQ2I pass
 register_unary_elementwise_table_lookup_op("tanh", np.tanh)
 register_unary_elementwise_table_lookup_op("erf", special.erf)
 register_unary_elementwise_table_lookup_op("exp", np.exp)

From f8a5114042d3c239fd1903c511d10991932b060d Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Fri, 21 Jan 2022 16:20:38 -0800
Subject: [PATCH 47/72] add todo

---
 python/tvm/relay/transform/fake_quantization_to_integer.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index d6a9d534767a..432870dc98dc 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -111,6 +111,9 @@ def create_integer_lookup_table(
     in_dtype="uint8",
     out_dtype="uint8",
 ):
+    """
+    TODO
+    """
     if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype(
         np.dtype(out_dtype), np.integer
     ):

From cc2f5a9f845e6377cb0b9aa8114eb904c389c392 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Mon, 24 Jan 2022 12:50:19 -0800
Subject: [PATCH 48/72] canonicalizations

---
 python/tvm/relay/qnn/op/__init__.py          |  4 ++--
 python/tvm/relay/qnn/op/canonicalizations.py |  0
 python/tvm/relay/qnn/op/op.py                | 25 +++++++++++++++++++-
 3 files changed, 26 insertions(+), 3 deletions(-)
 create mode 100644 python/tvm/relay/qnn/op/canonicalizations.py

diff --git a/python/tvm/relay/qnn/op/__init__.py b/python/tvm/relay/qnn/op/__init__.py
index 848409360a9d..745050e286e8 100644
--- a/python/tvm/relay/qnn/op/__init__.py
+++ b/python/tvm/relay/qnn/op/__init__.py
@@ -18,5 +18,5 @@
 """QNN dialect related operators."""
 from __future__ import absolute_import as _abs
 from .qnn import *
-from .op import register_qnn_legalize
-from . import _qnn, legalizations, layout_conversions
+from .op import register_qnn_legalize, register_qnn_canonicalize
+from . import _qnn, legalizations, layout_conversions, canonicalizations
diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/python/tvm/relay/qnn/op/op.py b/python/tvm/relay/qnn/op/op.py
index 32a61229951c..c83a32e2ce6a 100644
--- a/python/tvm/relay/qnn/op/op.py
+++ b/python/tvm/relay/qnn/op/op.py
@@ -20,7 +20,10 @@


 def register_qnn_legalize(op_name, legal_op=None, level=10):
-    """Register legal transformation function for a QNN op
+    """Register legal transformation function for a QNN op.
+
+    This helps QNN match hardware intrinsics better and is run before
+    canonicalization.

     Parameters
     ----------
@@ -34,3 +37,23 @@ def register_qnn_legalize(op_name, legal_op=None, level=10):
     The priority level
     """
     return tvm.ir.register_op_attr(op_name, "FTVMQnnLegalize", legal_op, level)
+
+
+def register_qnn_canonicalize(op_name, legal_op=None, level=10):
+    """Register canonicalization function for a QNN op.
+
+    This transforms QNN ops to mainline Relay components.
+
+    Parameters
+    ----------
+    op_name : str
+        The name of the operator
+
+    legal_op: function (attrs: Attrs, args: List[Expr], arg_types: List[relay.Type]) -> new_expr: Expr
+        The function for transforming an expr to another expr.
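+        For example (an illustrative sketch, not prescribed by this patch), a
+        canonicalizer for qnn.rsqrt receives
+        args = [data, scale, zero_point, output_scale, output_zero_point]
+        and returns an equivalent expression built from mainline Relay ops.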
+ + level : int + The priority level + """ + + return tvm.ir.register_op_attr(op_name, "FTVMQnnCanonicalize", legal_op, level) From 16aad845e0f08d78cce75eed4f82e3c8e7059371 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:02:57 -0800 Subject: [PATCH 49/72] refactor integer lookup ops into own folder --- python/tvm/relay/qnn/op/canonicalizations.py | 121 ++++++++++++++ .../transform/fake_quantization_to_integer.py | 151 ------------------ .../relay/qnn/test_canonicalizations.py | 146 +++++++++++++++++ .../test_pass_fake_quantization_to_integer.py | 140 ---------------- 4 files changed, 267 insertions(+), 291 deletions(-) create mode 100644 tests/python/relay/qnn/test_canonicalizations.py diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index e69de29bb2d1..d419e3eb7a8e 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -0,0 +1,121 @@ +from typing import Callable + +import numpy as np +import tvm +from tvm import relay + + +# TODO: replace with constant folding +def run_const_expr(expr: "relay.Expr") -> np.ndarray: + mod = tvm.IRModule.from_expr(expr) + vm_exe = relay.create_executor("vm", mod=mod) + return vm_exe.evaluate()().asnumpy() + + +def create_integer_lookup_table( + floating_point_func: Callable[[np.ndarray], np.ndarray], + input_scale: "relay.Expr", + input_zero_point: "relay.Expr", + output_scale: "relay.Expr", + output_zero_point: "relay.Expr", + in_axis: int = -1, + out_axis: int = -1, + in_dtype: str = "uint8", + out_dtype: str = "uint8", +) -> np.ndarray: + """ + TODO + """ + if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( + np.dtype(out_dtype), np.integer + ): + raise ValueError( + f"Only integer dtypes allowed got {in_dtype} and {out_dtype} for in and out dtypes." + ) + + dtype_info = np.iinfo(in_dtype) + + num_bits = dtype_info.bits + + # Use TVMs quantization methods via relay to be consistent + # inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) + + # First generate a list of all num_bit integer patterns + inputs_quantized = np.array(range(0, 2 ** num_bits), dtype=f"uint{num_bits}") + + # Reinterpret bits as the real datatype + # Note what we are doing here is a bit tricky, the canonical view of our lookup table + # is using the uintX version. When we run the lookup in the relay graph, we cast the + # bit pattern back into this form. 
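+    # Concretely (a worked example, assuming in_dtype="int8"): the bit patterns
+    # 0..255 are enumerated as uint8, and .view("int8") reinterprets pattern 0xFF
+    # (255) as -1, so table row 255 holds the output for quantized input -1. At
+    # lookup time the indices are reinterpreted back to uint8, mapping -1 to row
+    # 255 again.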
+ inputs_quantized = inputs_quantized.view(in_dtype) + inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) + inputs_dequantized = run_const_expr( + relay.qnn.op.dequantize( + inputs_quantized, + input_scale=input_scale, + input_zero_point=input_zero_point, + axis=in_axis, + ) + ) + + output_dequantized = relay.const(floating_point_func(inputs_dequantized)) + output_quantized = run_const_expr( + relay.qnn.op.quantize( + output_dequantized, output_scale, output_zero_point, out_axis, out_dtype + ) + ) + + return output_quantized + + +def create_integer_lookup_op( + input_arg: "relay.Expr", + floating_point_func: Callable[[np.array], np.array], + in_scale: "relay.Expr", + in_zero_point: "relay.Expr", + out_scale: "relay.Expr", + out_zero_point: "relay.Expr", + in_axis: int = -1, + out_axis: int = -1, + in_dtype: str = "uint8", + out_dtype: str = "uint8", +) -> "relay.Expr": + """ + TODO + """ + # TODO: handle multi-channel q + in_scale = in_scale.data.numpy().item() + in_zero_point = in_zero_point.data.numpy().item() + out_scale = out_scale.data.numpy().item() + out_zero_point = out_zero_point.data.numpy().item() + + lookup_table = create_integer_lookup_table( + floating_point_func, + relay.const(in_scale), + relay.const(in_zero_point, dtype="int32"), + relay.const(out_scale), + relay.const(out_zero_point, dtype="int32"), + in_axis=in_axis, + in_dtype=in_dtype, + out_axis=out_axis, + out_dtype=out_dtype, + ) + + in_dtype_info = np.iinfo(in_dtype) + in_dtype_num_bits = in_dtype_info.bits + + lookup_table = relay.const(lookup_table) + index_tensor = relay.reshape(input_arg, [-1]) + index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") + result = relay.gather(lookup_table, -1, index_tensor) + result = relay.reshape_like(result, input_arg) + return result + + +""" +# TODO: better error messages if reference functions fail in FQ2I pass +register_unary_elementwise_table_lookup_op("tanh", np.tanh) +register_unary_elementwise_table_lookup_op("erf", special.erf) +register_unary_elementwise_table_lookup_op("exp", np.exp) +register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) +""" diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 432870dc98dc..9fdc2186a397 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -93,157 +93,6 @@ def identity(expr, type_map): return register_fake_quantization_to_integer(op_name, identity) -# TODO: replace with constant folding -def run_const_expr(expr): - mod = tvm.IRModule.from_expr(expr) - vm_exe = relay.create_executor("vm", mod=mod) - return vm_exe.evaluate()().asnumpy() - - -def create_integer_lookup_table( - floating_point_func, - input_scale, - input_zero_point, - output_scale, - output_zero_point, - in_axis=-1, - out_axis=-1, - in_dtype="uint8", - out_dtype="uint8", -): - """ - TODO - """ - if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( - np.dtype(out_dtype), np.integer - ): - raise ValueError( - f"Only integer dtypes allowed got {in_dtype} and {out_dtype} for in and out dtypes." 
- ) - - dtype_info = np.iinfo(in_dtype) - - num_bits = dtype_info.bits - - # Use TVMs quantization methods via relay to be consistent - # inputs_quantized = np.array(range(dtype_info.min, dtype_info.max + 1)).astype(in_dtype) - - # First generate a list of all num_bit integer patterns - inputs_quantized = np.array(range(0, 2 ** num_bits), dtype=f"uint{num_bits}") - - # Reinterpret bits as the real datatype - # Note what we are doing here is a bit tricky, the canonical view of our lookup table - # is using the uintX version. When we run the lookup in the relay graph, we cast the - # bit pattern back into this form. - inputs_quantized = inputs_quantized.view(in_dtype) - inputs_quantized = relay.const(inputs_quantized, dtype=in_dtype) - inputs_dequantized = run_const_expr( - relay.qnn.op.dequantize( - inputs_quantized, - input_scale=input_scale, - input_zero_point=input_zero_point, - axis=in_axis, - ) - ) - - output_dequantized = relay.const(floating_point_func(inputs_dequantized)) - output_quantized = run_const_expr( - relay.qnn.op.quantize( - output_dequantized, output_scale, output_zero_point, out_axis, out_dtype - ) - ) - - return output_quantized - - -def create_integer_lookup_op( - input_arg, - floating_point_func, - in_scale, - in_zero_point, - out_scale, - out_zero_point, - in_axis=-1, - out_axis=-1, - in_dtype="uint8", - out_dtype="uint8", -): - """ - TODO - """ - # TODO: handle multi-channel q - in_scale = in_scale.data.numpy().item() - in_zero_point = in_zero_point.data.numpy().item() - out_scale = out_scale.data.numpy().item() - out_zero_point = out_zero_point.data.numpy().item() - - lookup_table = create_integer_lookup_table( - floating_point_func, - relay.const(in_scale), - relay.const(in_zero_point, dtype="int32"), - relay.const(out_scale), - relay.const(out_zero_point, dtype="int32"), - in_axis=in_axis, - in_dtype=in_dtype, - out_axis=out_axis, - out_dtype=out_dtype, - ) - - in_dtype_info = np.iinfo(in_dtype) - in_dtype_num_bits = in_dtype_info.bits - - lookup_table = relay.const(lookup_table) - index_tensor = relay.reshape(input_arg, [-1]) - index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") - result = relay.gather(lookup_table, -1, index_tensor) - result = relay.reshape_like(result, input_arg) - return result - - -def register_unary_elementwise_table_lookup_op(op_name, floating_point_func): - """Implement an operator in quantized space via table lookup operations (e.g. via gather). - - op_name: str - The name of the operator to register for FQ2I. - - example_func: Callable[[np.ndarray], np.ndarray] - The FP32 version of the function to quantize operating on numpy arrays. - """ - - def func(expr, type_map): - assert len(expr.args) == 1, "only support elemwise ops for now!" 
- arg = expr.args[0] - in_scale = fold_constant(type_map[arg].scale) - in_zero_point = fold_constant(type_map[arg].zero_point) - out_scale = fold_constant(type_map[expr].scale) - out_zero_point = fold_constant(type_map[expr].zero_point) - in_axis = type_map[arg].axis - in_dtype = type_map[arg].dtype - out_axis = type_map[expr].axis - out_dtype = type_map[expr].dtype - result = create_integer_lookup_op( - input_arg=arg, - floating_point_func=floating_point_func, - in_scale=in_scale, - in_zero_point=in_zero_point, - out_scale=out_scale, - out_zero_point=out_zero_point, - in_axis=in_axis, - in_dtype=in_dtype, - out_axis=out_axis, - out_dtype=out_dtype, - ) - return [result, type_map[expr]] - - return register_fake_quantization_to_integer(op_name, func) - - -# TODO: better error messages if reference functions fail in FQ2I pass -register_unary_elementwise_table_lookup_op("tanh", np.tanh) -register_unary_elementwise_table_lookup_op("erf", special.erf) -register_unary_elementwise_table_lookup_op("exp", np.exp) -register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x))) - register_unary_identity("reshape") register_unary_identity("squeeze") register_unary_identity("strided_slice") diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py new file mode 100644 index 000000000000..aad6bb3ede19 --- /dev/null +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -0,0 +1,146 @@ +import numpy as np +import tvm +from tvm import relay +from tvm.relay.qnn.op import canonicalizations + + +class TestIntegerTableLookupTable: + """Consists of tests testing functionality of creating lookup tables for integer operations.""" + + # def __init__(self) -> None: + # self.input = np.arange(start=0, stop=256, dtype="uint8") + + def fake_identity_func_numpy(self, arr: np.ndarray): + return arr.astype("float32") + + def fake_identity_func_relay( + self, + input_arg=None, + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(0, dtype="int32"), + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(0, dtype="int32"), + in_axis=-1, + out_axis=-1, + in_dtype="uint8", + out_dtype="uint8", + ): + if input_arg is None: + input_arg = relay.const(np.arange(0, 256, dtype="uint8").view(in_dtype)) + + return ( + canonicalizations.create_integer_lookup_op( + input_arg=input_arg, + floating_point_func=self.fake_identity_func_numpy, + in_scale=in_scale, + in_zero_point=in_zero_point, + out_scale=out_scale, + out_zero_point=out_zero_point, + in_axis=in_axis, + out_axis=out_axis, + in_dtype=in_dtype, + out_dtype=out_dtype, + ), + input_arg.data.numpy(), + ) + + def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): + return (np_arr.astype("int32") - np_zero_point) * np_scale + + def test_int8_to_int8(self): + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_uint8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + + def test_int8_to_uint8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + out_scale=relay.const(1.0, dtype="float32"), + out_zero_point=relay.const(128, dtype="int32"), + 
in_dtype="int8", + out_dtype="uint8", + ) + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg), + self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), + ) + + def test_uint8_to_int8(self): + relay_lookup, input_arg = self.fake_identity_func_relay( + in_scale=relay.const(1.0, dtype="float32"), + in_zero_point=relay.const(128, dtype="int32"), + in_dtype="uint8", + out_dtype="int8", + ) + result = canonicalizations.run_const_expr(relay_lookup) + assert np.allclose( + self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), + self.dequantize_numpy(result), + ) + + +""" +def test_fake_quantize_tanh(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.tanh(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_erf(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.erf(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_exp(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.exp(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) + + +def test_fake_quantize_sigmoid(): + x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") + + zero = relay.const(0) + x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) + op = relay.op.sigmoid(x) + + # Have difference scales for input/output to test if can handle + op = relay.qnn.op.quantize(op, relay.const(0.01), zero) + + x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + + compare_fq_to_int(op, [x_np]) +""" diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index a7ab19bd059f..28166bb8be72 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -587,146 +587,6 @@ def run_test_case(partial_func): run_test_case(lambda x: relay.op.min(x, axis=1)) -class TestIntegerTableLookupTable: - """Consists of tests testing functionality of creating lookup tables for integer operations.""" - - # def __init__(self) -> None: - # self.input = np.arange(start=0, stop=256, dtype="uint8") - - def fake_identity_func_numpy(self, arr: np.ndarray): - return arr.astype("float32") - - def fake_identity_func_relay( - self, - input_arg=None, - in_scale=relay.const(1.0, dtype="float32"), - in_zero_point=relay.const(0, dtype="int32"), - out_scale=relay.const(1.0, dtype="float32"), - out_zero_point=relay.const(0, dtype="int32"), - in_axis=-1, - out_axis=-1, - in_dtype="uint8", - out_dtype="uint8", - ): - if input_arg is None: - input_arg = relay.const(np.arange(0, 256, dtype="uint8").view(in_dtype)) - - return ( - 
fake_quantization_to_integer.create_integer_lookup_op( - input_arg=input_arg, - floating_point_func=self.fake_identity_func_numpy, - in_scale=in_scale, - in_zero_point=in_zero_point, - out_scale=out_scale, - out_zero_point=out_zero_point, - in_axis=in_axis, - out_axis=out_axis, - in_dtype=in_dtype, - out_dtype=out_dtype, - ), - input_arg.data.numpy(), - ) - - def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): - return (np_arr.astype("int32") - np_zero_point) * np_scale - - def test_int8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) - - def test_uint8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) - - def test_int8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - out_scale=relay.const(1.0, dtype="float32"), - out_zero_point=relay.const(128, dtype="int32"), - in_dtype="int8", - out_dtype="uint8", - ) - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg), - self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), - ) - - def test_uint8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - in_scale=relay.const(1.0, dtype="float32"), - in_zero_point=relay.const(128, dtype="int32"), - in_dtype="uint8", - out_dtype="int8", - ) - result = fake_quantization_to_integer.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), - self.dequantize_numpy(result), - ) - - -def test_fake_quantize_tanh(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.tanh(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_erf(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.erf(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_exp(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.exp(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_sigmoid(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.sigmoid(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = 
np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - def test_fq_hard_fail(): @tvm.ir.register_op_attr("nn.conv2d", "FTVMFakeQuantizationToInteger", level=11) def conv2d(expr, type_map): # pylint: disable=unused-variable From eacf38368fb0975b0d4e3fee91673cae2f421011 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:09:33 -0800 Subject: [PATCH 50/72] fq2i stuff --- python/tvm/relay/transform/fake_quantization_to_integer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 9fdc2186a397..3337c7cfb894 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -17,9 +17,11 @@ """Relay functions for rewriting fake quantized ops.""" import numpy as np import tvm -from scipy import special from tvm import relay from tvm.ir import TensorAffineType, TupleAffineType + +# import to register canonicalization funcs for fq2i +from tvm.relay.qnn.op import canonicalizations from tvm.tir import bijective_layout from ..op import register_fake_quantization_to_integer From f1753c96fc4844bd9ebed743c6a1a3d7a7261161 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:33:35 -0800 Subject: [PATCH 51/72] clean up existing tests --- python/tvm/relay/qnn/op/canonicalizations.py | 2 +- .../relay/qnn/test_canonicalizations.py | 90 ++++++++++++++----- 2 files changed, 67 insertions(+), 25 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index d419e3eb7a8e..334b824f8b50 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -5,8 +5,8 @@ from tvm import relay -# TODO: replace with constant folding def run_const_expr(expr: "relay.Expr") -> np.ndarray: + """Run a const expression, receiving result as np array.""" mod = tvm.IRModule.from_expr(expr) vm_exe = relay.create_executor("vm", mod=mod) return vm_exe.evaluate()().asnumpy() diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index aad6bb3ede19..0f9939f1cbd8 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -7,9 +7,6 @@ class TestIntegerTableLookupTable: """Consists of tests testing functionality of creating lookup tables for integer operations.""" - # def __init__(self) -> None: - # self.input = np.arange(start=0, stop=256, dtype="uint8") - def fake_identity_func_numpy(self, arr: np.ndarray): return arr.astype("float32") @@ -47,40 +44,85 @@ def fake_identity_func_relay( def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): return (np_arr.astype("int32") - np_zero_point) * np_scale - def test_int8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="int8", out_dtype="int8") + def run_identity_function_test( + self, + in_scale: float, + in_zero_point: int, + out_scale: float, + out_zero_point: int, + in_dtype: str, + out_dtype: str, + rtol=1e-7, + atol=0, + ): + relay_lookup, input_arg = self.fake_identity_func_relay( + in_scale=relay.const(in_scale, "float32"), + in_zero_point=relay.const(in_zero_point, "int32"), + out_scale=relay.const(out_scale, "float32"), + out_zero_point=relay.const(out_zero_point, "int32"), + in_dtype=in_dtype, + out_dtype=out_dtype, + ) result = 
canonicalizations.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + np.testing.assert_allclose( + self.dequantize_numpy(input_arg, np_scale=in_scale, np_zero_point=in_zero_point), + self.dequantize_numpy(result, np_scale=out_scale, np_zero_point=out_zero_point), + atol=atol, + rtol=rtol, + ) + + def test_int8_to_int8(self): + """Test int8 input to int8 output mapping workings""" + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=0, + out_scale=1.0, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + ) def test_uint8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay(in_dtype="uint8", out_dtype="uint8") - result = canonicalizations.run_const_expr(relay_lookup) - assert np.allclose(self.dequantize_numpy(input_arg), self.dequantize_numpy(result)) + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=128, + out_scale=1.0, + out_zero_point=128, + in_dtype="uint8", + out_dtype="uint8", + ) def test_int8_to_uint8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - out_scale=relay.const(1.0, dtype="float32"), - out_zero_point=relay.const(128, dtype="int32"), + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=0, + out_scale=1.0, + out_zero_point=128, in_dtype="int8", out_dtype="uint8", ) - result = canonicalizations.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg), - self.dequantize_numpy(result, np_scale=1.0, np_zero_point=128), - ) def test_uint8_to_int8(self): - relay_lookup, input_arg = self.fake_identity_func_relay( - in_scale=relay.const(1.0, dtype="float32"), - in_zero_point=relay.const(128, dtype="int32"), + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=128, + out_scale=1.0, + out_zero_point=0, in_dtype="uint8", out_dtype="int8", ) - result = canonicalizations.run_const_expr(relay_lookup) - assert np.allclose( - self.dequantize_numpy(input_arg, np_scale=1.0, np_zero_point=128), - self.dequantize_numpy(result), + + def test_different_in_out_qparams(self): + """Test mapping with different in/out qparams works.""" + self.run_identity_function_test( + in_scale=1.0, + in_zero_point=128, + out_scale=1.0, + out_zero_point=128, + in_dtype="uint8", + out_dtype="uint8", + atol=1, # numbers range from -128 -> 128 so not that big error + rtol=0, ) From 76cef1bcc450a4d3229676b64e009a66544c5d98 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 13:34:08 -0800 Subject: [PATCH 52/72] flesh out todo --- python/tvm/relay/qnn/op/canonicalizations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 334b824f8b50..19ee1b131908 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -83,7 +83,7 @@ def create_integer_lookup_op( """ TODO """ - # TODO: handle multi-channel q + # TODO: handle multi-channel q, if below fails it's probably that in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() out_scale = out_scale.data.numpy().item() From e996279eada5d2292db3d9772c16366eb9a03947 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 14:14:13 -0800 Subject: [PATCH 53/72] more tests --- .../relay/qnn/test_canonicalizations.py | 121 ++++++++---------- 1 file changed, 52 insertions(+), 69 deletions(-) diff --git a/tests/python/relay/qnn/test_canonicalizations.py 
b/tests/python/relay/qnn/test_canonicalizations.py index 0f9939f1cbd8..e13d96885051 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -1,6 +1,9 @@ +from typing import Callable + import numpy as np import tvm from tvm import relay +from tvm.relay.op.transform import arange from tvm.relay.qnn.op import canonicalizations @@ -13,6 +16,7 @@ def fake_identity_func_numpy(self, arr: np.ndarray): def fake_identity_func_relay( self, input_arg=None, + floating_point_func: Callable[[np.ndarray], np.ndarray] = fake_identity_func_numpy, in_scale=relay.const(1.0, dtype="float32"), in_zero_point=relay.const(0, dtype="int32"), out_scale=relay.const(1.0, dtype="float32"), @@ -28,7 +32,7 @@ def fake_identity_func_relay( return ( canonicalizations.create_integer_lookup_op( input_arg=input_arg, - floating_point_func=self.fake_identity_func_numpy, + floating_point_func=floating_point_func, in_scale=in_scale, in_zero_point=in_zero_point, out_scale=out_scale, @@ -44,7 +48,7 @@ def fake_identity_func_relay( def dequantize_numpy(self, np_arr, np_scale=1.0, np_zero_point=0): return (np_arr.astype("int32") - np_zero_point) * np_scale - def run_identity_function_test( + def run_function_test( self, in_scale: float, in_zero_point: int, @@ -52,10 +56,14 @@ def run_identity_function_test( out_zero_point: int, in_dtype: str, out_dtype: str, + floating_point_func: Callable[[np.ndarray], np.ndarray] = fake_identity_func_numpy, + input_arg: relay.Expr = None, rtol=1e-7, atol=0, ): relay_lookup, input_arg = self.fake_identity_func_relay( + input_arg=input_arg, + floating_point_func=floating_point_func, in_scale=relay.const(in_scale, "float32"), in_zero_point=relay.const(in_zero_point, "int32"), out_scale=relay.const(out_scale, "float32"), @@ -65,15 +73,18 @@ def run_identity_function_test( ) result = canonicalizations.run_const_expr(relay_lookup) np.testing.assert_allclose( - self.dequantize_numpy(input_arg, np_scale=in_scale, np_zero_point=in_zero_point), + floating_point_func( + self.dequantize_numpy(input_arg, np_scale=in_scale, np_zero_point=in_zero_point) + ), self.dequantize_numpy(result, np_scale=out_scale, np_zero_point=out_zero_point), atol=atol, rtol=rtol, ) + """Test mapping between different input/output dtypes""" + def test_int8_to_int8(self): - """Test int8 input to int8 output mapping workings""" - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=0, out_scale=1.0, @@ -83,7 +94,7 @@ def test_int8_to_int8(self): ) def test_uint8_to_uint8(self): - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=128, out_scale=1.0, @@ -93,7 +104,7 @@ def test_uint8_to_uint8(self): ) def test_int8_to_uint8(self): - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=0, out_scale=1.0, @@ -103,7 +114,7 @@ def test_int8_to_uint8(self): ) def test_uint8_to_int8(self): - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=128, out_scale=1.0, @@ -112,9 +123,10 @@ def test_uint8_to_int8(self): out_dtype="int8", ) + """Test mapping with different in/out qparams works.""" + def test_different_in_out_qparams(self): - """Test mapping with different in/out qparams works.""" - self.run_identity_function_test( + self.run_function_test( in_scale=1.0, in_zero_point=128, out_scale=1.0, @@ -125,64 +137,35 @@ def test_different_in_out_qparams(self): rtol=0, ) + """Test some simple functions""" -""" -def test_fake_quantize_tanh(): - x = 
relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.tanh(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_erf(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.erf(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_exp(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.exp(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") - - compare_fq_to_int(op, [x_np]) - - -def test_fake_quantize_sigmoid(): - x = relay.var("x", shape=[3, 3, 3, 3], dtype="int8") - - zero = relay.const(0) - x = relay.qnn.op.dequantize(x, relay.const(0.03), zero) - op = relay.op.sigmoid(x) - - # Have difference scales for input/output to test if can handle - op = relay.qnn.op.quantize(op, relay.const(0.01), zero) - - x_np = np.random.randint(-128, 127, size=[3, 3, 3, 3], dtype="int8") + def test_tanh(self): + # 1 / 64 in scale -- input range is ~ (-2, 2), tanh(+-2) ~= +-1 + # 1 / 128 out_scale -- output range is ~(-1, 1) + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8")), + in_scale=1 / 64, + in_zero_point=0, + out_scale=1 / 128, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=np.tanh, + atol=0.01, + rtol=0.01, + ) - compare_fq_to_int(op, [x_np]) -""" + def test_exp(self): + # input in floating point ~[-2, 2], final output ~[0, 8] + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8")), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=np.exp, + atol=0.03, + rtol=0.01, + ) From 1ff3adcbc42c736d9627c211a071381550fbbe4a Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 14:41:35 -0800 Subject: [PATCH 54/72] test on keeping shape good --- .../relay/qnn/test_canonicalizations.py | 50 ++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index e13d96885051..334c628e5e54 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -15,8 +15,8 @@ def fake_identity_func_numpy(self, arr: np.ndarray): def fake_identity_func_relay( self, + floating_point_func: Callable[[np.ndarray], np.ndarray], input_arg=None, - floating_point_func: Callable[[np.ndarray], np.ndarray] = fake_identity_func_numpy, in_scale=relay.const(1.0, dtype="float32"), in_zero_point=relay.const(0, dtype="int32"), out_scale=relay.const(1.0, dtype="float32"), @@ -56,7 +56,7 @@ def run_function_test( out_zero_point: int, in_dtype: str, out_dtype: str, - floating_point_func: Callable[[np.ndarray], np.ndarray] 
= fake_identity_func_numpy, + floating_point_func: Callable[[np.ndarray], np.ndarray], input_arg: relay.Expr = None, rtol=1e-7, atol=0, @@ -91,6 +91,7 @@ def test_int8_to_int8(self): out_zero_point=0, in_dtype="int8", out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, ) def test_uint8_to_uint8(self): @@ -101,6 +102,7 @@ def test_uint8_to_uint8(self): out_zero_point=128, in_dtype="uint8", out_dtype="uint8", + floating_point_func=self.fake_identity_func_numpy, ) def test_int8_to_uint8(self): @@ -111,6 +113,7 @@ def test_int8_to_uint8(self): out_zero_point=128, in_dtype="int8", out_dtype="uint8", + floating_point_func=self.fake_identity_func_numpy, ) def test_uint8_to_int8(self): @@ -121,6 +124,48 @@ def test_uint8_to_int8(self): out_zero_point=0, in_dtype="uint8", out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + ) + + """Test different input shapes""" + + def test_keep_input_shapes(self): + # input in floating point ~[-2, 2], final output ~[0, 8] + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8").reshape([2, 2, 8, 8])), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + atol=0.03, + rtol=0.01, + ) + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8").reshape([2, 2, 64])), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + atol=0.03, + rtol=0.01, + ) + self.run_function_test( + input_arg=relay.const(np.arange(-128, 128).astype("int8").reshape([2, 128])), + in_scale=0.015, + in_zero_point=0, + out_scale=16 / 256, + out_zero_point=0, + in_dtype="int8", + out_dtype="int8", + floating_point_func=self.fake_identity_func_numpy, + atol=0.03, + rtol=0.01, ) """Test mapping with different in/out qparams works.""" @@ -133,6 +178,7 @@ def test_different_in_out_qparams(self): out_zero_point=128, in_dtype="uint8", out_dtype="uint8", + floating_point_func=self.fake_identity_func_numpy, atol=1, # numbers range from -128 -> 128 so not that big error rtol=0, ) From eabd40a55b8a879775aa048a422a7d24a3788ef9 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 14:42:00 -0800 Subject: [PATCH 55/72] lookup table fix --- tests/python/relay/qnn/test_canonicalizations.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index 334c628e5e54..84e01976f4f4 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -1,9 +1,7 @@ from typing import Callable import numpy as np -import tvm from tvm import relay -from tvm.relay.op.transform import arange from tvm.relay.qnn.op import canonicalizations From efe7b1aa6a25f0117162466f3ba08109e4675d90 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 15:04:48 -0800 Subject: [PATCH 56/72] replace canonicalization for rsqrt --- python/tvm/relay/qnn/op/canonicalizations.py | 23 +++++++++++++------- src/relay/qnn/op/rsqrt.cc | 4 ++-- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 19ee1b131908..b0ac0b1c15c1 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -3,6 +3,7 @@ import 
numpy as np
 import tvm
 from tvm import relay
+from tvm.relay.qnn.op.op import register_qnn_canonicalize


 def run_const_expr(expr: "relay.Expr") -> np.ndarray:
@@ -83,7 +84,7 @@ def create_integer_lookup_op(
     """
     TODO
     """
-    # TODO: handle multi-channel q, if below fails it's probably that
+    # TODO: handle multi-channel q, below will fail with multi-channel q
     in_scale = in_scale.data.numpy().item()
     in_zero_point = in_zero_point.data.numpy().item()
     out_scale = out_scale.data.numpy().item()
     out_zero_point = out_zero_point.data.numpy().item()
@@ -112,10 +113,16 @@ def create_integer_lookup_op(
     return result


-"""
-# TODO: better error messages if reference functions fail in FQ2I pass
-register_unary_elementwise_table_lookup_op("tanh", np.tanh)
-register_unary_elementwise_table_lookup_op("erf", special.erf)
-register_unary_elementwise_table_lookup_op("exp", np.exp)
-register_unary_elementwise_table_lookup_op("sigmoid", lambda x: 1 / (1 + np.exp(-x)))
-"""
+@register_qnn_canonicalize("qnn.rsqrt")
+def canonicalize_rsqrt(attrs, args, arg_types):
+    """Canonicalization for rsqrt"""
+    return create_integer_lookup_op(
+        args[0],
+        lambda arr: 1 / np.sqrt(arr),
+        args[1],
+        args[2],
+        args[3],
+        args[4],
+        in_dtype=arg_types[0].dtype,
+        out_dtype=arg_types[0].dtype,
+    )
diff --git a/src/relay/qnn/op/rsqrt.cc b/src/relay/qnn/op/rsqrt.cc
index 55814dff422b..2bd73d758144 100644
--- a/src/relay/qnn/op/rsqrt.cc
+++ b/src/relay/qnn/op/rsqrt.cc
@@ -105,6 +105,7 @@ Expr QnnRsqrtCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   return Quantize(output, args.output_scale, args.output_zero_point, input_type.dtype, types, -1);
 }

+// Canonicalization set in python/tvm/relay/qnn/op/canonicalizations.py
 RELAY_REGISTER_OP("qnn.rsqrt")
     .describe("Elementwise rsqrt for quantized tensors.")
     .set_num_inputs(5)
@@ -116,8 +117,7 @@ RELAY_REGISTER_OP("qnn.rsqrt")
        "The quantization zero_point of the output tensor.")
     .set_support_level(11)
     .add_type_rel("QRsqrt", QnnRsqrtRel)
-    .set_attr<TNonComputational>("TNonComputational", true)
-    .set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnRsqrtCanonicalize);
+    .set_attr<TNonComputational>("TNonComputational", true);

 TVM_REGISTER_GLOBAL("relay.qnn.op._make.rsqrt").set_body_typed(MakeQuantizedRsqrt);

From 3b000801acc01f40d6ca53f06964a18f03a21262 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Mon, 24 Jan 2022 15:05:47 -0800
Subject: [PATCH 57/72] remove canonicalization of rsqrt

---
 src/relay/qnn/op/rsqrt.cc | 36 ------------------------------------
 1 file changed, 36 deletions(-)

diff --git a/src/relay/qnn/op/rsqrt.cc b/src/relay/qnn/op/rsqrt.cc
index 2bd73d758144..6d37b1aa8d5d 100644
--- a/src/relay/qnn/op/rsqrt.cc
+++ b/src/relay/qnn/op/rsqrt.cc
@@ -69,42 +69,6 @@ Expr MakeQuantizedRsqrt(Expr x, Expr scale, Expr zero_point, Expr output_scale,
   return Call(op, {x, scale, zero_point, output_scale, output_zero_point}, Attrs(), {});
 }

-/*
- * \brief Canonicalizes the QNN rsqrt op.
- * \param attrs The empty attribute.
- * \param new_args The new mutated args to the call node.
- * \param arg_types The types of input and output.
- * \return The sequence of Relay ops for add op.
- */
-Expr QnnRsqrtCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
-                          const Array<tvm::relay::Type>& arg_types) {
-  // At this time, due to the complexity of implementing this op in int8 or uint8,
-  // we dequantize the input, run the op in float, and then quantize the output (as below).
-  // This acts as a placeholder for future hardware enablement, where more hardware specific
-  // canonicalization can be provided.
-
-  // Get the args.
-  QnnUnaryOpArguments args(new_args);
-
-  // Get the input dtype and shape.
- QnnUnaryOpTensorType input_type(arg_types, 0); - - // Get the types for dequantize/quantize. - Array types; - for (size_t i = 1; i < 5; ++i) { - types.push_back(arg_types[i]); - } - - // Dequantize input. - auto dequantized_arg = Dequantize(args.x, args.scale, args.zero_point, types, -1); - - // Compute Rsqrt(Q_x') - auto output = Rsqrt(dequantized_arg); - - // Quantize output. - return Quantize(output, args.output_scale, args.output_zero_point, input_type.dtype, types, -1); -} - // Canonicalization set in python/tvm/relay/qnn/op/canonicalizations.py RELAY_REGISTER_OP("qnn.rsqrt") .describe("Elementwise rsqrt for quantized tensors.") From 3adcb9e9292492dcd2cce63eea7d7a11b6d6fcac Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 24 Jan 2022 20:55:19 -0800 Subject: [PATCH 58/72] add asf headers --- python/tvm/relay/qnn/op/canonicalizations.py | 16 ++++++++++++++++ tests/python/relay/qnn/test_canonicalizations.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index b0ac0b1c15c1..23c5e37f7ebe 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. from typing import Callable import numpy as np diff --git a/tests/python/relay/qnn/test_canonicalizations.py b/tests/python/relay/qnn/test_canonicalizations.py index 84e01976f4f4..0505a88c07bd 100644 --- a/tests/python/relay/qnn/test_canonicalizations.py +++ b/tests/python/relay/qnn/test_canonicalizations.py @@ -1,3 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
from typing import Callable import numpy as np From 79289576be05edcda346fb0d031104037b5da552 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 13:51:14 -0800 Subject: [PATCH 59/72] gather supports unsigned integer tests --- tests/python/relay/test_op_level3.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 34f33240f5ac..327575fb8955 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -1296,7 +1296,6 @@ def verify_gather(data, axis, indices, ref_res, indices_dtype="int32"): verify_gather(data, axis, indices, ref_res) verify_gather(data, axis, indices, ref_res, indices_dtype="uint32") - verify_gather(data, axis, indices, ref_res) def test_gather_nd(target, dev, executor_kind): From 3b5759badc4c584470330e7f73a3e1342afc3883 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 14:10:24 -0800 Subject: [PATCH 60/72] fix things --- python/tvm/relay/qnn/op/canonicalizations.py | 51 ++++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 23c5e37f7ebe..db8b71e68998 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -23,7 +23,7 @@ def run_const_expr(expr: "relay.Expr") -> np.ndarray: - """Run a const expression, receiving result as np array.""" + """Evaluate a const expression, receiving result as np array.""" mod = tvm.IRModule.from_expr(expr) vm_exe = relay.create_executor("vm", mod=mod) return vm_exe.evaluate()().asnumpy() @@ -41,7 +41,24 @@ def create_integer_lookup_table( out_dtype: str = "uint8", ) -> np.ndarray: """ - TODO + Return a table where each input indexes to the quantized output approximating the given function. + + Note this also supports mapping unsigned and signed integers to each other. + + Args: + floating_point_func: The numpy function which this table is to approximate + input_scale: The scale of the quantized input tensor. + input_zero_point: The zero point of the quantized input tensor. + output_scale: The scale of the quantized output tensor. + output_zero_point: The zero point of the quantized output tensor. + in_axis: The axis for multi-channel quantization of the input if applicable. + out_axis: The axis for multi-channel quantization of the output if applicable. + in_dtype: The dtype of the input tensor. + out_dtype: The wanted dtype of the output tensor. + + Returns: + A numpy array where values in quantized space will index to the output in quantized space + approximating the given function. """ if not np.issubdtype(np.dtype(in_dtype), np.integer) or not np.issubdtype( np.dtype(out_dtype), np.integer @@ -98,8 +115,24 @@ def create_integer_lookup_op( out_dtype: str = "uint8", ) -> "relay.Expr": """ - TODO + Create a quantized version of the given floating point unary operation using table lookup. + + Args: + input_arg: The quantized input to the final function. + floating_point_func: The numpy function which this table is to approximate + in_scale: The scale of the quantized input tensor. + in_zero_point: The zero point of the quantized input tensor. + out_scale: The scale of the quantized output tensor. + out_zero_point: The zero point of the quantized output tensor. + in_axis: The axis for multi-channel quantization of the input if applicable. + out_axis: The axis for multi-channel quantization of the output if applicable. 
+ in_dtype: The dtype of the input tensor. + out_dtype: The wanted dtype of the output tensor. + + Returns: + A Relay expression representing a quantized version of the given function. """ + # TODO: handle multi-channel q, below will fail with multi-channel q in_scale = in_scale.data.numpy().item() in_zero_point = in_zero_point.data.numpy().item() @@ -133,12 +166,12 @@ def create_integer_lookup_op( def canonicalize_rsqrt(attrs, args, arg_types): """Canonicalization for rsqrt""" return create_integer_lookup_op( - args[0], - lambda arr: 1 / np.sqrt(arr), - args[1], - args[2], - args[3], - args[4], + input_arg=args[0], + floating_point_func=lambda arr: 1 / np.sqrt(arr), + in_scale=args[1], + in_zero_point=args[2], + out_scale=args[3], + out_zero_point=args[4], in_dtype=arg_types[0].dtype, out_dtype=arg_types[0].dtype, ) From a2f4c5e54119f8d5daff2c8673a0d5fcd1065a22 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Tue, 25 Jan 2022 14:37:40 -0800 Subject: [PATCH 61/72] move to legalization --- python/tvm/relay/qnn/op/canonicalizations.py | 15 --------------- python/tvm/relay/qnn/op/legalizations.py | 20 ++++++++++++++++++-- src/relay/qnn/op/rsqrt.cc | 4 +++- tests/python/relay/test_op_qnn_rsqrt.py | 4 +++- 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index db8b71e68998..bd0108d72e64 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -160,18 +160,3 @@ def create_integer_lookup_op( result = relay.gather(lookup_table, -1, index_tensor) result = relay.reshape_like(result, input_arg) return result - - -@register_qnn_canonicalize("qnn.rsqrt") -def canonicalize_rsqrt(attrs, args, arg_types): - """Canonicalization for rsqrt""" - return create_integer_lookup_op( - input_arg=args[0], - floating_point_func=lambda arr: 1 / np.sqrt(arr), - in_scale=args[1], - in_zero_point=args[2], - out_scale=args[3], - out_zero_point=args[4], - in_dtype=arg_types[0].dtype, - out_dtype=arg_types[0].dtype, - ) diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py index 52fe6c8ebe2f..947e9d823134 100644 --- a/python/tvm/relay/qnn/op/legalizations.py +++ b/python/tvm/relay/qnn/op/legalizations.py @@ -17,12 +17,13 @@ # pylint: disable=invalid-name, unused-argument """Backend QNN related feature registration""" import numpy as np - import tvm from tvm import relay from tvm._ffi.base import TVMError -from .. import op as reg +from tvm.relay.qnn.op.canonicalizations import create_integer_lookup_op + from ....topi.x86.utils import target_has_sse42 +from .. import op as reg ################################################# # Register the functions for different operators. @@ -46,6 +47,21 @@ def legalize_qnn_dense(attrs, inputs, types): return qnn_dense_legalize(attrs, inputs, types) +# Registering QNN dense legalization function. +@reg.register_qnn_legalize("qnn.rsqrt") +def legalize_qnn_dense(attrs, inputs, types): + return create_integer_lookup_op( + input_arg=inputs[0], + floating_point_func=lambda arr: 1 / np.sqrt(arr), + in_scale=inputs[1], + in_zero_point=inputs[2], + out_scale=inputs[3], + out_zero_point=inputs[4], + in_dtype=types[0].dtype, + out_dtype=types[0].dtype, + ) + + # Default to None. If overridden by target, this will not be run. # Generic QNN Conv2D legalization function. 
@tvm.target.generic_func
diff --git a/src/relay/qnn/op/rsqrt.cc b/src/relay/qnn/op/rsqrt.cc
index 6d37b1aa8d5d..93baa308a796 100644
--- a/src/relay/qnn/op/rsqrt.cc
+++ b/src/relay/qnn/op/rsqrt.cc
@@ -69,7 +69,9 @@ Expr MakeQuantizedRsqrt(Expr x, Expr scale, Expr zero_point, Expr output_scale,
   return Call(op, {x, scale, zero_point, output_scale, output_zero_point}, Attrs(), {});
 }
 
-// Canonicalization set in python/tvm/relay/qnn/op/canonicalizations.py
+// Translation to relay is done via canonicalization/legalization functions in python
+// e.g. python/tvm/relay/qnn/op/canonicalizations.py or
+// python/tvm/relay/qnn/op/legalizations.py
 RELAY_REGISTER_OP("qnn.rsqrt")
     .describe("Elementwise rsqrt for quantized tensors.")
     .set_num_inputs(5)
diff --git a/tests/python/relay/test_op_qnn_rsqrt.py b/tests/python/relay/test_op_qnn_rsqrt.py
index 1eb9b64057ca..0e40768343bd 100644
--- a/tests/python/relay/test_op_qnn_rsqrt.py
+++ b/tests/python/relay/test_op_qnn_rsqrt.py
@@ -15,8 +15,8 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import tvm
 import numpy as np
+import tvm
 from tvm import relay
 
 
@@ -51,6 +51,7 @@ def test_saturation():
     func = relay.Function([x], y)
     mod = tvm.IRModule.from_expr(func)
     mod = relay.transform.InferType()(mod)
+    mod = relay.qnn.transform.Legalize()(mod)
     mod = relay.qnn.transform.CanonicalizeOps()(mod)
     func = mod["main"]
 
@@ -77,6 +78,7 @@ def test_saturation():
     func = relay.Function([x], y)
     mod = tvm.IRModule.from_expr(func)
     mod = relay.transform.InferType()(mod)
+    mod = relay.qnn.transform.Legalize()(mod)
     mod = relay.qnn.transform.CanonicalizeOps()(mod)
     func = mod["main"]
 
From b5ec138bd2e5365584ac02a977259bacf909fb7f Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 26 Jan 2022 11:06:40 -0800
Subject: [PATCH 62/72] jostle ci

From fe54fa3f19af5a1cca02d004c3460377e5ee3c59 Mon Sep 17 00:00:00 2001
From: Andrew Luo
Date: Wed, 26 Jan 2022 16:32:17 -0800
Subject: [PATCH 63/72] linting

---
 python/tvm/relay/qnn/op/canonicalizations.py               | 4 ++--
 python/tvm/relay/qnn/op/legalizations.py                   | 2 +-
 python/tvm/relay/qnn/op/op.py                              | 2 +-
 python/tvm/relay/transform/fake_quantization_to_integer.py | 1 +
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py
index bd0108d72e64..05b68e731239 100644
--- a/python/tvm/relay/qnn/op/canonicalizations.py
+++ b/python/tvm/relay/qnn/op/canonicalizations.py
@@ -14,12 +14,12 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
+"""Consists of utilities and methods for lowering QNN into mainline relay."""
 from typing import Callable
 
 import numpy as np
 import tvm
 from tvm import relay
-from tvm.relay.qnn.op.op import register_qnn_canonicalize
 
 
 def run_const_expr(expr: "relay.Expr") -> np.ndarray:
@@ -41,7 +41,7 @@ def create_integer_lookup_table(
 ) -> np.ndarray:
     """
-    Return a table where each input indexes to the quantized output approximating the given function.
+    Return a table where each input indexes to the output quantizing the given function.
 
     Note this also supports mapping unsigned and signed integers to each other.
diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py index 947e9d823134..fd835d72fc09 100644 --- a/python/tvm/relay/qnn/op/legalizations.py +++ b/python/tvm/relay/qnn/op/legalizations.py @@ -49,7 +49,7 @@ def legalize_qnn_dense(attrs, inputs, types): # Registering QNN dense legalization function. @reg.register_qnn_legalize("qnn.rsqrt") -def legalize_qnn_dense(attrs, inputs, types): +def legalize_qnn_rsqrt(attrs, inputs, types): return create_integer_lookup_op( input_arg=inputs[0], floating_point_func=lambda arr: 1 / np.sqrt(arr), diff --git a/python/tvm/relay/qnn/op/op.py b/python/tvm/relay/qnn/op/op.py index c83a32e2ce6a..335947b9f7ce 100644 --- a/python/tvm/relay/qnn/op/op.py +++ b/python/tvm/relay/qnn/op/op.py @@ -49,7 +49,7 @@ def register_qnn_canonicalize(op_name, legal_op=None, level=10): op_name : str The name of the operator - legal_op: function (attrs: Attrs, args: List[Expr], List[relay.Type]: arg_types) -> new_expr: Expr + legal_op: function (Attrs, List[Expr], List[relay.Type]) -> Expr The function for transforming an expr to another expr. level : int diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py index 3337c7cfb894..7398dc98c83d 100644 --- a/python/tvm/relay/transform/fake_quantization_to_integer.py +++ b/python/tvm/relay/transform/fake_quantization_to_integer.py @@ -21,6 +21,7 @@ from tvm.ir import TensorAffineType, TupleAffineType # import to register canonicalization funcs for fq2i +# pylint: disable=unused-import from tvm.relay.qnn.op import canonicalizations from tvm.tir import bijective_layout From 804e9fbe0643187973c4fafa1654004164b2891a Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:07:55 -0800 Subject: [PATCH 64/72] use take instead of gather --- python/tvm/relay/qnn/op/canonicalizations.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/tvm/relay/qnn/op/canonicalizations.py b/python/tvm/relay/qnn/op/canonicalizations.py index 05b68e731239..95e0cb60368d 100644 --- a/python/tvm/relay/qnn/op/canonicalizations.py +++ b/python/tvm/relay/qnn/op/canonicalizations.py @@ -155,8 +155,6 @@ def create_integer_lookup_op( in_dtype_num_bits = in_dtype_info.bits lookup_table = relay.const(lookup_table) - index_tensor = relay.reshape(input_arg, [-1]) - index_tensor = relay.reinterpret(index_tensor, f"uint{in_dtype_num_bits}") - result = relay.gather(lookup_table, -1, index_tensor) - result = relay.reshape_like(result, input_arg) + index_tensor = relay.reinterpret(input_arg, f"uint{in_dtype_num_bits}") + result = relay.take(lookup_table, index_tensor, axis=0, mode="fast") return result From 9a22774d96279c59f017d24e6a7ab4b8e22613d9 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:10:46 -0800 Subject: [PATCH 65/72] remove gather changes --- include/tvm/topi/transform.h | 2 +- src/relay/op/tensor/transform.cc | 3 +-- tests/python/relay/test_op_level3.py | 7 +++---- tests/python/topi/python/test_topi_transform.py | 10 ---------- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/include/tvm/topi/transform.h b/include/tvm/topi/transform.h index acff301f4f07..5a0ec49773f7 100644 --- a/include/tvm/topi/transform.h +++ b/include/tvm/topi/transform.h @@ -1321,7 +1321,7 @@ inline Tensor gather(const Tensor& data, int axis, const Tensor& indices, size_t indices_dim_i = static_cast(GetConstInt(indices->shape[axis])); ICHECK_GE(indices_dim_i, 1); } - ICHECK(indices->dtype.is_int() || 
indices->dtype.is_uint()); + ICHECK(indices->dtype.is_int()); Array out_shape; for (size_t i = 0; i < ndim_i; ++i) { diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 19f6cdf85574..4ae265df3a75 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -3322,8 +3322,7 @@ bool GatherRel(const Array& types, int num_inputs, const Attrs& attrs, << "Gather: expect indices type to be TensorType but get " << types[1]; return false; } - ICHECK(indices->dtype.is_int() || indices->dtype.is_uint()) - << "indices of gather must be tensor of integer"; + ICHECK(indices->dtype.is_int()) << "indices of take must be tensor of integer"; const auto param = attrs.as(); ICHECK(param != nullptr); ICHECK(param->axis.defined()); diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 327575fb8955..1bfaabf4bfce 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -1278,12 +1278,12 @@ def test_scatter_add(self, target, dev, ref_data, dshape, ishape, axis, dtype, i ], ) def test_gather(target, dev, executor_kind, data, axis, indices, ref_res): - def verify_gather(data, axis, indices, ref_res, indices_dtype="int32"): + def verify_gather(data, axis, indices, ref_res): data = np.asarray(data, dtype="float32") - indices = np.asarray(indices, dtype=indices_dtype) + indices = np.asarray(indices, dtype="int32") ref_res = np.asarray(ref_res) d = relay.var("x", relay.TensorType(data.shape, "float32")) - i = relay.var("y", relay.TensorType(indices.shape, indices_dtype)) + i = relay.var("y", relay.TensorType(indices.shape, "int32")) z = relay.gather(d, axis, i) func = relay.Function([d, i], z) @@ -1294,7 +1294,6 @@ def verify_gather(data, axis, indices, ref_res, indices_dtype="int32"): tvm.testing.assert_allclose(op_res.numpy(), ref_res, rtol=1e-5) verify_gather(data, axis, indices, ref_res) - verify_gather(data, axis, indices, ref_res, indices_dtype="uint32") diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index ddec14b16d01..50441699e95b 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -1011,16 +1011,6 @@ def test_gather(): verify_gather(np.random.randn(4, 7, 5), 1, np.random.randint(low=0, high=7, size=(4, 10, 5))) verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 2))) verify_gather(np.random.randn(4, 7, 5), 2, np.random.randint(low=0, high=5, size=(4, 7, 10))) - verify_gather( - np.random.randn(4, 7, 5), - 2, - np.random.randint(low=0, high=5, size=(4, 7, 10)).astype("uint32"), - ) - verify_gather( - np.random.randn(4, 7, 5), - 2, - np.random.randint(low=0, high=5, size=(4, 7, 10)).astype("uint8"), - ) @tvm.testing.uses_gpu From a148ff1c14121a4fd36f4fed51de46d025e5b114 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:12:15 -0800 Subject: [PATCH 66/72] undo changes --- tests/python/topi/python/test_topi_transform.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index 50441699e95b..28e7c738e242 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -18,11 +18,13 @@ import numpy as np import pytest import tvm -import tvm.testing +from tvm import te +from tvm import topi +from tvm import relay import 
tvm.topi.testing -from tvm import relay, te, topi from tvm.contrib.nvcc import have_fp16 +import tvm.testing def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): A = te.placeholder(shape=in_shape, name="A") From a75ea9f5d0ebd8161965c6084f76e9cfb9c6354d Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:12:35 -0800 Subject: [PATCH 67/72] undo changes --- tests/python/topi/python/test_topi_transform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py index 28e7c738e242..730d22cba16a 100644 --- a/tests/python/topi/python/test_topi_transform.py +++ b/tests/python/topi/python/test_topi_transform.py @@ -26,6 +26,7 @@ import tvm.testing + def verify_expand_dims(in_shape, out_shape, axis, num_newaxis): A = te.placeholder(shape=in_shape, name="A") B = topi.expand_dims(A, axis, num_newaxis) From 3b3c68584864f301d84e9a4b61c3a53c343d7850 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Wed, 26 Jan 2022 19:13:14 -0800 Subject: [PATCH 68/72] undo changes --- tests/python/relay/test_op_level3.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 1bfaabf4bfce..bc0b354fe05e 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -21,8 +21,10 @@ import numpy as np import pytest + import tvm import tvm.testing + from tvm import relay, te from tvm.error import TVMError from tvm.relay import create_executor, transform @@ -30,6 +32,7 @@ from utils import ref_funcs + executor_kind = tvm.testing.parameter("graph", "debug") From b609d6381dd23a6b94f042c052088d894ecd41bd Mon Sep 17 00:00:00 2001 From: "andrewzhaoluo (generated by with_the_same_user script)" Date: Fri, 28 Jan 2022 19:34:45 +0000 Subject: [PATCH 69/72] move thing in range --- .../relay/test_pass_fake_quantization_to_integer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/python/relay/test_pass_fake_quantization_to_integer.py b/tests/python/relay/test_pass_fake_quantization_to_integer.py index 28166bb8be72..9cc359d472fd 100644 --- a/tests/python/relay/test_pass_fake_quantization_to_integer.py +++ b/tests/python/relay/test_pass_fake_quantization_to_integer.py @@ -305,14 +305,14 @@ def test_fake_quantize_global_avg_pool(): def test_fake_quantize_rsqrt(): - x = relay.var("x", shape=[1, 3, 224, 224], dtype="int8") - zero = relay.const(0) + x = relay.var("x", shape=[1, 3, 3, 3], dtype="int8") + mid_point = relay.const(-128) - x = relay.qnn.op.dequantize(x, relay.const(2.0), zero) + x = relay.qnn.op.dequantize(x, relay.const(0.125), mid_point) op = relay.rsqrt(x) - op = relay.qnn.op.quantize(op, relay.const(2.0), zero) + op = relay.qnn.op.quantize(op, relay.const(0.125), mid_point) - x_np = np.random.randint(-128, 127, size=[1, 3, 224, 224], dtype="int8") + x_np = np.random.randint(-128, 127, size=[1, 3, 3, 3], dtype="int8") compare_fq_to_int(op, [x_np], True) From b0b7676f0f57d45d6e92c246f511bb0ad6b2fc0d Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 31 Jan 2022 14:44:15 -0800 Subject: [PATCH 70/72] lint --- tests/python/relay/test_op_level3.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index bc0b354fe05e..e58ceabd1879 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -21,10 +21,8 @@ import numpy as np import pytest - import tvm import 
tvm.testing - from tvm import relay, te from tvm.error import TVMError from tvm.relay import create_executor, transform @@ -32,7 +30,6 @@ from utils import ref_funcs - executor_kind = tvm.testing.parameter("graph", "debug") @@ -1299,7 +1296,6 @@ def verify_gather(data, axis, indices, ref_res): verify_gather(data, axis, indices, ref_res) - def test_gather_nd(target, dev, executor_kind): def verify_gather_nd(xshape, yshape, y_data, batch_dims=0, indices_dtype="int32"): x = relay.var("x", relay.TensorType(xshape, "float32")) From 5b919f117b0d6a591baf784ede18f23fab9a3973 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 7 Feb 2022 09:17:34 -0800 Subject: [PATCH 71/72] remove unneeded line --- include/tvm/topi/transform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/tvm/topi/transform.h b/include/tvm/topi/transform.h index 5a0ec49773f7..acff301f4f07 100644 --- a/include/tvm/topi/transform.h +++ b/include/tvm/topi/transform.h @@ -1321,7 +1321,7 @@ inline Tensor gather(const Tensor& data, int axis, const Tensor& indices, size_t indices_dim_i = static_cast(GetConstInt(indices->shape[axis])); ICHECK_GE(indices_dim_i, 1); } - ICHECK(indices->dtype.is_int()); + ICHECK(indices->dtype.is_int() || indices->dtype.is_uint()); Array out_shape; for (size_t i = 0; i < ndim_i; ++i) { From 3240c8653e111c84d5d71fd4fac9f5e61e5d8984 Mon Sep 17 00:00:00 2001 From: Andrew Luo Date: Mon, 7 Feb 2022 09:19:15 -0800 Subject: [PATCH 72/72] jostle
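
Note on the final design: the patches above converge on one mechanism. The dequantize -> float op -> quantize chain for a unary operator is folded, at compile time, into a table with one entry per input bit pattern, and the runtime graph simply indexes that table. The following self-contained NumPy sketch shows the idea; the helper names are illustrative rather than TVM API, and the table here is built over raw bit patterns (0..255) so that its order lines up with the reinterpret-to-uint indexing paired with take(..., mode="fast") from patch 64 onward.

import numpy as np


def build_lookup_table(fp_func, in_scale, in_zero_point, out_scale, out_zero_point,
                       in_dtype="int8", out_dtype="int8"):
    # Enumerate every bit pattern of the input dtype, then view the patterns
    # as the real dtype so that table[raw_bits] is the correct entry.
    num_bits = np.iinfo(in_dtype).bits
    inputs_q = np.arange(0, 2 ** num_bits, dtype="uint%d" % num_bits).view(in_dtype)
    # Dequantize, apply the fp32 reference function, then requantize and saturate.
    inputs_fp = in_scale * (inputs_q.astype("float64") - in_zero_point)
    with np.errstate(divide="ignore", invalid="ignore"):
        outputs_fp = fp_func(inputs_fp)
    outputs_q = np.round(outputs_fp / out_scale) + out_zero_point
    info = np.iinfo(out_dtype)
    return np.clip(outputs_q, info.min, info.max).astype(out_dtype)


def apply_lookup_table(table, x):
    # Stand-in for relay.take(table, relay.reinterpret(x, "uint8"), axis=0, mode="fast"):
    # the signed input is reinterpreted bit-for-bit as unsigned, so every index is in range.
    return table[x.view("uint%d" % np.iinfo(x.dtype).bits)]


# qparams as in test_fake_quantize_rsqrt: scale 0.125, zero point -128, so the
# dequantized inputs land in [0, 31.875], where rsqrt is well defined.
table = build_lookup_table(lambda a: 1.0 / np.sqrt(a), 0.125, -128, 0.125, -128)
x_np = np.random.randint(-128, 127, size=[1, 3, 3, 3], dtype="int8")
y_np = apply_lookup_table(table, x_np)

Because both sets of qparams are baked into the table at compile time, the op costs a single gather-style take at runtime, and mode="fast" is safe here since a reinterpreted uint8 index can never fall outside a 256-entry table.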