From 1f88ff4e4841cf53236818527108062f998c0606 Mon Sep 17 00:00:00 2001
From: Kimish Patel
Date: Mon, 28 Aug 2023 18:19:37 -0700
Subject: [PATCH] Update doc and code to run quantized model (#157)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/157
- Fix doc to separate 1) generating a quantized model and 2) running it with executor_runner
- Include missing header in choose_qparams
- Include quantized ops by default in executor_runner

Reviewed By: larryliu0820, guangy10

Differential Revision: D48752106

fbshipit-source-id: 30f4e7ba121abeb01b7b97020c2fef0f5d2ac891
---
 examples/README.md                          | 14 +++++++++++++-
 examples/executor_runner/targets.bzl        |  6 +++---
 examples/quantization/test_quantize.sh      |  3 +--
 kernels/quantized/cpu/op_choose_qparams.cpp |  1 +
 4 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/examples/README.md b/examples/README.md
index 98d30e87ba..d79927cfb7 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -58,7 +58,9 @@ buck2 run examples/executor_runner:executor_runner -- --model_path mv2.pte
 ## Quantization
 Here is the [Quantization Flow Docs](/docs/website/docs/tutorials/quantization_flow.md).
 
-You can run quantization test with the following command:
+### Generating a quantized model
+
+You can generate a quantized model with the following command (the example below uses mv2, aka MobileNetV2):
 ```bash
 python3 -m examples.quantization.example --model_name "mv2" --so-library "" # for MobileNetv2
 ```
@@ -80,6 +82,16 @@ you can also find the valid quantized example models by running:
 buck2 run executorch/examples/quantization:example -- --help
 ```
 
+### Running a quantized model
+
+A quantized model can be run via executor_runner, just like a floating point model, as shown above:
+
+```bash
+buck2 run examples/executor_runner:executor_runner -- --model_path mv2.pte
+```
+
+Note that running a quantized model requires the quantize/dequantize operators available in the [quantized kernel lib](/kernels/quantized).
+
 ## XNNPACK Backend
 Please see [Backend README](backend/README) for XNNPACK quantization, export, and run workflow.
 
diff --git a/examples/executor_runner/targets.bzl b/examples/executor_runner/targets.bzl
index 5e313e2583..1784d120cc 100644
--- a/examples/executor_runner/targets.bzl
+++ b/examples/executor_runner/targets.bzl
@@ -28,13 +28,13 @@ def define_common_targets():
     register_custom_op = native.read_config("executorch", "register_custom_op", "0")
     register_quantized_ops = native.read_config("executorch", "register_quantized_ops", "0")
 
-    custom_ops_lib = []
+
+    # Include quantized ops by default so that quantized models can be run with portable ops
+    custom_ops_lib = ["//executorch/kernels/quantized:generated_lib"]
     if register_custom_op == "1":
         custom_ops_lib.append("//executorch/examples/custom_ops:lib_1")
     elif register_custom_op == "2":
         custom_ops_lib.append("//executorch/examples/custom_ops:lib_2")
-    if register_quantized_ops == "1":
-        custom_ops_lib.append("//executorch/kernels/quantized:generated_lib")
 
     # Test driver for models, uses all portable kernels and a demo backend. This
     # is intended to have minimal dependencies. If you want a runner that links
diff --git a/examples/quantization/test_quantize.sh b/examples/quantization/test_quantize.sh
index 93aef82566..88b3399a9d 100644
--- a/examples/quantization/test_quantize.sh
+++ b/examples/quantization/test_quantize.sh
@@ -32,8 +32,7 @@ test_buck2_quantization() {
   ${PYTHON_EXECUTABLE} -m "examples.quantization.example" --so_library="$SO_LIB" --model_name="$1"
 
   echo 'Running executor_runner'
-  buck2 run //examples/executor_runner:executor_runner \
-    --config=executorch.register_quantized_ops=1 -- --model_path="./$1.pte"
+  buck2 run //examples/executor_runner:executor_runner -- --model_path="./$1.pte"
   # should give correct result
 
   echo "Removing $1.pte"
diff --git a/kernels/quantized/cpu/op_choose_qparams.cpp b/kernels/quantized/cpu/op_choose_qparams.cpp
index bcc2902de8..aa01a5d2fd 100644
--- a/kernels/quantized/cpu/op_choose_qparams.cpp
+++ b/kernels/quantized/cpu/op_choose_qparams.cpp
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 /**
  * For an input tensor, use the scale and zero_point arguments to quantize it.
  */
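
For context on the `choose_qparams` operator touched by this patch: it selects the scale and zero_point used by the quantize/dequantize ops that the doc comment above refers to. The following is a minimal Python sketch of the standard min/max affine scheme such operators implement; it is an illustration only, not the actual ExecuTorch kernel, and the function names and uint8 qmin/qmax defaults are assumptions.

```python
def choose_qparams_sketch(values, qmin=0, qmax=255):
    """Pick (scale, zero_point) for affine quantization of `values`.

    Simplified illustration; the real kernel (op_choose_qparams.cpp)
    operates on tensors and handles more edge cases.
    """
    # Extend the range to cover 0.0 so that zero is exactly representable.
    min_val = min(0.0, min(values))
    max_val = max(0.0, max(values))

    # One float step per integer step of the quantized domain.
    scale = (max_val - min_val) / (qmax - qmin)
    if scale == 0.0:
        scale = 1.0  # degenerate all-zero input

    # zero_point is the integer that maps back to 0.0, clamped into range.
    zero_point = int(round(qmin - min_val / scale))
    zero_point = max(qmin, min(qmax, zero_point))
    return scale, zero_point


def quantize(x, scale, zero_point, qmin=0, qmax=255):
    # Companion op: q = clamp(round(x / scale) + zero_point, qmin, qmax).
    return max(qmin, min(qmax, int(round(x / scale)) + zero_point))


if __name__ == "__main__":
    data = [-1.5, 0.0, 0.25, 3.0]
    scale, zp = choose_qparams_sketch(data)
    print(scale, zp, [quantize(x, scale, zp) for x in data])
```

With these parameters, the float range [-1.5, 3.0] maps onto [0, 255] with 0.0 landing exactly on the zero_point, which is why the range must first be extended to include zero.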