From 1f88ff4e4841cf53236818527108062f998c0606 Mon Sep 17 00:00:00 2001
From: Kimish Patel
Date: Mon, 28 Aug 2023 18:19:37 -0700
Subject: [PATCH] Update doc and code to run quantized model (#157)

Summary:
Pull Request resolved: https://github.com/pytorch/executorch/pull/157
- Fix doc to separate 1) generating a quantized model and 2) running it with executor_runner
- Include missing header in choose_qparams
- Include quantized ops by default in executor_runner

Reviewed By: larryliu0820, guangy10

Differential Revision: D48752106

fbshipit-source-id: 30f4e7ba121abeb01b7b97020c2fef0f5d2ac891
---
 examples/README.md                          | 14 +++++++++++++-
 examples/executor_runner/targets.bzl        |  6 +++---
 examples/quantization/test_quantize.sh      |  3 +--
 kernels/quantized/cpu/op_choose_qparams.cpp |  1 +
 4 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/examples/README.md b/examples/README.md
index 98d30e87ba..d79927cfb7 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -58,7 +58,9 @@ buck2 run examples/executor_runner:executor_runner -- --model_path mv2.pte
 ## Quantization
 Here is the [Quantization Flow Docs](/docs/website/docs/tutorials/quantization_flow.md).
 
-You can run quantization test with the following command:
+### Generating a quantized model
+
+You can generate a quantized model with the following command (the example below uses mv2, aka MobileNetV2):
 ```bash
 python3 -m examples.quantization.example --model_name "mv2" --so-library "" # for MobileNetv2
 ```
@@ -80,6 +82,16 @@ you can also find the valid quantized example models by running:
 buck2 run executorch/examples/quantization:example -- --help
 ```
 
+### Running a quantized model
+
+A quantized model can be run via executor_runner, just like a floating point model, as shown above:
+
+```bash
+buck2 run examples/executor_runner:executor_runner -- --model_path mv2.pte
+```
+
+Note that running a quantized model requires the quantize/dequantize operators available in the [quantized kernel lib](/kernels/quantized).
+
 ## XNNPACK Backend
 Please see [Backend README](backend/README) for XNNPACK quantization, export, and run workflow.
 
diff --git a/examples/executor_runner/targets.bzl b/examples/executor_runner/targets.bzl
index 5e313e2583..1784d120cc 100644
--- a/examples/executor_runner/targets.bzl
+++ b/examples/executor_runner/targets.bzl
@@ -28,13 +28,13 @@ def define_common_targets():
     register_custom_op = native.read_config("executorch", "register_custom_op", "0")
     register_quantized_ops = native.read_config("executorch", "register_quantized_ops", "0")
 
-    custom_ops_lib = []
+
+    # Include quantized ops by default so that quantized models can be run with portable ops
+    custom_ops_lib = ["//executorch/kernels/quantized:generated_lib"]
     if register_custom_op == "1":
         custom_ops_lib.append("//executorch/examples/custom_ops:lib_1")
     elif register_custom_op == "2":
         custom_ops_lib.append("//executorch/examples/custom_ops:lib_2")
-    if register_quantized_ops == "1":
-        custom_ops_lib.append("//executorch/kernels/quantized:generated_lib")
 
     # Test driver for models, uses all portable kernels and a demo backend. This
     # is intended to have minimal dependencies. If you want a runner that links
diff --git a/examples/quantization/test_quantize.sh b/examples/quantization/test_quantize.sh
index 93aef82566..88b3399a9d 100644
--- a/examples/quantization/test_quantize.sh
+++ b/examples/quantization/test_quantize.sh
@@ -32,8 +32,7 @@ test_buck2_quantization() {
   ${PYTHON_EXECUTABLE} -m "examples.quantization.example" --so_library="$SO_LIB" --model_name="$1"
 
   echo 'Running executor_runner'
-  buck2 run //examples/executor_runner:executor_runner \
-    --config=executorch.register_quantized_ops=1 -- --model_path="./$1.pte"
+  buck2 run //examples/executor_runner:executor_runner -- --model_path="./$1.pte"
   # should give correct result
 
   echo "Removing $1.pte"
diff --git a/kernels/quantized/cpu/op_choose_qparams.cpp b/kernels/quantized/cpu/op_choose_qparams.cpp
index bcc2902de8..aa01a5d2fd 100644
--- a/kernels/quantized/cpu/op_choose_qparams.cpp
+++ b/kernels/quantized/cpu/op_choose_qparams.cpp
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 /**
  * For an input tensor, use the scale and zero_point arguments to quantize it.
  */
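
For context on the `choose_qparams` operator touched by this patch: it selects the scale and zero_point used by the quantize/dequantize ops that the doc comment above refers to. The following is a minimal Python sketch of the standard min/max affine scheme such operators implement; it is an illustration only, not the actual ExecuTorch kernel, and the function names and uint8 qmin/qmax defaults are assumptions.

```python
def choose_qparams_sketch(values, qmin=0, qmax=255):
    """Pick (scale, zero_point) for affine quantization of `values`.

    Simplified illustration; the real kernel (op_choose_qparams.cpp)
    operates on tensors and handles more edge cases.
    """
    # Extend the range to cover 0.0 so that zero is exactly representable.
    min_val = min(0.0, min(values))
    max_val = max(0.0, max(values))

    # One float step per integer step of the quantized domain.
    scale = (max_val - min_val) / (qmax - qmin)
    if scale == 0.0:
        scale = 1.0  # degenerate all-zero input

    # zero_point is the integer that maps back to 0.0, clamped into range.
    zero_point = int(round(qmin - min_val / scale))
    zero_point = max(qmin, min(qmax, zero_point))
    return scale, zero_point


def quantize(x, scale, zero_point, qmin=0, qmax=255):
    # Companion op: q = clamp(round(x / scale) + zero_point, qmin, qmax).
    return max(qmin, min(qmax, int(round(x / scale)) + zero_point))


if __name__ == "__main__":
    data = [-1.5, 0.0, 0.25, 3.0]
    scale, zp = choose_qparams_sketch(data)
    print(scale, zp, [quantize(x, scale, zp) for x in data])
```

With these parameters, the float range [-1.5, 3.0] maps onto [0, 255] with 0.0 landing exactly on the zero_point, which is why the range must first be extended to include zero.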