From 5d9b682aaa1665dfb1e19e3eb1c267213c82de8a Mon Sep 17 00:00:00 2001
From: Jorn Tuyls
Date: Wed, 24 Feb 2021 23:32:03 +0000
Subject: [PATCH] [BYOC][VitisAI] Fix issue in Vitis AI codegen out tensor
 names matching & update docs and docker (#7350)

* Fix bug in vitis ai codegen out tensor names matching & update docs & update docker

* Update vitis_ai.rst

* Move gpg-agent package installation to vitis ai core script

* Refactor install_vitis_ai_core script

* Update docs/deploy/vitis_ai.rst

Co-authored-by: Cody Yu

* Update docs/deploy/vitis_ai.rst

Co-authored-by: Cody Yu

* Update vitis-ai docs pynq/edge setup & adjustments for comments

* Update python/tvm/contrib/target/vitis_ai.py

Co-authored-by: Cody Yu

* Reorg Vitis AI dockerfile to make sure gpg-agent is installed before llvm

Co-authored-by: Jorn Tuyls
Co-authored-by: Cody Yu
---
 docker/Dockerfile.demo_vitis_ai                |  9 +-
 .../install/ubuntu_install_vitis_ai_core.sh   | 12 +--
 docs/deploy/vitis_ai.rst                       | 95 +++++++++++++++----
 python/tvm/contrib/target/vitis_ai.py          |  6 +-
 python/tvm/relay/op/contrib/vitis_ai.py        |  4 +
 5 files changed, 93 insertions(+), 33 deletions(-)

diff --git a/docker/Dockerfile.demo_vitis_ai b/docker/Dockerfile.demo_vitis_ai
index 58326b66bf0c..8cc623e2f38c 100644
--- a/docker/Dockerfile.demo_vitis_ai
+++ b/docker/Dockerfile.demo_vitis_ai
@@ -20,10 +20,13 @@
 FROM xilinx/vitis-ai:latest

 RUN apt-get update --fix-missing
-
 COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh
 RUN bash /install/ubuntu_install_core.sh

+# Install Vitis-AI ubuntu dependencies
+COPY install/ubuntu_install_vitis_ai_core.sh /install/ubuntu_install_vitis_ai_core.sh
+RUN bash /install/ubuntu_install_vitis_ai_core.sh
+
 COPY install/ubuntu_install_python.sh /install/ubuntu_install_python.sh
 RUN bash /install/ubuntu_install_python.sh

@@ -43,10 +46,6 @@ ENV PATH $PATH:$CARGO_HOME/bin:/usr/lib/go-1.10/bin
 COPY install/ubuntu_install_java.sh /install/ubuntu_install_java.sh
 RUN bash /install/ubuntu_install_java.sh

-# Install Vitis-AI ubuntu dependencies
-COPY install/ubuntu_install_vitis_ai_core.sh /install/ubuntu_install_vitis_ai_core.sh
-RUN bash /install/ubuntu_install_vitis_ai_core.sh
-
 # Install dependencies inside vitis-ai-tensorflow conda
 RUN . $VAI_ROOT/conda/etc/profile.d/conda.sh && \
     conda activate vitis-ai-tensorflow && \
diff --git a/docker/install/ubuntu_install_vitis_ai_core.sh b/docker/install/ubuntu_install_vitis_ai_core.sh
index ea05ffd170fe..a2d7c2ebe332 100644
--- a/docker/install/ubuntu_install_vitis_ai_core.sh
+++ b/docker/install/ubuntu_install_vitis_ai_core.sh
@@ -21,9 +21,9 @@ set -u
 set -o pipefail

 # install libraries for building Vitis-AI on ubuntu
-apt-get update && apt-get install -y --no-install-recommends \
-    graphviz\
-    gnupg2
-
-apt-get update && apt-get install -y gcc-aarch64-linux-gnu
-
+apt-get update && apt-get install -y \
+    graphviz \
+    gnupg2 \
+    gpg-agent \
+    gcc-aarch64-linux-gnu \
+    && rm -rf /var/lib/apt/lists/*
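The ``gcc-aarch64-linux-gnu`` package kept in this script provides the cross-toolchain that the docs below rely on when a module compiled on the host is exported for the Aarch64 edge board. A minimal sketch of that later step, assuming ``lib`` is a module returned by ``relay.build`` and that the cross-compiler is on the ``PATH`` under its usual name:

.. code:: python

   # Sketch: cross-compile a host-built TVM module for an Aarch64 edge
   # target. Assumes `lib` comes from relay.build(...); the compiler name
   # is the one provided by the gcc-aarch64-linux-gnu package above.
   from tvm.contrib import cc

   lib.export_library("tvm_dpu_arm.so", cc.cross_compiler("aarch64-linux-gnu-gcc"))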
diff --git a/docs/deploy/vitis_ai.rst b/docs/deploy/vitis_ai.rst
index df29f16f9d8d..7de8f58ce54f 100755
--- a/docs/deploy/vitis_ai.rst
+++ b/docs/deploy/vitis_ai.rst
@@ -304,15 +304,22 @@ Edge hardware setup

 This section provides instructions for setting up with the `Pynq `__ platform,
 but Petalinux-based flows are also supported.

-1. Download the Pynq v2.5 image for your target (use Z1 or Z2 for
+1. Download the Pynq v2.6 image for your target (use Z1 or Z2 for
    Ultra96 target depending on board version). Link to image:
-   https://github.com/Xilinx/PYNQ/releases/tag/v2.5
+   https://github.com/Xilinx/PYNQ/releases/tag/v2.6.0
 2. Follow Pynq instructions for setting up the board: `pynq
    setup `__
 3. After connecting to the board, make sure to run as root. **Execute**
    ``su``
-4. Set up DPU on Pynq by following the steps here: `DPU Pynq
-   setup `__
+4. Set up DPU on Pynq:
+
+   .. code:: bash
+
+      git clone --branch v1.2.0 --recursive --shallow-submodules https://github.com/Xilinx/DPU-PYNQ.git
+      cd DPU-PYNQ/upgrade
+      make
+      pip3 install pynq-dpu==1.2.0
+
 5. Run the following command to download the DPU bitstream:

    .. code:: bash

@@ -343,7 +350,7 @@ interface between TVM and Vitis-AI tools.

    .. code:: bash

      apt-get install libhdf5-dev
-     pip3 install pydot h5py
+     pip3 install pydot==1.4.1 h5py==2.8.0

 2. Install PyXIR

@@ -362,16 +369,17 @@ interface between TVM and Vitis-AI tools.
 3. Build TVM with Vitis-AI

    .. code:: bash

      mkdir build
      cp cmake/config.cmake build
      cd build
+     echo set\(USE_LLVM OFF\) >> config.cmake
      echo set\(USE_VITIS_AI ON\) >> config.cmake
      cmake ..
-     make
+     make tvm_runtime -j$(nproc)

 4. Install TVM

    .. code:: bash

      cd tvm/python
-     pip3 install -e . --user
+     pip3 install -e .

 5. Check whether the setup was successful in the Python shell:

    .. code:: python

      import pyxir
      import tvm

@@ -441,7 +449,7 @@ TVM.

    import tvm
    import tvm.relay as relay
    from tvm.contrib.target import vitis_ai
-   from tvm.contrib import util, graph_runtime
+   from tvm.contrib import utils, graph_runtime
    from tvm.relay.build_module import bind_params_by_name
    from tvm.relay.op.contrib.vitis_ai import annotation

@@ -524,6 +532,8 @@ model in TVM with Vitis-AI at the edge. The first couple of steps will
 have to be run on the host machine and take care of quantization and
 compilation for deployment at the edge.

+A complete ResNet 18 example can be found `here `__.
+
 Host steps
 ^^^^^^^^^^

@@ -541,7 +551,7 @@ TVM.

    import tvm
    import tvm.relay as relay
    from tvm.contrib.target import vitis_ai
-   from tvm.contrib import util, graph_runtime
+   from tvm.contrib import utils, graph_runtime
    from tvm.relay.build_module import bind_params_by_name
    from tvm.relay.op.contrib.vitis_ai import annotation

@@ -549,12 +559,47 @@ After importing a convolutional neural network model using the usual
 Relay APIs, annotate the Relay expression for the given Vitis-AI DPU
 target and partition the graph.

+.. note::
+
+    We recommend converting DPU convolutions' data layouts to NHWC and CPU convolutions'
+    data layouts to NCHW for the best DPU and out-of-the-box CPU performance. You can use
+    the ConvertLayout transformation pass twice to achieve this, as demonstrated in the
+    code block below. Alternatively, you can leave the CPU convolution layouts in NHWC
+    and tune ARM CPU performance for this data layout, which avoids the layout
+    transformation overhead introduced by executing DPU convolutions in NHWC and CPU
+    convolutions in NCHW (check out the `AutoScheduling `__ and `AutoTuning `__
+    tutorials for this).
+
 .. code:: python

    mod["main"] = bind_params_by_name(mod["main"], params)
+
+   # For the edge DPU we recommend converting the convolutions' data layout
+   # to NHWC for best performance. Therefore, we first convert the layouts
+   # of all convolutions to NHWC before partitioning. Afterwards, we can
+   # convert any remaining convolutions (to be executed on the CPU) back to NCHW.
+   desired_layouts = {'nn.conv2d': ['NHWC', 'default']}
+   seq = tvm.transform.Sequential([relay.transform.RemoveUnusedFunctions(),
+                                   relay.transform.ConvertLayout(desired_layouts),
+                                   relay.transform.FoldConstant()])
+   with tvm.transform.PassContext(opt_level=3):
+       mod = seq(mod)
+
+   # Annotate and partition the Relay expression for the given target
    mod = annotation(mod, params, target)
    mod = relay.transform.MergeCompilerRegions()(mod)
    mod = relay.transform.PartitionGraph()(mod)
+
+   # After partitioning we recommend transforming the remaining convolutions
+   # (that will be executed on the CPU, if any) back to NCHW data layout
+   # for best CPU performance
+   desired_layouts = {'nn.conv2d': ['NCHW', 'default']}
+   seq = tvm.transform.Sequential([relay.transform.RemoveUnusedFunctions(),
+                                   relay.transform.ConvertLayout(desired_layouts),
+                                   relay.transform.FoldConstant()])
+   with tvm.transform.PassContext(opt_level=3):
+       mod = seq(mod)
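At this point it is worth checking how much of the graph was actually offloaded. The sketch below walks the partitioned module and prints, for each global function, whether it was assigned to the external Vitis-AI codegen or stays on the CPU (``print_partitions`` is an illustrative helper, not a TVM or PyXIR API):

.. code:: python

   def print_partitions(mod):
       # Functions produced by PartitionGraph carry a "Compiler" attribute
       # naming the external codegen (here "vitis_ai"); everything else is
       # compiled for the host CPU target.
       for gvar in mod.get_global_vars():
           func = mod[gvar]
           if func.attrs and "Compiler" in func.attrs:
               print(gvar.name_hint, "->", func.attrs["Compiler"])
           else:
               print(gvar.name_hint, "-> host CPU")

   print_partitions(mod)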
 Now, we can build the TVM runtime library for executing the model. The
 TVM target is 'llvm' as the operations that can't be handled by the DPU
@@ -572,13 +617,9 @@ can be included.

 .. code:: python

-   from tvm.contrib import util
-
-   temp = util.tempdir()
-
    tvm_target = 'llvm'
    target = 'DPUCZDX8G-zcu104'
-   export_rt_mod_file = temp.relpath("vitis_ai.rtmod")
+   export_rt_mod_file = "vitis_ai.rtmod"

    with tvm.transform.PassContext(opt_level=3,
                                   config={'relay.ext.vitis_ai.options.target': target,
                                           'relay.ext.vitis_ai.options.export_runtime_module': export_rt_mod_file}):

@@ -604,9 +645,9 @@ Save the TVM lib module so that the Vitis-AI runtime module will also be exported.

 .. code:: python

-   from tvm.contrib import util
+   from tvm.contrib import utils

-   temp = util.tempdir()
+   temp = utils.tempdir()
    lib.export_library(temp.relpath("tvm_lib.so"))

 After quantizing and compiling the model for Vitis-AI acceleration using the

@@ -638,15 +679,31 @@ Edge steps
 ^^^^^^^^^^

 After setting up TVM with Vitis-AI on the edge device, you can now load
-the TVM runtime module into memory and feed inputs for inference.
+the TVM runtime module into memory and feed inputs for inference. A nearly
+complete runtime script can be found below. Make sure to run the script
+as root (execute ``su`` in the terminal to log in as root).
+
+.. note::
+
+    You will see a warning about the 'cpu-tf' runtime not being found. This warning is
+    expected on the board and can be ignored. Note also that you **shouldn't** import the
+    PyXIR targets in the run script (``import pyxir.contrib.target.DPUCZDX8G``).

 .. code:: python

+   import pyxir
+   import tvm
+   from tvm.contrib import graph_runtime
+
+   ctx = tvm.cpu()
+
+   # input_name = ...
+   # input_data = ...
+
    # load the module into memory
    lib = tvm.runtime.load_module("tvm_dpu_arm.so")
    module = graph_runtime.GraphModule(lib["default"](tvm.cpu()))

-   module.set_input(name, data)
+   module.set_input(input_name, input_data)
    module.run()
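After ``module.run()`` you would typically read back and post-process the results. A short sketch of that step; the output index and the argmax interpretation are model-dependent placeholders rather than part of the script above:

.. code:: python

   import numpy as np

   # Sketch: fetch the first output tensor; the index and its meaning
   # depend on the model that was compiled.
   output = module.get_output(0).asnumpy()
   print("Top-1 class index:", np.argmax(output))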
diff --git a/python/tvm/contrib/target/vitis_ai.py b/python/tvm/contrib/target/vitis_ai.py
index d4931d9e3f48..f319fd799829 100644
--- a/python/tvm/contrib/target/vitis_ai.py
+++ b/python/tvm/contrib/target/vitis_ai.py
@@ -132,14 +132,14 @@ def vitis_ai_compiler(ref):
     layers = xgraph.get_layers()

     # Get the output tensor names using XGraph and output Relay ids
-    out_tensor_names = []
+    out_tensor_names = ["unknown_name"] * len(output_relay_ids)
     for layer in layers:
         if not layer.internal:
             for relay_id in layer.attrs["relay_id"]:
                 if relay_id in output_relay_ids:
-                    out_tensor_names.append(layer.name)
+                    out_tensor_names[output_relay_ids.index(relay_id)] = layer.name
                     break
-    if not out_tensor_names:
+    if any(name == "unknown_name" for name in out_tensor_names):
         raise ValueError(
             "During codegeneration the loading of subexpression \
             failed due to output tensor name mismatch in Relay PyXIR interface."
diff --git a/python/tvm/relay/op/contrib/vitis_ai.py b/python/tvm/relay/op/contrib/vitis_ai.py
index fa17c63fc00a..aaa9f99e61ed 100644
--- a/python/tvm/relay/op/contrib/vitis_ai.py
+++ b/python/tvm/relay/op/contrib/vitis_ai.py
@@ -85,6 +85,10 @@ def visit_call(self, call):

 def annotation(mod, params, target):
     """Annotate Relay expression for Vitis-AI DPU accelerators"""
+
+    # We need type information for supporting models that contain operations that don't
+    # have a Relay to XLayer translation
+    mod = relay.transform.InferType()(mod)
+
     xgraph = pyxir.frontend.tvm.from_relay(mod, params, postprocessing=None)
     xgraph = pyxir.partition(xgraph, targets=[target])
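The codegen hunk in ``python/tvm/contrib/target/vitis_ai.py`` above replaces order-of-discovery appends with index-preserving writes, so the returned names always line up with ``output_relay_ids``. A self-contained sketch of that logic, using made-up ids and layer names rather than a real XGraph, shows the effect:

.. code:: python

   # Illustrative data only; real ids and names come from the XGraph layers.
   output_relay_ids = [7, 3, 9]
   layers = [("layer_b", [3]), ("layer_c", [9]), ("layer_a", [7])]

   out_tensor_names = ["unknown_name"] * len(output_relay_ids)
   for name, relay_ids in layers:
       for relay_id in relay_ids:
           if relay_id in output_relay_ids:
               # Write at the position of the matching output id instead of
               # appending, so the layer traversal order no longer matters.
               out_tensor_names[output_relay_ids.index(relay_id)] = name
               break

   assert out_tensor_names == ["layer_a", "layer_b", "layer_c"]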