diff --git a/docs/dev/index.rst b/docs/dev/index.rst index b4fb37d790f4..76d50f496e75 100644 --- a/docs/dev/index.rst +++ b/docs/dev/index.rst @@ -423,3 +423,4 @@ microTVM :maxdepth: 1 microtvm_design + model_library_format diff --git a/docs/dev/model_library_format.rst b/docs/dev/model_library_format.rst new file mode 100644 index 000000000000..fec90de4bcea --- /dev/null +++ b/docs/dev/model_library_format.rst @@ -0,0 +1,169 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Model Library Format +==================== + +About Model Library Format +-------------------------- + +TVM traditionally exports generated libraries as Dynamic Shared Objects (e.g. DLLs (Windows) or .so +(Linux)). Inferences can be performed using those libraries by loading them into an executable using +``libtvm_runtime.so``. This process is very dependent on services provided by a traditional OS. + +For deployment to unconventional platforms (e.g. those lacking a traditional OS), TVM provides another +output format, Model Library Format. Initially, the microTVM project is the primary use case for this +format. 
Should it become useful in other use cases (and in particular, should it become possible to +export BYOC artifacts in Model Library Format), it could be used as a general-purpose TVM export +format. Model Library Format is a tarball containing a file for each piece of the TVM compiler +output. + +What can be Exported? +--------------------- + +At the time of writing, export is limited to full models built with ``tvm.relay.build``. + +Directory Layout +---------------- + +Model Library Format is contained within a tarball. All paths are relative to the root of the +tarball: + +- ``/`` - Root of the tarball + + - ``codegen`` - Root directory for all generated device code + + - (see `codegen`_ section) + + - ``executor-config/`` - Configuration for the executor which drives model inference + + - ``graph/`` - Root directory containing configuration for the GraphExecutor + + - ``graph.json`` - GraphExecutor JSON configuration + + - ``metadata.json`` - Machine-parseable metadata for this model + + - ``parameters/`` - Root directory where simplified parameters are placed + + - ``<model_name>.params`` - Parameters for the model, in ``tvm.relay._save_params`` format + + - ``src/`` - Root directory for all source code consumed by TVM + + - ``relay.txt`` - Relay source code for the generated model + +Description of Sub-directories +------------------------------ + +.. _subdir_codegen: + +``codegen`` +^^^^^^^^^^^ + +All TVM-generated code is placed in this directory. At the time of writing, there is 1 file per +Module in the generated Module tree, though this restriction may change in the future. Files in +this directory should have filenames of the form ``<target>/(lib|src)/<unique_name>.<format>``. + +These components are described below: + + * ``<target>`` - Identifies the TVM target on which the code should run. Currently, only ``host`` + is supported. + * ``<unique_name>`` - A unique slug identifying this file. Currently ``lib<n>``, with ``<n>`` an + auto-incrementing integer. + * ``<format>`` - Suffix identifying the filename format. 
Currently ``c`` or ``o``. + +An example directory tree for a CPU-only model is shown below: + +- ``codegen/`` - Codegen directory + + - ``host/`` - Generated code for ``target_host`` + + - ``lib/`` - Generated binary object files + + - ``lib0.o`` - LLVM module (if ``llvm`` target is used) + - ``lib1.o`` - LLVM CRT Metadata Module (if ``llvm`` target is used) + + - ``src/`` - Generated C source + + - ``lib0.c`` - C module (if ``c`` target is used) + - ``lib1.c`` - C CRT Metadata module (if ``c`` target is used) + +``executor-config`` +^^^^^^^^^^^^^^^^^^^ + +Contains machine-parseable configuration for executors which can drive model inference. Currently, +only the GraphExecutor produces configuration for this directory, in ``graph/graph.json``. This +file should be read in and the resulting string supplied to the ``GraphExecutor()`` constructor for +parsing. + +``parameters`` +^^^^^^^^^^^^^^ + +Contains machine-parseable parameters. A variety of formats may be provided, but at present, only +the format produced by ``tvm.relay._save_params`` is supplied. When building with +``tvm.relay.build``, the ``name`` parameter is considered to be the model name. A single file is +created in this directory, named ``<model_name>.params``. + +``src`` +^^^^^^^ + +Contains source code parsed by TVM. Currently, just the Relay source code is created in +``src/relay.txt``. + +Metadata +-------- + +Machine-parseable metadata is placed in a file ``metadata.json`` at the root of the tarball. +Metadata is a dictionary with these keys: + +- ``export_datetime``: Timestamp when this Model Library Format was generated, in + `strftime <https://docs.python.org/3/library/time.html#time.strftime>`_ + format ``"%Y-%m-%d %H:%M:%SZ"``. +- ``memory``: A summary of the memory usage of each generated function. Documented in + `Memory Usage Summary`_. +- ``model_name``: The name of this model (e.g. the ``name`` parameter supplied to + ``tvm.relay.build``). +- ``executors``: A list of executors supported by this model. Currently, this list is + ``["graph"]`` for the GraphExecutor, ``["aot"]`` for the AOT executor, or empty for operator exports. 
+- ``target``: A dictionary mapping ``device_type`` (the underlying integer, as a string) to the + sub-target which describes the relay backend used for that ``device_type``. +- ``version``: A numeric version number that identifies the format used in this Model Library + Format. This number is incremented when the metadata structure or on-disk structure changes. + This document reflects version ``5``. + +Memory Usage Summary +^^^^^^^^^^^^^^^^^^^^ + +A dictionary with these sub-keys: + + - ``"main"``: ``list[MainFunctionWorkspaceUsage]``. A list summarizing memory usage for each + workspace used by the main function and all sub-functions invoked. + - ``"operator_functions"``: ``map[string, list[FunctionWorkspaceUsage]]``. Maps operator function + name to a list summarizing memory usage for each workspace used by the function. + +A ``MainFunctionWorkspaceUsage`` is a dict with these keys: + +- ``"device"``: ``int``. The ``device_type`` associated with this workspace. +- ``"workspace_size_bytes"``: ``int``. Number of bytes needed in this workspace by this function + and all sub-functions invoked. +- ``"constants_size_bytes"``: ``int``. Size of the constants used by the main function. +- ``"io_size_bytes"``: ``int``. Sum of the sizes of the buffers used from this workspace by this + function and sub-functions. + +A ``FunctionWorkspaceUsage`` is a dict with these keys: + +- ``"device"``: ``int``. The ``device_type`` associated with this workspace. +- ``"workspace_size_bytes"``: ``int``. Number of bytes needed in this workspace by this function. 
diff --git a/python/tvm/driver/tvmc/model.py b/python/tvm/driver/tvmc/model.py index 8c8828ddd49b..7dc3fd4cdd36 100644 --- a/python/tvm/driver/tvmc/model.py +++ b/python/tvm/driver/tvmc/model.py @@ -336,8 +336,8 @@ def import_package(self, package_path: str): with open(temp.relpath("metadata.json")) as metadata_json: metadata = json.load(metadata_json) - is_graph_runtime = "graph" in metadata["runtimes"] - graph = temp.relpath("runtime-config/graph/graph.json") if is_graph_runtime else None + has_graph_executor = "graph" in metadata["executors"] + graph = temp.relpath("executor-config/graph/graph.json") if has_graph_executor else None params = temp.relpath("parameters/default.params") self.type = "mlf" diff --git a/python/tvm/micro/model_library_format.py b/python/tvm/micro/model_library_format.py index 87c067051f82..ad49ee7d9578 100644 --- a/python/tvm/micro/model_library_format.py +++ b/python/tvm/micro/model_library_format.py @@ -225,7 +225,7 @@ def reset(tarinfo): tar_f.add(str(source_dir), arcname=".", filter=reset) -_GENERATED_VERSION = 4 +_GENERATED_VERSION = 5 def _export_graph_model_library_format( @@ -241,7 +241,7 @@ def _export_graph_model_library_format( Temporary directory to populate with Model Library Format contents. 
""" is_aot = isinstance(mod, executor_factory.AOTExecutorFactoryModule) - runtime = ["aot"] if is_aot else ["graph"] + executor = ["aot"] if is_aot else ["graph"] metadata = { "version": _GENERATED_VERSION, @@ -249,7 +249,7 @@ def _export_graph_model_library_format( "export_datetime": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%SZ"), "memory": _build_memory_map(mod), "target": {int(k): str(v) for k, v in mod.target.items()}, - "runtimes": runtime, + "executors": executor, "style": "full-model", } @@ -272,7 +272,7 @@ def _export_graph_model_library_format( f.write(str(mod.ir_mod)) if not is_aot: - graph_config_dir = tempdir / "runtime-config" / "graph" + graph_config_dir = tempdir / "executor-config" / "graph" graph_config_dir.mkdir(parents=True) with open(graph_config_dir / "graph.json", "w") as f: f.write(mod.get_executor_config()) @@ -363,7 +363,7 @@ def _export_operator_model_library_format(mod: build_module.OperatorModule, temp "export_datetime": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%SZ"), "memory": memory_map, "target": {k: str(v) for k, v in targets.items()}, - "runtimes": [], + "executors": [], "style": "operator", } with open(tempdir / "metadata.json", "w") as metadata_f: diff --git a/tests/python/driver/tvmc/test_mlf.py b/tests/python/driver/tvmc/test_mlf.py index 4669fab916a6..0426f5678153 100644 --- a/tests/python/driver/tvmc/test_mlf.py +++ b/tests/python/driver/tvmc/test_mlf.py @@ -18,6 +18,7 @@ import pytest import os import shlex +import sys import tvm from tvm.driver import tvmc @@ -130,3 +131,7 @@ def test_tvmc_import_package_mlf_aot(tflite_mobilenet_v1_1_quant, tflite_compile assert tvmc_package.graph is None, ".graph must not be set in the MLF archive for AOT executor." assert tvmc_package.params is not None, ".params must be set in the MLF archive." assert tvmc_package.type == "mlf", ".type must be set to 'mlf' in the MLF format." 
+ + +if __name__ == "__main__": + sys.exit(pytest.main([__file__] + sys.argv[1:])) diff --git a/tests/python/unittest/test_micro_model_library_format.py b/tests/python/unittest/test_micro_model_library_format.py index 246c0336a001..a15e37925eea 100644 --- a/tests/python/unittest/test_micro_model_library_format.py +++ b/tests/python/unittest/test_micro_model_library_format.py @@ -56,7 +56,7 @@ def test_export_operator_model_library_format(): with open(os.path.join(extract_dir, "metadata.json")) as json_f: metadata = json.load(json_f) - assert metadata["version"] == 4 + assert metadata["version"] == 5 assert metadata["model_name"] == "add" export_datetime = datetime.datetime.strptime( metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ" @@ -89,7 +89,7 @@ def test_export_operator_model_library_format(): def validate_graph_json(extract_dir, factory): - with open(os.path.join(extract_dir, "runtime-config", "graph", "graph.json")) as graph_f: + with open(os.path.join(extract_dir, "executor-config", "graph", "graph.json")) as graph_f: graph_json = graph_f.read() assert graph_json == factory.graph_json @@ -141,7 +141,7 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[ with open(os.path.join(extract_dir, "metadata.json")) as json_f: metadata = json.load(json_f) - assert metadata["version"] == 4 + assert metadata["version"] == 5 assert metadata["model_name"] == "add" export_datetime = datetime.datetime.strptime( metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ" @@ -221,7 +221,7 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[ with open(os.path.join(extract_dir, "metadata.json")) as json_f: metadata = json.load(json_f) - assert metadata["version"] == 4 + assert metadata["version"] == 5 assert metadata["model_name"] == "add" export_datetime = datetime.datetime.strptime( metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ" @@ -300,7 +300,7 @@ def @main(%p0: Tensor[(1, 56, 56, 128), int16], %p1: Tensor[(3, 3, 128, 
1), int1 with open(os.path.join(extract_dir, "metadata.json")) as json_f: metadata = json.load(json_f) - assert metadata["version"] == 4 + assert metadata["version"] == 5 assert metadata["model_name"] == "qnn_conv2d" export_datetime = datetime.datetime.strptime( metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ"