diff --git a/nnvm/docs/api/python/index.rst b/nnvm/docs/api/python/index.rst
index 71c9f83fca6f..2c574ac60994 100644
--- a/nnvm/docs/api/python/index.rst
+++ b/nnvm/docs/api/python/index.rst
@@ -10,7 +10,6 @@ For user
 
    compiler
    frontend
-   runtime
    symbol
    graph
    top
diff --git a/nnvm/docs/api/python/runtime.rst b/nnvm/docs/api/python/runtime.rst
deleted file mode 100644
index a187424c68ce..000000000000
--- a/nnvm/docs/api/python/runtime.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-nnvm.runtime
-------------
-.. automodule:: nnvm.runtime
-
-.. autofunction:: nnvm.runtime.create
-
-.. autoclass:: nnvm.runtime.Module
-   :members:
diff --git a/nnvm/python/nnvm/compiler/build_module.py b/nnvm/python/nnvm/compiler/build_module.py
index 82a2da4f5caa..8c4b1a46f6bd 100644
--- a/nnvm/python/nnvm/compiler/build_module.py
+++ b/nnvm/python/nnvm/compiler/build_module.py
@@ -4,9 +4,9 @@
 
 import logging
 import tvm
+from tvm.contrib import graph_runtime
 from . import graph_attr, graph_util
 from .. import graph as _graph
-from .. import runtime
 
 OPT_PASS_LEVEL = {
     "SimplifyInference": 2,
@@ -220,7 +220,7 @@ def _run_graph(graph, params):
     _, oshape = graph_util.infer_shape(graph, **shape)
     _, odtype = graph_util.infer_dtype(graph, **dtype)
     graph, libmod, _ = build(graph, target, shape, dtype)
-    m = runtime.create(graph, libmod, ctx)
+    m = graph_runtime.create(graph, libmod, ctx)
     set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
     for k, v in params.items():
         set_input(k, tvm.nd.array(v))
diff --git a/nnvm/tutorials/mobilenet_inference_gpu.py b/nnvm/tutorials/mobilenet_inference_gpu.py
index 3f68c6ff255f..28e97244f88a 100644
--- a/nnvm/tutorials/mobilenet_inference_gpu.py
+++ b/nnvm/tutorials/mobilenet_inference_gpu.py
@@ -15,9 +15,9 @@
 ######################################################################
 # Register the NVCC Compiler Option
 # ---------------------------------
-# NNVM optimizes the graph and relies on TVM to generate fast
-# GPU code, to get the maximum performance, we need to enable
-# nvcc's compiler hook. This gives better performance than nvrtc mode.
+# NNVM optimizes the graph and relies on TVM to generate fast GPU code.
+# To get the maximum performance, we need to enable nvcc's compiler hook.
+# This gives better performance than nvrtc mode.
 
 @tvm.register_func
 def tvm_callback_cuda_compile(code):
@@ -28,7 +28,7 @@ def tvm_callback_cuda_compile(code):
 # Prepare the Benchmark
 # ---------------------
 # We construct a standard imagenet inference benchmark.
-# We use nnvm's testing utility to produce the model description and random parameters that so the example does not
+# We use nnvm's testing utility to produce the model description and random parameters so that the example does not
 # depend on a specific front-end framework.
 #
 # .. note::
@@ -46,17 +46,17 @@ def tvm_callback_cuda_compile(code):
     batch_size=1, image_shape=image_shape)
 
 ######################################################################
-# Compile The Graph
+# Compile the Graph
 # -----------------
 # NNVM needs two things to compile a deep learning model:
 #
-# - net which is the graph representation of the computation
-# - params a dictionary of str to parameters.
+# - net: the graph representation of the computation
+# - params: a dictionary of str to parameters
 #
 # To compile the graph, we call the build function with the graph
 # configuration and parameters.
-# When parameters are provided, NNVM will pre-compute certain part of the graph if possible,
-# the new parameter set returned as the third return value.
+# When parameters are provided, NNVM will pre-compute certain parts of the graph if possible (e.g. simplify batch normalization to scale-shift),
+# and return the updated parameters.
 
 graph, lib, params = nnvm.compiler.build(
     net, target, shape={"data": data_shape}, params=params)
@@ -65,7 +65,7 @@ def tvm_callback_cuda_compile(code):
 # Run the Compiled Module
 # -----------------------
 #
-# To deploy the module, we call :any:`tvm.contrib.graph_runtime.create` passing in the graph the lib and context.
+# To deploy the module, we call :any:`tvm.contrib.graph_runtime.create`, passing in the graph, the lib, and the context.
 # Thanks to TVM, we can deploy the compiled module to many platforms and languages.
 # The deployment module is designed to contain minimum dependencies.
 # This example runs on the same machine.
@@ -79,5 +79,5 @@ def tvm_callback_cuda_compile(code):
 module.run()
 # get output
 out = module.get_output(0, tvm.nd.empty(out_shape))
-# Convert to numpy
+# convert to numpy
 out.asnumpy()
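As a sanity check, here is a minimal end-to-end sketch of the renamed runtime path this patch switches to. It mirrors the tutorial above; the `data_shape`/`out_shape` values and the random input are illustrative assumptions rather than part of the patch, and it assumes a CUDA-enabled TVM build:

```python
import numpy as np
import tvm
import nnvm.compiler
import nnvm.testing
# the new import path introduced by this patch
from tvm.contrib import graph_runtime

# illustrative shapes for an imagenet-style mobilenet workload
data_shape = (1, 3, 224, 224)
out_shape = (1, 1000)

# model description and random parameters from nnvm's testing utility
net, params = nnvm.testing.mobilenet.get_workload(
    batch_size=1, image_shape=(3, 224, 224))

# compile; the returned params may be pre-computed/simplified
graph, lib, params = nnvm.compiler.build(
    net, "cuda", shape={"data": data_shape}, params=params)

# create the deployment module via tvm.contrib.graph_runtime
# (previously nnvm.runtime.create)
module = graph_runtime.create(graph, lib, tvm.gpu(0))
module.set_input(**params)
module.set_input("data", tvm.nd.array(
    np.random.uniform(size=data_shape).astype("float32")))
module.run()
out = module.get_output(0, tvm.nd.empty(out_shape))
print(out.asnumpy().shape)
```

Behavior should match the updated `_run_graph` helper and the tutorial above; only the import path changes from `nnvm.runtime` to `tvm.contrib.graph_runtime`.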