diff --git a/nnvm/docs/api/python/index.rst b/nnvm/docs/api/python/index.rst
index 71c9f83fca6f..2c574ac60994 100644
--- a/nnvm/docs/api/python/index.rst
+++ b/nnvm/docs/api/python/index.rst
@@ -10,7 +10,6 @@ For user
 
    compiler
    frontend
-   runtime
    symbol
    graph
    top
diff --git a/nnvm/docs/api/python/runtime.rst b/nnvm/docs/api/python/runtime.rst
deleted file mode 100644
index a187424c68ce..000000000000
--- a/nnvm/docs/api/python/runtime.rst
+++ /dev/null
@@ -1,8 +0,0 @@
-nnvm.runtime
-------------
-.. automodule:: nnvm.runtime
-
-.. autofunction:: nnvm.runtime.create
-
-.. autoclass:: nnvm.runtime.Module
-   :members:
diff --git a/nnvm/python/nnvm/compiler/build_module.py b/nnvm/python/nnvm/compiler/build_module.py
index 82a2da4f5caa..8c4b1a46f6bd 100644
--- a/nnvm/python/nnvm/compiler/build_module.py
+++ b/nnvm/python/nnvm/compiler/build_module.py
@@ -4,9 +4,9 @@
 
 import logging
 import tvm
+from tvm.contrib import graph_runtime
 from . import graph_attr, graph_util
 from .. import graph as _graph
-from .. import runtime
 
 OPT_PASS_LEVEL = {
     "SimplifyInference": 2,
@@ -220,7 +220,7 @@ def _run_graph(graph, params):
     _, oshape = graph_util.infer_shape(graph, **shape)
     _, odtype = graph_util.infer_dtype(graph, **dtype)
     graph, libmod, _ = build(graph, target, shape, dtype)
-    m = runtime.create(graph, libmod, ctx)
+    m = graph_runtime.create(graph, libmod, ctx)
     set_input, run, get_output = m["set_input"], m["run"], m["get_output"]
     for k, v in params.items():
         set_input(k, tvm.nd.array(v))
diff --git a/nnvm/tutorials/mobilenet_inference_gpu.py b/nnvm/tutorials/mobilenet_inference_gpu.py
index 3f68c6ff255f..28e97244f88a 100644
--- a/nnvm/tutorials/mobilenet_inference_gpu.py
+++ b/nnvm/tutorials/mobilenet_inference_gpu.py
@@ -15,9 +15,9 @@
 ######################################################################
 # Register the NVCC Compiler Option
 # ---------------------------------
-# NNVM optimizes the graph and relies on TVM to generate fast
-# GPU code, to get the maximum performance, we need to enable
-# nvcc's compiler hook. This gives better performance than nvrtc mode.
+# NNVM optimizes the graph and relies on TVM to generate fast GPU code.
+# To get the maximum performance, we need to enable nvcc's compiler hook.
+# This gives better performance than nvrtc mode.
 
 @tvm.register_func
 def tvm_callback_cuda_compile(code):
@@ -28,7 +28,7 @@ def tvm_callback_cuda_compile(code):
 # Prepare the Benchmark
 # ---------------------
 # We construct a standard imagenet inference benchmark.
-# We use nnvm's testing utility to produce the model description and random parameters that so the example does not
+# We use nnvm's testing utility to produce the model description and random parameters so that the example does not
 # depend on a specific front-end framework.
 #
 # .. note::
@@ -46,17 +46,17 @@ def tvm_callback_cuda_compile(code):
     batch_size=1, image_shape=image_shape)
 
 ######################################################################
-# Compile The Graph
+# Compile the Graph
 # -----------------
 # NNVM needs two things to compile a deep learning model:
 #
-# - net which is the graph representation of the computation
-# - params a dictionary of str to parameters.
+# - net: the graph representation of the computation
+# - params: a dictionary of str to parameters
 #
 # To compile the graph, we call the build function with the graph
 # configuration and parameters.
-# When parameters are provided, NNVM will pre-compute certain part of the graph if possible,
-# the new parameter set returned as the third return value.
+# When parameters are provided, NNVM will pre-compute certain parts of the graph if possible (e.g. simplify batch normalization to scale-shift),
+# and return the updated parameters.
 
 graph, lib, params = nnvm.compiler.build(
     net, target, shape={"data": data_shape}, params=params)
@@ -65,7 +65,7 @@ def tvm_callback_cuda_compile(code):
 # Run the Compiled Module
 # -----------------------
 #
-# To deploy the module, we call :any:`tvm.contrib.graph_runtime.create` passing in the graph the lib and context.
+# To deploy the module, we call :any:`tvm.contrib.graph_runtime.create`, passing in the graph, the lib, and the context.
 # Thanks to TVM, we can deploy the compiled module to many platforms and languages.
 # The deployment module is designed to contain minimum dependencies.
 # This example runs on the same machine.
@@ -79,5 +79,5 @@ def tvm_callback_cuda_compile(code):
 module.run()
 # get output
 out = module.get_output(0, tvm.nd.empty(out_shape))
-# Convert to numpy
+# convert to numpy
 out.asnumpy()
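As a sanity check, here is a minimal end-to-end sketch of the renamed runtime path this patch switches to. It mirrors the tutorial above; the `data_shape`/`out_shape` values and the random input are illustrative assumptions rather than part of the patch, and it assumes a CUDA-enabled TVM build:

```python
import numpy as np
import tvm
import nnvm.compiler
import nnvm.testing
# the new import path introduced by this patch
from tvm.contrib import graph_runtime

# illustrative shapes for an imagenet-style mobilenet workload
data_shape = (1, 3, 224, 224)
out_shape = (1, 1000)

# model description and random parameters from nnvm's testing utility
net, params = nnvm.testing.mobilenet.get_workload(
    batch_size=1, image_shape=(3, 224, 224))

# compile; the returned params may be pre-computed/simplified
graph, lib, params = nnvm.compiler.build(
    net, "cuda", shape={"data": data_shape}, params=params)

# create the deployment module via tvm.contrib.graph_runtime
# (previously nnvm.runtime.create)
module = graph_runtime.create(graph, lib, tvm.gpu(0))
module.set_input(**params)
module.set_input("data", tvm.nd.array(
    np.random.uniform(size=data_shape).astype("float32")))
module.run()
out = module.get_output(0, tvm.nd.empty(out_shape))
print(out.asnumpy().shape)
```

Behavior should match the updated `_run_graph` helper and the tutorial above; only the import path changes from `nnvm.runtime` to `tvm.contrib.graph_runtime`.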