diff --git a/docs/Doxyfile b/docs/Doxyfile index c52e14867a82..b96678ca1696 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -770,7 +770,7 @@ WARN_LOGFILE = # spaces. # Note: If this tag is empty the current directory is searched. -INPUT = include/tvm topi/include/topi nnvm/include/nnvm vta/include/vta +INPUT = include/tvm topi/include/topi vta/include/vta # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -1991,7 +1991,7 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = DMLC_USE_CXX11 TVM_DLL= NNVM_DLL= __attribute__(x)= +PREDEFINED = DMLC_USE_CXX11 TVM_DLL= __attribute__(x)= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/docs/api/python/autotvm.rst b/docs/api/python/autotvm.rst index f6a9ff5f6aa8..5e8778502457 100644 --- a/docs/api/python/autotvm.rst +++ b/docs/api/python/autotvm.rst @@ -83,9 +83,6 @@ tvm.autotvm.task .. automodule:: tvm.autotvm.task.topi_integration :members: -.. automodule:: tvm.autotvm.task.nnvm_integration - :members: - tvm.autotvm.record ~~~~~~~~~~~~~~~~~~ .. automodule:: tvm.autotvm.record diff --git a/docs/api/python/index.rst b/docs/api/python/index.rst index 2773fefedee3..7a8566eec7ba 100644 --- a/docs/api/python/index.rst +++ b/docs/api/python/index.rst @@ -40,6 +40,5 @@ Python API dev topi vta/index - nnvm/index hybrid relay/index diff --git a/docs/api/python/nnvm/compiler.rst b/docs/api/python/nnvm/compiler.rst deleted file mode 100644 index 4cf1b083df60..000000000000 --- a/docs/api/python/nnvm/compiler.rst +++ /dev/null @@ -1,40 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. 
See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -nnvm.compiler -------------- - -.. automodule:: nnvm.compiler - -.. autofunction:: nnvm.compiler.build - -.. autofunction:: nnvm.compiler.build_config - -.. autofunction:: nnvm.compiler.save_param_dict - -.. autofunction:: nnvm.compiler.load_param_dict - -.. autofunction:: nnvm.compiler.optimize - -.. automodule:: nnvm.compiler.graph_util - :members: - -.. automodule:: nnvm.compiler.graph_attr - :members: - -.. automodule:: nnvm.compiler.compile_engine - :members: diff --git a/docs/api/python/nnvm/frontend.rst b/docs/api/python/nnvm/frontend.rst deleted file mode 100644 index ca8c4088fd08..000000000000 --- a/docs/api/python/nnvm/frontend.rst +++ /dev/null @@ -1,33 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. 
Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -nnvm.frontend -------------- - -.. automodule:: nnvm.frontend - -.. autofunction:: nnvm.frontend.from_mxnet - -.. autofunction:: nnvm.frontend.from_onnx - -.. autofunction:: nnvm.frontend.from_coreml - -.. autofunction:: nnvm.frontend.from_keras - -.. autofunction:: nnvm.frontend.from_tensorflow - -.. autofunction:: nnvm.frontend.from_darknet diff --git a/docs/api/python/nnvm/graph.rst b/docs/api/python/nnvm/graph.rst deleted file mode 100644 index e9f667e416e8..000000000000 --- a/docs/api/python/nnvm/graph.rst +++ /dev/null @@ -1,25 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -nnvm.graph ----------- -.. automodule:: nnvm.graph - -.. autofunction:: nnvm.graph.create - -.. autoclass:: nnvm.graph.Graph - :members: diff --git a/docs/api/python/nnvm/index.rst b/docs/api/python/nnvm/index.rst deleted file mode 100644 index 493a8fc1a772..000000000000 --- a/docs/api/python/nnvm/index.rst +++ /dev/null @@ -1,31 +0,0 @@ -.. 
Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -NNVM API -======== - -This document contains the python API to NNVM compiler toolchain. - -.. toctree:: - :maxdepth: 2 - - compiler - frontend - symbol - graph - top - testing diff --git a/docs/api/python/nnvm/symbol.rst b/docs/api/python/nnvm/symbol.rst deleted file mode 100644 index 46dcac97ddf9..000000000000 --- a/docs/api/python/nnvm/symbol.rst +++ /dev/null @@ -1,27 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -nnvm.symbol ------------ -.. 
automodule:: nnvm.symbol - -.. autoclass:: nnvm.symbol.Symbol - :members: - -.. autoclass:: nnvm.symbol.Variable - -.. autofunction:: nnvm.symbol.Group diff --git a/docs/api/python/nnvm/testing.rst b/docs/api/python/nnvm/testing.rst deleted file mode 100644 index 9ee72d41b2eb..000000000000 --- a/docs/api/python/nnvm/testing.rst +++ /dev/null @@ -1,31 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -nnvm.testing ------------- - -.. automodule:: nnvm.testing - -.. autofunction:: nnvm.testing.ctx_list - -nnvm.testing.check_computation ------------------------------- - -.. automodule:: nnvm.testing.check_computation - :members: - -.. include:: testing_new_ops.rst diff --git a/docs/api/python/nnvm/testing_new_ops.rst b/docs/api/python/nnvm/testing_new_ops.rst deleted file mode 100644 index bf80b526a5d5..000000000000 --- a/docs/api/python/nnvm/testing_new_ops.rst +++ /dev/null @@ -1,152 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. 
The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -Testing new operations ----------------------- - -When adding new operations, it is a good idea to test them. Testing -should be done with the function ``nnvm.testing.check_function``. You -should provide it with the symbol representing the result of a -computation and a reference numpy implementation. By default, it will -also check analytical gradients against numerical gradients if -analytical gradients are implemented for your operation. You can also -pass a reference implementation for the gradients, but numerical -gradients will still be checked. Numerical gradient checking may be -switched off explicitly, but doing this is not a good idea generally. -Here is an example testing the logarithm operation: - -.. code:: python - - import numpy as np - import nnvm - import nnvm.symbol as sym - from nnvm.testing.check_computation import check_function - - x = sym.Variable("x") - y = sym.log(x) - - def forward(x): - return np.log(x) - - def backward(head_grads, x): - return [1. / x * head_grads] - - dtype = "float32" - shape = {'x': (1, 3, 32, 32)} - check_function(y, forward, backward, in_range=(0.001, 2.0), dtype=dtype, shape=shape) - -If you run the code above, you might get an ``AssertionError`` in rare -cases. That’s why it is recommended to run new tests a lot of times. - -.. 
code:: python - - for _ in range(10000): - check_function(y, forward, backward, in_range=(0.001, 2.0), dtype=dtype, shape=shape) - -If you run the code above then sooner or later you will get an exception -which may look like this: - -.. code-block:: text - - AssertionError: Analytical and numerical grads wrt x differ too much - analytical grad = [ - ... - ] - numerical grad = [ - ... - ] - distance > atol*sqrt(n) + rtol*grad_norm - distance 308.50885009765625 > 0.01*55.42562584220407 + 0.1*2167.70703125 - -It means that either you have a mistake in the ``FGradient`` function or -the numerical error is too high. Generally, if you look at the printed -gradients and see that they differ only slightly or just in a single -position, then it is a numerical error. But if the gradients look -completely different, especially if many corresponding positions have -different signs, then it must be something wrong with the analytical -gradient implementation. - -Then try to make this error reproducible, and also try to reduce the -shape of inputs, but not too much, a vector of 10 elements is a -reasonable choice. Also you won’t need reference functions ``forward`` -and ``backward``, and restricting the number of targets might also be a -good idea. Since the error may manifest itself only in rare cases, you -might want to run it in a loop. - -.. code:: python - - shape = {'x': (10,)} - np.random.seed(42) - - for _ in range(1000): - check_function(y, in_range=(0.001, 2.0), dtype=dtype, shape=shape, - numerical_grads=True, only_targets=['llvm']) - -Running this code will result in the following: - -.. 
code-block:: text - - check_function failed while checking gradients numerically, here is the main graph - Graph(%x, %head_grads_0) { - %x, shape=[10], dtype=0 - %head_grads_0, shape=[10], dtype=0 - %1 = log(%x), shape=[10], dtype=0 - %3 = elemwise_div(%head_grads_0, %x), shape=[10], dtype=0 - ret %1, %3, %head_grads_0 - } - graph_attr_keys = [layout_inputs, dtype_num_unknown_nodes, dtype, shape_num_unknown_nodes, shape] - - Generated inputs: - {'x': array([2.5660574e-01, 1.5313280e+00, 1.0232578e-03, 8.3371508e-01, - 1.0454979e+00, 1.1021420e-01, 1.9461832e+00, 4.5302454e-01, - 6.0909325e-01, 6.0858107e-01], dtype=float32), 'head_grads_0': array([0.4616029 , 0.00394617, 1.4589603 , 1.9337242 , 0.44936267, - 1.3264314 , 1.4840508 , 1.6970023 , 0.84583575, 0.60655886], - dtype=float32)} - - ... - - AssertionError: Analytical and numerical grads wrt x differ too much - analytical grad = [1.7988799e+00 2.5769596e-03 1.4257993e+03 2.3194065e+00 4.2980734e-01 - 1.2035031e+01 7.6254421e-01 3.7459390e+00 1.3886802e+00 9.9667716e-01] - numerical grad = [1.7948151e+00 1.9073486e-03 9.9268610e+02 2.3174286e+00 4.2915344e-01 - 1.1980057e+01 7.6198578e-01 3.7412643e+00 1.3866425e+00 9.9563599e-01] - distance > atol*sqrt(n) + rtol*grad_norm - distance 433.11322021484375 > 0.01*3.1622776601683795 + 0.1*992.7716674804688 - -In this case the largest difference is in the 2nd position (starting -from 0) which corresponds to input value ``1.0232578e-03``. This value -is too close to the singularity, so the numerical derivative gets too -imprecise. The solution is to shrink the range for ``x``, here, for -example, ``(0.002, 2.0)`` turned out to be enough. Don’t forget to run -lots of tests, so that other people don’t get false positives. - -.. 
code:: python - - for _ in range(100): - check_function(y, in_range={x: (0.002, 2.0)}, dtype=dtype, shape=(1, 3, 32, 32), - numerical_grads=True, only_targets=['llvm']) - -If you need a more precise control over which values get passed to the -checking function, you can use ``values={x: ...}``: - -.. code:: python - - x_val = np.array([1.2594858e+00, 1.0960974e-01, 1.4975418e+00, 6.3585603e-01, - 1.2692513e-03, 1.0227472e+00, 9.4656967e-02, 5.5306298e-01, - 1.4142460e+00, 1.2631655e-01], dtype=np.float32) - check_function(y, values={x: x_val}, dtype=dtype, shape=shape, - numerical_grads=True, only_targets=['llvm']) diff --git a/docs/api/python/nnvm/top.rst b/docs/api/python/nnvm/top.rst deleted file mode 100644 index ff946e7639c8..000000000000 --- a/docs/api/python/nnvm/top.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -nnvm.top --------- -.. automodule:: nnvm.top - -.. autofunction:: register_compute - -.. autofunction:: register_schedule - -.. autofunction:: register_pattern - - -.. 
autoclass:: nnvm.top.AttrDict - :members: diff --git a/docs/conf.py b/docs/conf.py index c4410e5864f9..a098ad4e4d55 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -43,7 +43,6 @@ curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) sys.path.insert(0, os.path.join(curr_path, '../python/')) sys.path.insert(0, os.path.join(curr_path, '../topi/python')) -sys.path.insert(0, os.path.join(curr_path, '../nnvm/python')) sys.path.insert(0, os.path.join(curr_path, '../vta/python')) # -- General configuration ------------------------------------------------ @@ -60,7 +59,6 @@ '.md': CommonMarkParser } os.environ['TVM_BUILD_DOC'] = '1' -os.environ['NNVM_BUILD_DOC'] = '1' # Version information. import tvm version = tvm.__version__ diff --git a/docs/deploy/android.md b/docs/deploy/android.md index daf023c38042..788ab412db62 100644 --- a/docs/deploy/android.md +++ b/docs/deploy/android.md @@ -20,18 +20,15 @@ ## Build model for Android Target -NNVM compilation of model for android target could follow same approach like android_rpc. - -An reference example can be found at [chainer-nnvm-example](https://github.com/tkat0/chainer-nnvm-example) - -Above example will directly run the compiled model on RPC target. Below modification at [rum_mobile.py](https://github.com/tkat0/chainer-nnvm-example/blob/5b97fd4d41aa4dde4b0aceb0be311054fb5de451/run_mobile.py#L64) will save the compilation output which is required on android target. +Relay compilation of model for android target could follow same approach like android_rpc. +The code below will save the compilation output which is required on android target. ``` lib.export_library("deploy_lib.so", ndk.create_shared) with open("deploy_graph.json", "w") as fo: fo.write(graph.json()) with open("deploy_param.params", "wb") as fo: - fo.write(nnvm.compiler.save_param_dict(params)) + fo.write(relay.save_param_dict(params)) ``` deploy_lib.so, deploy_graph.json, deploy_param.params will go to android target. 
diff --git a/docs/deploy/index.rst b/docs/deploy/index.rst index db50865008a2..9a30b96ca66e 100644 --- a/docs/deploy/index.rst +++ b/docs/deploy/index.rst @@ -67,5 +67,4 @@ target device without relying on RPC. see the following resources on how to do s cpp_deploy android - nnvm integrate diff --git a/docs/deploy/nnvm.md b/docs/deploy/nnvm.md deleted file mode 100644 index 650912231b12..000000000000 --- a/docs/deploy/nnvm.md +++ /dev/null @@ -1,196 +0,0 @@ - - - - - - - - - - - - - - - - - -# Deploy NNVM Modules -NNVM compiled modules are fully embedded in TVM runtime as long as ```GRAPH_RUNTIME``` option -is enabled in tvm runtime. - - -In a nutshell, we will need three items to deploy a compiled module. -Checkout our tutorials on getting started with NNVM compiler for more details. - -- The graph json data which contains the execution graph. -- The tvm module library of compiled functions. -- The parameter blobs for stored parameters. - -We can then use TVM's runtime API to deploy the compiled module. -Here is an example in python. - -```python -import tvm - -# tvm module for compiled functions. -loaded_lib = tvm.module.load("deploy.so") -# json graph -loaded_json = open(temp.relpath("deploy.json")).read() -# parameters in binary -loaded_params = bytearray(open(temp.relpath("deploy.params"), "rb").read()) - -fcreate = tvm.get_global_func("tvm.graph_runtime.create") -ctx = tvm.gpu(0) -gmodule = fcreate(loaded_json, loaded_lib, ctx.device_type, ctx.device_id) -set_input, get_output, run = gmodule["set_input"], gmodule["get_output"], gmodule["run"] -set_input("x", tvm.nd.array(x_np)) -gmodule["load_params"](loaded_params) -run() -out = tvm.nd.empty(shape) -get_output(0, out) -print(out.asnumpy()) -``` - -An example in c++. 
-```cpp -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -int main() -{ - // tvm module for compiled functions - tvm::runtime::Module mod_syslib = tvm::runtime::Module::LoadFromFile("deploy.so"); - - // json graph - std::ifstream json_in("deploy.json", std::ios::in); - std::string json_data((std::istreambuf_iterator(json_in)), std::istreambuf_iterator()); - json_in.close(); - - // parameters in binary - std::ifstream params_in("deploy.params", std::ios::binary); - std::string params_data((std::istreambuf_iterator(params_in)), std::istreambuf_iterator()); - params_in.close(); - - // parameters need to be TVMByteArray type to indicate the binary data - TVMByteArray params_arr; - params_arr.data = params_data.c_str(); - params_arr.size = params_data.length(); - - int dtype_code = kDLFloat; - int dtype_bits = 32; - int dtype_lanes = 1; - int device_type = kDLCPU; - int device_id = 0; - - // get global function module for graph runtime - tvm::runtime::Module mod = (*tvm::runtime::Registry::Get("tvm.graph_runtime.create"))(json_data, mod_syslib, device_type, device_id); - - DLTensor* x; - int in_ndim = 4; - int64_t in_shape[4] = {1, 3, 224, 224}; - TVMArrayAlloc(in_shape, in_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &x); - // load image data saved in binary - const std::string data_filename = "cat.bin"; - std::ifstream data_fin(data_filename, std::ios::binary); - if(!data_fin) throw std::runtime_error("Could not open: " + data_filename); - data_fin.read(static_cast(x->data), 3 * 224 * 224 * 4); - - // get the function from the module(set input data) - tvm::runtime::PackedFunc set_input = mod.GetFunction("set_input"); - set_input("data", x); - - // get the function from the module(load patameters) - tvm::runtime::PackedFunc load_params = mod.GetFunction("load_params"); - load_params(params_arr); - - // get the function from the module(run it) - tvm::runtime::PackedFunc run = mod.GetFunction("run"); - 
run(); - - DLTensor* y; - int out_ndim = 2; - int64_t out_shape[2] = {1, 1000, }; - TVMArrayAlloc(out_shape, out_ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &y); - - // get the function from the module(get output data) - tvm::runtime::PackedFunc get_output = mod.GetFunction("get_output"); - get_output(0, y); - - // get the maximum position in output vector - auto y_iter = static_cast(y->data); - auto max_iter = std::max_element(y_iter, y_iter + 1000); - auto max_index = std::distance(y_iter, max_iter); - std::cout << "The maximum position in output vector is: " << max_index << std::endl; - - TVMArrayFree(x); - TVMArrayFree(y); - - return 0; -} -``` - -## Deploy as System Module -C++ additionally support deployment as system module. -This process need few additional options as given below to NNVM build. - -- For target llvm append --system-lib as ```target=llvm --system-lib``` -- For a GPU build (or non llvm) the additional option should be given to targat_host as ```target_host=llvm --system-lib``` - -Module export require additional options for not to compile but save as ```lib.export_library (path, fcompile=False)``` - -The output of above API is a tar compressed file containing object file ```(lib.o)``` and cpp source file ```(devc.cc)``` which embeds device blob. Thease two files should be compiled along with other files or objects while building c++ application. -Please refer to [Makefile](https://github.com/apache/incubator-tvm/tree/master/apps/howto_deploy/Makefile#L32) for a reference. - -The c++ code to load this system module require the below change. - -```cpp - // tvm module for compiled functions - tvm::runtime::Module mod_syslib = (*tvm::runtime::Registry::Get("module._GetSystemLib"))(); -``` - -Based on the build environment the system object, device blob source should be included in the final executable. An example with bazel build is given below. 
-```bash -cc_library( - name = "host_module", - srcs = ["lib.o"], - alwayslink=1 -) - -cc_library( - name = "device_module", - srcs = ["devc.cc"], - alwayslink=1 -) - -cc_library( - name = "tvm_runtime", - srcs = ["libtvm_runtime_pack.cc"], -) - -cc_binary( - name = "bazel_deploy", - srcs = ["cpp_deploy.cc"], - deps = [ - ":tvm_runtime", ":host_module", ":device_module" - ], - linkopts = [ "-lpthread -ldl" ] -) - -``` - -This build directive creates -- new library ```host_module``` out of ```lib.o``` -- new library ```device_module``` out of ```devc.cc``` - -These intermediate modules can be used as a dependency to final deploy application. - -In bazel ```alwayslink=1``` enforce embedding entire lib into application (even though it doesn't call any API from this module). diff --git a/docs/dev/codebase_walkthrough.rst b/docs/dev/codebase_walkthrough.rst index ffda632f738f..7e78d5753027 100644 --- a/docs/dev/codebase_walkthrough.rst +++ b/docs/dev/codebase_walkthrough.rst @@ -16,7 +16,7 @@ under the License. ======================================= -**TVM Codebase Walkthrough by Example** +TVM Codebase Walkthrough by Example ======================================= Getting to know a new codebase can be a challenge. This is especially true for a codebase like that of TVM, where different components interact in non-obvious ways. In this guide, we try to illustrate the key elements that comprise a compilation pipeline with a simple example. For each important step, we show where in the codebase it is implemented. The purpose is to let new developers and interested users dive into the codebase more quickly. @@ -28,16 +28,13 @@ Codebase Structure Overview At the root of the TVM repository, we have following subdirectories that together comprise a bulk of the codebase. - ``src`` - C++ code for operator compilation and deployment runtimes. -- ``src/relay`` - Implementation of Relay, a new IR for deep learning framework superseding ``nnvm`` below. 
+- ``src/relay`` - Implementation of Relay, a new functional IR for deep learning frameworks.
================= -**Debugger** +Debugger ================= TVM Debugger is an interface for debugging TVM's computation graph execution. It helps to provide access to graph structures and tensor values at the TVM runtime. ******************************************* -**Debug Exchange Format** +Debug Exchange Format ******************************************* -**1. Computational Graph** -========================== -The optimized graph build by nnvm in json +1. Computational Graph +====================== +The optimized graph build by relay in json serialized format is dumped as it is. This contains the whole information about the graph. The UX can either use this graph directly or transform this graph to the format UX can understand. @@ -35,7 +35,7 @@ or transform this graph to the format UX can understand. The Graph JSON format is explained below 1. ``nodes`` -Nodes are either placeholders or computational nodes in NNVM graph. The nodes are stored +Nodes are either placeholders or computational nodes in json. The nodes are stored as a list. A node contains the below information - ``op`` - operation type, ``null`` means it is a placeholder/variable/input node and``tvm_op`` means this node can be executed @@ -44,7 +44,7 @@ as a list. A node contains the below information - ``attrs`` - Attributes of the node which contains the following information - ``flatten_data`` - Whether this data need to be flattened before execution - - ``func_name`` - Fused function name, corresponds to the symbol in the lib generated by NNVM compilation process. + - ``func_name`` - Fused function name, corresponds to the symbol in the lib generated by relay compilation process. 
- ``num_inputs`` - Number of inputs for this node - ``num_outputs`` - Number of outputs this node produces @@ -82,7 +82,7 @@ Example of dumped graph: "name": "relu0", # Name of the node "attrs": { # Attributes of the node "flatten_data": "0", # Whether this data need to be flattened - "func_name": "fuse_l2_normalize_relu", # Fused function name, corresponds to the symbol in the lib generated by NNVM compilation process + "func_name": "fuse_l2_normalize_relu", # Fused function name, corresponds to the symbol in the lib generated by compilation process "num_inputs": "1", # Number of inputs for this node "num_outputs": "1" # Number of outputs this node produces }, @@ -105,8 +105,8 @@ Example of dumped graph: } } -**2. Tensor dumping** -===================== +2. Tensor dumping +================= The tensor received after execution is in ``tvm.ndarray`` type. All the tensors will be saved as binary bytes in serialized format. The result binary bytes can be loaded by the @@ -155,7 +155,7 @@ folder specified while creating the runtime. Sample Output *************************************** -The below is the output of running ``tvm/nnvm/tutorials/from_onnx.py`` with debugger. +The below is the an example output of the debugger. :: diff --git a/docs/dev/index.rst b/docs/dev/index.rst index 983d04f93fe3..9f46c39ce0a7 100644 --- a/docs/dev/index.rst +++ b/docs/dev/index.rst @@ -34,6 +34,4 @@ In this part of documentation, we share the rationale for the specific choices m virtual_machine codebase_walkthrough inferbound - nnvm_json_spec - nnvm_overview benchmark diff --git a/docs/dev/nnvm_json_spec.rst b/docs/dev/nnvm_json_spec.rst deleted file mode 100644 index 60d1b9b12bfe..000000000000 --- a/docs/dev/nnvm_json_spec.rst +++ /dev/null @@ -1,229 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. 
The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -NNVM Graph JSON Specification -============================= - -NNVM uses JSON for graph serialization. This allows NNVM graph to be -exported to any backend either natively supported or by third-party -without any dependency such as protobuf. - -Getting started ---------------- - -A serialized NNVM graph in JSON format can be deserialized by any JSON -parser. - -.. code:: python - - # python - import json - with open('model.json', 'r') as f: - graph = json.loads(f.read()) - print(graph.keys()) - -``['nodes', 'arg_nodes', 'heads', 'node_row_ptr']`` - -Actually, the following keys are valid in JSON graph. - -+--------------------------------------+------------+-----------------------------------+ -| Keys | Required | Description | -+======================================+============+===================================+ -| `nodes <#nodes>`__ | Yes | The nodes in graph. | -+--------------------------------------+------------+-----------------------------------+ -| `arg\_nodes <#arg_nodes>`__ | Yes | Indices of input nodes. | -+--------------------------------------+------------+-----------------------------------+ -| `heads <#heads>`__ | Yes | Indices of output nodes. | -+--------------------------------------+------------+-----------------------------------+ -| `node\_row\_ptr <#node_row_ptr>`__ | Optional | Depth first search row indices. 
| -+--------------------------------------+------------+-----------------------------------+ -| `attr <#attr>`__ | Optional | Additional information. | -+--------------------------------------+------------+-----------------------------------+ - -nodes ------ - -Explained by the name itself, ``nodes`` are either placeholders or -computational nodes in NNVM graph. The ``nodes`` are stored in list. - -.. code:: python - - nodes = graph['nodes'] - print(len(nodes)) - print(nodes[0]) - print(nodes[3]) - -:: - - 53 - {'inputs': [], 'name': 'data', 'op': 'null'} - {'inputs': [[0, 0, 0], [1, 0, 0], [2, 0, 0]], 'attrs': {'channels': '64', - 'padding': '(1, 1)', 'layout': 'NCHW', 'kernel_size': '[3, 3]', 'groups': '1', - 'strides': '(1, 1)', 'use_bias': 'True', 'dilation': '(1, 1)'}, - 'name': 'conv1_1', 'op': 'conv2d'} - -The following keys are valid in each node: - -+----------------+------------------+----------+ -| Keys | Required | Descript | -| | | ion | -+================+==================+==========+ -| op | Yes | The | -| | | operator | -| | | type | -| | | name, | -| | | 'null' | -| | | is used | -| | | if it's | -| | | a | -| | | placehol | -| | | der/vari | -| | | able/inp | -| | | ut. | -+----------------+------------------+----------+ -| name | Yes | The | -| | | given | -| | | name of | -| | | the | -| | | node, | -| | | defined | -| | | by user | -| | | composin | -| | | g | -| | | the | -| | | network. | -+----------------+------------------+----------+ -| inputs | Yes | List of | -| | | Entry | -| | | of the | -| | | input | -| | | nodes, | -| | | can be | -| | | empty | -| | | list []. | -| | | Entry is | -| | | a list | -| | | of | -| | | [nose\_i | -| | | d, | -| | | index, | -| | | version] | -+----------------+------------------+----------+ -| attrs | Optional | Extra | -| | | attribut | -| | | es | -| | | for the | -| | | specific | -| | | operator | -| | | . 
| -+----------------+------------------+----------+ -| control\_deps | Optional | Control | -| | | dependen | -| | | cies, | -| | | left | -| | | blank | -| | | unless | -| | | specific | -| | | ally | -| | | used. | -+----------------+------------------+----------+ - -``attrs`` for operators is a dictionary. Key-value pair examples: - -+----------------+------------------+----------+----------+ -| Keys | Value | Operator | Descript | -| | | | ion | -+================+==================+==========+==========+ -| 'channels' | '64' | conv2d | Output | -| | | | channels | -| | | | for 2d | -| | | | convolut | -| | | | ion. | -+----------------+------------------+----------+----------+ -| 'kernel\_size' | '[3, 3]' | conv2d | Convolut | -| | | | ion | -| | | | filter | -| | | | kernel | -| | | | size in | -| | | | (h, w), | -| | | | list and | -| | | | tuple | -| | | | both | -| | | | works. | -+----------------+------------------+----------+----------+ -| 'use\_bias' | '1' | conv2d | Whether | -| | | | use bias | -| | | | such | -| | | | that | -| | | | `y = w | -| | | | * x + b` | -| | | | . | -+----------------+------------------+----------+----------+ - -.. note:: - - Tips for parsing key-value pair: - - * Both key and value are stored as strings. - - * Boolean values need extra attention, convert to int is recommended since `bool('0') == True` in python. - - * For a full list of operator attributes, please refer to the core operator `documentation `__. - -arg\_nodes ----------- - -``arg_nodes`` is a list of indices of nodes which is -placeholder/variable/input to the graph. - -.. code:: python - - print(graph['arg_nodes']) - -:: - - [0, 1, 2, 6, 7, 11, 12, 15, 16, 20, 21, 24, 25, 29, 30, 33, 34, 39, 40, 44, 45, 49, 50] - -For example, ``nodes[3]`` is not in ``arg_nodes`` because it's an -internal node. - -heads ------ - -``heads`` is a list of entries as the outlet/output of the graph. - -.. 
code:: python - - print(graph['heads']) - -:: - - [[52, 0, 0]] - -This example indicating that there's only one output in the graph, with -index 52. - -node\_row\_ptr --------------- - -``node_row_ptr`` stores the history of forward path, so you can skip -constructing the entire graph in inference tasks. - -attrs ------ - -``attrs`` can contain version numbers or similar helpful informations. diff --git a/docs/dev/nnvm_overview.md b/docs/dev/nnvm_overview.md deleted file mode 100644 index b4a8ee7ccb9f..000000000000 --- a/docs/dev/nnvm_overview.md +++ /dev/null @@ -1,143 +0,0 @@ - - - - - - - - - - - - - - - - - - -# NNVM Design Overview - -NNVM is a reusable graph IR stack for deep learning systems. It provides useful API to construct, represent and transform computation graphs to get most high-level optimization needed in deep learning. -As a part of TVM stack for deep learning, NNVM also provides a shared compiler for deep learning frameworks to optimize, compile and deploy into different hardware backends via [TVM](https://github.com/apache/incubator-tvm) - -## Key Requirements and Design Choices - -- Have minimum dependency in the deployment module. -- Being able to add new operators to the IR, in a decentralized fashion. -- Being able to add new optimization passes to the IR and applies to existing graphs. - -The item2 and 3 are particularly interesting if we compare it to a typical compiler IR. Compiler IR usually contains a fixed set of primitives(instructions), and use them as a contract between optimization pass designers. This design enables easy addition of new optimization passes, but not new operator(instruction). Because every time we add a new instruction, we need to modify the passes to accommodate these changes. - -Deep learning frameworks usually have a fixed operator interface(schema). These interfaces can contain properties like shape inference function, whether in-place computation can happen. 
The operator interface is an again contract that makes it easy to add new an operator. But it is hard to add new passes in decentralized fashion a new optimization pass usually requires additional information, and this results in frequent changes of the centralized operator interface when we are exploring new optimizations. There is also a drawback of modularization. For example, a graph compiler for FPGA devices may not need the GPU device specific attributes. - -During our explorations in graph optimization and compilation, we find that it is important to quickly add both operators and passes to the framework without changing the core library. - -Here is a list of key elements in NNVM's design - -- Operator registry system to register and add new operators -- Operator attribute system provide property of operator in decentralized fashion -- A reusable IR data structure for optimization passes. - -The above list is more like the generic language part of NNVM, besides of that, we also provide a collection of core operator primitives, and graph optimization passes. The core tensor operator primitives and optimizations already cover commonly deep learning workloads. This design allows the NNVM compiler to be directly used as optimization and compilation stack for frameworks. The extendible nature of NNVM makes new adjustment easy without constraining the backend providers. - -## Minimum Registration for a Symbolic Front-End -To use NNVM to build language front end, a developer only needs to register minimum information about each operator. - -```c++ -NNVM_REGISTER_OP(add) -.describe("add two data together") -.set_num_inputs(2); - -NNVM_REGISTER_OP(conv2d) -.describe("take 2d convolution of input") -.set_num_inputs(2); - -NNVM_REGISTER_OP(assign) -.describe("assign second input argument to the first one") -.set_num_inputs(2); -``` - -Compiling the code with NNVM library. 
User can use the following interface to compose the computation graph in python, like the following code. - -```python -import nnvm.symbol as nn - -# symbolic variable -x = nn.Variable('x') -y = nn.Variable('y') -w = nn.Variable('w') - -z = nn.conv2d(nn.elemwise_add(x, y), w, kernel_size=(2,2), name='conv1') -``` - -The graph structure is interchangeable between the frontend and the backend. Python interface is supported currently. More language support can be easily -moved in the future. - -## Operator Attribute for More Extensions - -The minimum information provided by the operator is enough to get a front-end. However, we need more knowledge about each operator to do transformations and executing the graph. -A typical difference between neural nets' computation graph and traditional compiler IR is that there are a lot more high-level operators. We cannot fix the set of operators in the IR. - -NNVM allow developers to register attributes of each operator. The attributes can include shape inference function, whether the operator can perform in-place calculation etc. - -This design to having an operator attribute registry is not uncommon in deep learning systems. -For example, MXNet has a ```OpProperty``` class, Tensorflow has a ```OpDef``` and Caffe2 have a ```OperatorSchema``` class. -However, the operator attribute interface listed in these frameworks only support a fixed number of defined attributes of interest to the system. If we want to extend the framework to add a new attribute in each operator, we need to change the operator registry. -Eventually, the operator interface grows into to be very big and have to evolve in the centralized repo. - -In NNVM, we decided to change the design and support arbitrary type of operator attributes, without changing the interface registry. The minimum interface also makes it easier to share across multiple projects - -User can register new attribute, such as inplace property checking function as follows. 
-```c++ -using FInplaceOption = std::function< - std::vector > (const NodeAttrs& attrs)>; - -// we can register attributes from multiple places. -NNVM_REGISTER_OP(elemwise_add) -.set_num_inputs(2); - -// register to tell first input can be calculate inplace with first output -NNVM_REGISTER_OP(add) -.set_attr("FInplaceOption", [](const NodeAttrs& attrs) { - return std::vector >{{0, 0}}; - }); - -NNVM_REGISTER_OP(exp) -.set_num_inputs(1) -.set_attr("FInplaceOption", [](const NodeAttrs& attrs) { - return std::vector >{{0, 0}}; - }); -``` - -We can query these attributes at arbitrary parts of the code, like the following parts. Under the hood, each attribute is stored in a columnar store, that can easily be retrieved table and do quick lookups. - -```c++ -void MyFunction() { - const Op* add = Op::Get("add"); - // if we need quick query, we can use static variable - // attribute map contains attributes of all operators. - static auto& finplace_option_map = Op::GetAttr("FInplaceOption"); - - // quick look up attribute of add, O(1) time, vector index lookup internally. - auto add_inplace = finplace_option_map[add]; -} -``` -Besides making the code minimum, this attribute store enables decentralization of projects. -Before, all the attributes of operator have to sit on a centralized interface class. -Now, everyone can register attributes of their own, take some other attributes they need from another project without changing the operator interface and core library - - -## Graph and Pass - -We can use the additional information on attribute registry to do optimizations and get more information about the graph. Graph is the unit we manipulate in these steps. A Graph in NNVM contains -two parts: -- The computation graph structure -- A attribute map from string to any type ```map >``` - -The second attribute map is quite important, as we may need different kinds -of information about the graph during the transformation process. 
Let it be -shapes of each tensor, types of each tensor or the storage allocation plans. - -A ```Pass``` can take a graph with existing attribute information, -and transform it to the same graph structure with more graph attributes or another graph. diff --git a/docs/faq.md b/docs/faq.md index 3161e3bff082..f070ed59a575 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -26,7 +26,7 @@ See [Installation](http://docs.tvm.ai/install/) TVM's relation to Other IR/DSL Projects --------------------------------------- There are usually two levels of abstractions of IR in the deep learning systems. -NNVM, TensorFlow's XLA and Intel's ngraph uses computation graph representation. +TensorFlow's XLA and Intel's ngraph uses computation graph representation. This representation is high level, and can be helpful to perform generic optimizations such as memory reuse, layout transformation and automatic differentiation. diff --git a/docs/frontend/tensorflow.rst b/docs/frontend/tensorflow.rst index c2fefedfebf7..87341ab6b7c6 100644 --- a/docs/frontend/tensorflow.rst +++ b/docs/frontend/tensorflow.rst @@ -97,7 +97,7 @@ Import the Model Explicit Shape: ~~~~~~~~~~~~~~~ -To ensure shapes can be known throughout the entire graph, pass the ```shape``` argument to ```from_tensorflow```. This dictionary maps input names to input shapes. Please refer to these `test cases `_ for examples. +To ensure shapes can be known throughout the entire graph, pass the ```shape``` argument to ```from_tensorflow```. This dictionary maps input names to input shapes. Please refer to these `test cases `_ for examples. 
Data Layout ~~~~~~~~~~~ diff --git a/docs/index.rst b/docs/index.rst index f02dcc7c91e2..258547a34acd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -45,7 +45,6 @@ Developer Guide :maxdepth: 2 dev/index - nnvm_top Frontends ---------------- diff --git a/docs/install/from_source.rst b/docs/install/from_source.rst index c4d0c590638d..b1b780b28c0d 100644 --- a/docs/install/from_source.rst +++ b/docs/install/from_source.rst @@ -62,8 +62,7 @@ The minimal building requirements are - CMake 3.5 or higher - We highly recommend to build with LLVM to enable all the features. - If you want to use CUDA, CUDA toolkit version >= 8.0 is required. If you are upgrading from an older version, make sure you purge the older version and reboot after installation. -- It is possible to build TVM without the LLVM dependency if you only want to use CUDA/OpenCL -- If you want to use the NNVM compiler, then LLVM is required + We use cmake to build the library. The configuration of TVM can be modified by `config.cmake`. @@ -132,7 +131,6 @@ In order to generate the VS solution file using cmake, make sure you have a rece This will generate the VS project using the MSVC 14 64 bit generator. Open the .sln file in the build directory and build with Visual Studio. In order to build with LLVM in windows, you will need to build LLVM from source. -You need to run build the nnvm by running the same script under the nnvm folder. Building ROCm support ~~~~~~~~~~~~~~~~~~~~~ @@ -165,7 +163,7 @@ Method 1 .. code:: bash export TVM_HOME=/path/to/tvm - export PYTHONPATH=$TVM_HOME/python:$TVM_HOME/topi/python:$TVM_HOME/nnvm/python:${PYTHONPATH} + export PYTHONPATH=$TVM_HOME/python:$TVM_HOME/topi/python:${PYTHONPATH} Method 2 @@ -180,7 +178,6 @@ Method 2 export MACOSX_DEPLOYMENT_TARGET=10.9 # This is required for mac to avoid symbol conflicts with libstdc++ cd python; python setup.py install --user; cd .. cd topi/python; python setup.py install --user; cd ../.. 
- cd nnvm/python; python setup.py install --user; cd ../.. Python dependencies diff --git a/docs/nnvm_top.rst b/docs/nnvm_top.rst deleted file mode 100644 index 8679cae9b256..000000000000 --- a/docs/nnvm_top.rst +++ /dev/null @@ -1,297 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -NNVM Core Tensor Operators -========================== - -This page contains the list of core tensor operator primitives pre-defined in NNVM. -The core tensor operator primitives(``nnvm.top``) covers typical workloads in deep learning. -They can represent workloads in front-end frameworks, and provide basic building blocks for optimization. -Since deep learning is a fast evolving field and it is that possible to have operators that are not in here. -NNVM is designed for this problem and can easily new operators without changing the core library. - -.. note:: - - Each operator node in the graph IR contains the following two kinds of parameters. - - - inputs: positional list of input tensors - - attrs: attributes about operator(e.g. kernel_size in conv2d) - - This document lists both inputs and attributes in the parameter field. You can distinguish them by the marked type. The inputs are of type Tensor, while the rest parameters are attributes. 
- To construct the graph with NNVM python API, a user can pass in the input Tensors as positional arguments, and attributes as keyword arguments. - - -Overview of Operators ---------------------- -**Level 1: Basic Operators** - -This level enables fully connected multi-layer perceptron. - -.. autosummary:: - :nosignatures: - - nnvm.symbol.dense - nnvm.symbol.relu - nnvm.symbol.prelu - nnvm.symbol.tanh - nnvm.symbol.sigmoid - nnvm.symbol.exp - nnvm.symbol.log - nnvm.symbol.sqrt - nnvm.symbol.logical_and - nnvm.symbol.logical_or - nnvm.symbol.logical_not - nnvm.symbol.elemwise_add - nnvm.symbol.elemwise_sub - nnvm.symbol.elemwise_mul - nnvm.symbol.elemwise_div - nnvm.symbol.elemwise_sum - nnvm.symbol.elemwise_mod - nnvm.symbol.elemwise_pow - nnvm.symbol.flatten - nnvm.symbol.concatenate - nnvm.symbol.expand_dims - nnvm.symbol.squeeze - nnvm.symbol.split - nnvm.symbol.dropout - nnvm.symbol.batch_norm - nnvm.symbol.softmax - nnvm.symbol.log_softmax - nnvm.symbol.pad - nnvm.symbol.block_grad - nnvm.symbol.matmul - nnvm.symbol.resize - nnvm.symbol.upsampling - nnvm.symbol.take - nnvm.symbol.l2_normalize - nnvm.symbol.flip - nnvm.symbol.lrn - nnvm.symbol.where - nnvm.symbol.gather_nd - - -**Level 2: Convolutions** - -This level enables typical convnet models. - -.. autosummary:: - :nosignatures: - - nnvm.symbol.conv2d - nnvm.symbol.conv2d_transpose - nnvm.symbol.max_pool2d - nnvm.symbol.avg_pool2d - nnvm.symbol.global_max_pool2d - nnvm.symbol.global_avg_pool2d - - -**Level 3: Additional Tensor Ops** - -.. 
autosummary:: - :nosignatures: - - nnvm.symbol.reshape - nnvm.symbol.copy - nnvm.symbol.negative - nnvm.symbol.floor - nnvm.symbol.ceil - nnvm.symbol.round - nnvm.symbol.trunc - nnvm.symbol.abs - nnvm.symbol.leaky_relu - nnvm.symbol.__add_scalar__ - nnvm.symbol.__sub_scalar__ - nnvm.symbol.__rsub_scalar__ - nnvm.symbol.__mul_scalar__ - nnvm.symbol.__div_scalar__ - nnvm.symbol.__rdiv_scalar__ - nnvm.symbol.__pow_scalar__ - nnvm.symbol.__rpow_scalar__ - nnvm.symbol.__lshift_scalar__ - nnvm.symbol.__rshift_scalar__ - - -**Level 4: Broadcast and Reductions** - -.. autosummary:: - :nosignatures: - - nnvm.symbol.transpose - nnvm.symbol.broadcast_to - nnvm.symbol.sum - nnvm.symbol.min - nnvm.symbol.max - nnvm.symbol.mean - nnvm.symbol.prod - nnvm.symbol.broadcast_add - nnvm.symbol.broadcast_sub - nnvm.symbol.broadcast_mul - nnvm.symbol.broadcast_div - nnvm.symbol.clip - nnvm.symbol.greater - nnvm.symbol.less - nnvm.symbol.expand_like - nnvm.symbol.reshape_like - nnvm.symbol.full - nnvm.symbol.full_like - nnvm.symbol.ones - nnvm.symbol.ones_like - nnvm.symbol.zeros - nnvm.symbol.zeros_like - nnvm.symbol.slice_like - nnvm.symbol.strided_slice - nnvm.symbol.argmax - nnvm.symbol.argmin - nnvm.symbol.collapse_sum - nnvm.symbol.broadcast_equal - nnvm.symbol.broadcast_greater_equal - nnvm.symbol.broadcast_greater - nnvm.symbol.broadcast_left_shift - nnvm.symbol.broadcast_less_equal - nnvm.symbol.broadcast_less - nnvm.symbol.broadcast_max - nnvm.symbol.broadcast_min - nnvm.symbol.broadcast_mod - nnvm.symbol.broadcast_not_equal - nnvm.symbol.broadcast_pow - nnvm.symbol.broadcast_right_shift - - -**Level 5: Vision Operators** - -.. autosummary:: - :nosignatures: - - nnvm.symbol.multibox_prior - nnvm.symbol.multibox_transform_loc - nnvm.symbol.nms - nnvm.symbol.yolo_region - nnvm.symbol.yolo_reorg - -Detailed Definitions --------------------- -.. autofunction:: nnvm.symbol.dense -.. autofunction:: nnvm.symbol.relu -.. autofunction:: nnvm.symbol.prelu -.. 
autofunction:: nnvm.symbol.tanh -.. autofunction:: nnvm.symbol.sigmoid -.. autofunction:: nnvm.symbol.exp -.. autofunction:: nnvm.symbol.log -.. autofunction:: nnvm.symbol.sqrt -.. autofunction:: nnvm.symbol.logical_and -.. autofunction:: nnvm.symbol.logical_or -.. autofunction:: nnvm.symbol.logical_not -.. autofunction:: nnvm.symbol.elemwise_add -.. autofunction:: nnvm.symbol.elemwise_sub -.. autofunction:: nnvm.symbol.elemwise_mul -.. autofunction:: nnvm.symbol.elemwise_div -.. autofunction:: nnvm.symbol.elemwise_sum -.. autofunction:: nnvm.symbol.elemwise_mod -.. autofunction:: nnvm.symbol.elemwise_pow -.. autofunction:: nnvm.symbol.flatten -.. autofunction:: nnvm.symbol.concatenate -.. autofunction:: nnvm.symbol.expand_dims -.. autofunction:: nnvm.symbol.squeeze -.. autofunction:: nnvm.symbol.split -.. autofunction:: nnvm.symbol.dropout -.. autofunction:: nnvm.symbol.batch_norm -.. autofunction:: nnvm.symbol.softmax -.. autofunction:: nnvm.symbol.log_softmax -.. autofunction:: nnvm.symbol.pad -.. autofunction:: nnvm.symbol.block_grad -.. autofunction:: nnvm.symbol.matmul -.. autofunction:: nnvm.symbol.resize -.. autofunction:: nnvm.symbol.upsampling -.. autofunction:: nnvm.symbol.take -.. autofunction:: nnvm.symbol.l2_normalize -.. autofunction:: nnvm.symbol.flip -.. autofunction:: nnvm.symbol.lrn -.. autofunction:: nnvm.symbol.where -.. autofunction:: nnvm.symbol.gather_nd - -.. autofunction:: nnvm.symbol.conv2d -.. autofunction:: nnvm.symbol.conv2d_transpose -.. autofunction:: nnvm.symbol.max_pool2d -.. autofunction:: nnvm.symbol.avg_pool2d -.. autofunction:: nnvm.symbol.global_max_pool2d -.. autofunction:: nnvm.symbol.global_avg_pool2d - -.. autofunction:: nnvm.symbol.reshape -.. autofunction:: nnvm.symbol.copy -.. autofunction:: nnvm.symbol.negative -.. autofunction:: nnvm.symbol.floor -.. autofunction:: nnvm.symbol.ceil -.. autofunction:: nnvm.symbol.round -.. autofunction:: nnvm.symbol.trunc -.. autofunction:: nnvm.symbol.abs -.. 
autofunction:: nnvm.symbol.leaky_relu -.. autofunction:: nnvm.symbol.__add_scalar__ -.. autofunction:: nnvm.symbol.__sub_scalar__ -.. autofunction:: nnvm.symbol.__rsub_scalar__ -.. autofunction:: nnvm.symbol.__mul_scalar__ -.. autofunction:: nnvm.symbol.__div_scalar__ -.. autofunction:: nnvm.symbol.__rdiv_scalar__ -.. autofunction:: nnvm.symbol.__pow_scalar__ -.. autofunction:: nnvm.symbol.__rpow_scalar__ -.. autofunction:: nnvm.symbol.__lshift_scalar__ -.. autofunction:: nnvm.symbol.__rshift_scalar__ - -.. autofunction:: nnvm.symbol.transpose -.. autofunction:: nnvm.symbol.broadcast_to -.. autofunction:: nnvm.symbol.sum -.. autofunction:: nnvm.symbol.min -.. autofunction:: nnvm.symbol.max -.. autofunction:: nnvm.symbol.mean -.. autofunction:: nnvm.symbol.prod -.. autofunction:: nnvm.symbol.broadcast_add -.. autofunction:: nnvm.symbol.broadcast_sub -.. autofunction:: nnvm.symbol.broadcast_mul -.. autofunction:: nnvm.symbol.broadcast_div -.. autofunction:: nnvm.symbol.clip -.. autofunction:: nnvm.symbol.greater -.. autofunction:: nnvm.symbol.less -.. autofunction:: nnvm.symbol.expand_like -.. autofunction:: nnvm.symbol.reshape_like -.. autofunction:: nnvm.symbol.full -.. autofunction:: nnvm.symbol.full_like -.. autofunction:: nnvm.symbol.ones -.. autofunction:: nnvm.symbol.ones_like -.. autofunction:: nnvm.symbol.zeros -.. autofunction:: nnvm.symbol.zeros_like -.. autofunction:: nnvm.symbol.slice_like -.. autofunction:: nnvm.symbol.strided_slice -.. autofunction:: nnvm.symbol.argmax -.. autofunction:: nnvm.symbol.argmin -.. autofunction:: nnvm.symbol.collapse_sum -.. autofunction:: nnvm.symbol.broadcast_equal -.. autofunction:: nnvm.symbol.broadcast_greater_equal -.. autofunction:: nnvm.symbol.broadcast_greater -.. autofunction:: nnvm.symbol.broadcast_left_shift -.. autofunction:: nnvm.symbol.broadcast_less_equal -.. autofunction:: nnvm.symbol.broadcast_less -.. autofunction:: nnvm.symbol.broadcast_max -.. autofunction:: nnvm.symbol.broadcast_min -.. 
autofunction:: nnvm.symbol.broadcast_mod -.. autofunction:: nnvm.symbol.broadcast_not_equal -.. autofunction:: nnvm.symbol.broadcast_pow -.. autofunction:: nnvm.symbol.broadcast_right_shift - -.. autofunction:: nnvm.symbol.multibox_prior -.. autofunction:: nnvm.symbol.multibox_transform_loc -.. autofunction:: nnvm.symbol.nms -.. autofunction:: nnvm.symbol.yolo_region -.. autofunction:: nnvm.symbol.yolo_reorg diff --git a/python/tvm/_ffi/base.py b/python/tvm/_ffi/base.py index c61c5c445442..6e7c8f9f3824 100644 --- a/python/tvm/_ffi/base.py +++ b/python/tvm/_ffi/base.py @@ -56,7 +56,7 @@ def _load_lib(): # version number __version__ = libinfo.__version__ -# library instance of nnvm +# library instance _LIB, _LIB_NAME = _load_lib() # Whether we are runtime only diff --git a/python/tvm/autotvm/task/__init__.py b/python/tvm/autotvm/task/__init__.py index 0a0e6e1e8ac7..f249f6bacb90 100644 --- a/python/tvm/autotvm/task/__init__.py +++ b/python/tvm/autotvm/task/__init__.py @@ -30,5 +30,4 @@ from .topi_integration import register_topi_compute, register_topi_schedule, \ TaskExtractEnv -from .nnvm_integration import extract_from_graph, extract_from_multiple_graph from .relay_integration import extract_from_program, extract_from_multiple_program diff --git a/python/tvm/autotvm/task/nnvm_integration.py b/python/tvm/autotvm/task/nnvm_integration.py deleted file mode 100644 index 9161822d173c..000000000000 --- a/python/tvm/autotvm/task/nnvm_integration.py +++ /dev/null @@ -1,200 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=unused-variable,invalid-name -""" -Decorator and utilities for the integration with TOPI and NNVM - -""" -import threading -import warnings -import logging - - -from .task import create -from .topi_integration import TaskExtractEnv - -logger = logging.getLogger('autotvm') - - -def extract_from_graph(graph, shape, dtype, target, symbols, params=None, target_host=None): - """ Extract tuning tasks from a nnvm graph. - - This function collects tuning tasks by building the graph - and trace all the calls to topi. - - Parameters - ---------- - graph : Graph - The graph to tune - shape : dict of str to tuple - The input shape to the graph - dtype : str or dict of str to str - The input types to the graph - target: tvm.target.Target - The compilation target - symbols : Array of nnvm.symbol - Array of nnvm symbols want to be tuned - params : dict of str to NDArray - The parameter dictionary. 
- target_host: tvm.target.Target - The host compilation target - - Returns - ------- - task: Array of autotvm.task.Task - collected tasks - """ - import nnvm.compiler - import nnvm - import topi - - env = TaskExtractEnv.get() - - # NOTE: To add more symbols, you only need to change the following lists - # nnvm symbol -> topi compute - SYMBOL2TOPI = { - nnvm.sym.conv2d: [topi.nn.conv2d, topi.nn.depthwise_conv2d_nchw, - topi.nn.group_conv2d_nchw], - nnvm.sym.conv2d_transpose: [topi.nn.conv2d_transpose_nchw], - nnvm.sym.dense: [topi.nn.dense], - } - - topi_funcs = [] - for sym_name in symbols: - if sym_name in SYMBOL2TOPI: - topi_funcs.extend(SYMBOL2TOPI[sym_name]) - else: - warnings.warn("Symbol %s is not tunable, ignored" % sym_name) - - # run compiler to collect all TOPI calls during compilation - env.reset(topi_funcs) - with env: - # disable logger temporarily - old_state = logger.disabled - logger.disabled = True - - nnvm.compiler.engine.clear_cache() - # wrap build call in thread to avoid multiprocessing problems - build_thread = threading.Thread(target=nnvm.compiler.build, - args=(graph, - target, - shape, - dtype, - params, - target_host)) - build_thread.start() - build_thread.join() - - logger.disabled = old_state - - # create tasks for target - tasks = [] - for task_name, args in env.get_tasks(): - try: - tsk = create(task_name, args, - target=target, target_host=target_host, - template_key='direct') - tasks.append(tsk) - except topi.InvalidShapeError: - print("[Warning] Invalid shape during AutoTVM task creation") - - return tasks - - -def extract_from_multiple_graph(graphs, shapes, dtypes, target, symbols, params, target_host=None): - """ Extract tuning tasks from multiple nnvm graphs. 
- - This function is the multiple graph version of extract_from_graph - - Parameters - ---------- - graphs : List of Graph - The list of graphs to tune - shapes : List of dict of str to tuple - The input shape to the graph - dtypes : List of str or dict of str to str - The input types to the graph - target: tvm.target.Target - The compilation target - symbols : Array of nnvm.symbol - Array of nnvm symbols want to be tuned - params : dict of str to NDArray - The parameter dictionary. - target_host: tvm.target.Target - The host compilation target - - Returns - ------- - task: Array of autotvm.task.Task - collected tasks - """ - import nnvm.compiler - import nnvm - import topi - - env = TaskExtractEnv.get() - - #NOTE: To add more symbols, you only need to change the following lists - #nnvm symbol -> topi compute - SYMBOL2TOPI = { - nnvm.sym.conv2d: [topi.nn.conv2d, topi.nn.depthwise_conv2d_nchw, - topi.nn.group_conv2d_nchw], - nnvm.sym.conv2d_transpose: [topi.nn.conv2d_transpose_nchw], - nnvm.sym.dense: [topi.nn.dense], - } - - topi_funcs = [] - for sym_name in symbols: - if sym_name in SYMBOL2TOPI: - topi_funcs.extend(SYMBOL2TOPI[sym_name]) - else: - warnings.warn("Symbol %s is not tunable, ignored" % sym_name) - - # run compiler to collect all TOPI calls during compilation - env.reset(topi_funcs) - with env: - # disable logger temporarily - old_state = logger.disabled - logger.disabled = True - - for graph, shape, dtype in zip(graphs, shapes, dtypes): - nnvm.compiler.engine.clear_cache() - # wrap build call in thread to avoid multiprocessing problems - build_thread = threading.Thread(target=nnvm.compiler.build, - args=(graph, - target, - shape, - dtype, - params, - target_host)) - build_thread.start() - build_thread.join() - - logger.disabled = old_state - - # create tasks for target - tasks = [] - for task_name, args in env.get_tasks(): - try: - tsk = create(task_name, args, - target=target, target_host=target_host, - template_key='direct') - tasks.append(tsk) - 
except topi.InvalidShapeError: - print("[Warning] Invalid shape during AutoTVM task creation") - - return tasks diff --git a/python/tvm/autotvm/task/topi_integration.py b/python/tvm/autotvm/task/topi_integration.py index 1b446e307bc2..ce33d3ed3c0c 100644 --- a/python/tvm/autotvm/task/topi_integration.py +++ b/python/tvm/autotvm/task/topi_integration.py @@ -69,9 +69,9 @@ def deserialize_args(args): return ret -# Task extractor for nnvm graph, relay program +# Task extractor for relay program class TaskExtractEnv: - """Global environment for extracting tuning tasks from nnvm graph""" + """Global environment for extracting tuning tasks from graph""" current = None registered = None @@ -310,7 +310,7 @@ def get_tasks(self): Returns ------- tasks: List of tuple(name, args) - A list of tasks extracted from the nnvm graph + A list of tasks extracted from the graph """ return self.task_collection diff --git a/python/tvm/autotvm/tophub.py b/python/tvm/autotvm/tophub.py index 95e9acb23dff..98369e27a4e8 100644 --- a/python/tvm/autotvm/tophub.py +++ b/python/tvm/autotvm/tophub.py @@ -18,8 +18,7 @@ TopHub: Tensor Operator Hub To get the best performance, we typically need auto-tuning for the specific devices. TVM releases pre-tuned parameters in TopHub for some common networks and hardware targets. -TVM will download these parameters for you when you call -nnvm.compiler.build_module or relay.build. +TVM will download these parameters for you when you call relay.build. """ # pylint: disable=invalid-name diff --git a/python/tvm/contrib/debugger/debug_result.py b/python/tvm/contrib/debugger/debug_result.py index 8ee99d7561bd..3fc0d1574b8c 100644 --- a/python/tvm/contrib/debugger/debug_result.py +++ b/python/tvm/contrib/debugger/debug_result.py @@ -40,7 +40,7 @@ class DebugResult(object): Parameters ---------- graph_json : str - The graph to be deployed in json format output by nnvm graph. Each operator (tvm_op) + The graph to be deployed in json format output by graph compiler. 
Each operator (tvm_op) in the graph will have a one to one mapping with the symbol in libmod which is used to construct a "PackedFunc" . @@ -57,12 +57,12 @@ def __init__(self, graph_json, dump_path): self.dump_graph_json(graph_json) def _parse_graph(self, graph_json): - """Parse and extract the NNVM graph and update the nodes, shapes and dltype. + """Parse and extract the JSON graph and update the nodes, shapes and dltype. Parameters ---------- graph_json : str or graph class - The graph to be deployed in json format output by nnvm graph. + The graph to be deployed in json format output by JSON graph. """ json_obj = json.loads(graph_json) self._nodes_list = json_obj['nodes'] @@ -197,7 +197,7 @@ def dump_graph_json(self, graph): Parameters ---------- graph : json format - json formatted NNVM graph contain list of each node's + json formatted JSON graph contain list of each node's name, shape and type. """ graph_dump_file_name = GRAPH_DUMP_FILE_NAME diff --git a/python/tvm/contrib/debugger/debug_runtime.py b/python/tvm/contrib/debugger/debug_runtime.py index c71cbd2b0c2d..9c81857339c6 100644 --- a/python/tvm/contrib/debugger/debug_runtime.py +++ b/python/tvm/contrib/debugger/debug_runtime.py @@ -35,7 +35,7 @@ def create(graph_json_str, libmod, ctx, dump_root=None): Parameters ---------- graph_json_str : str or graph class - The graph to be deployed in json format output by nnvm graph. + The graph to be deployed in json format output by graph compiler. The graph can only contain one operator(tvm_op) that points to the name of PackedFunc in the libmod. diff --git a/python/tvm/contrib/graph_runtime.py b/python/tvm/contrib/graph_runtime.py index f4ee2f7db28d..99e0bba7af83 100644 --- a/python/tvm/contrib/graph_runtime.py +++ b/python/tvm/contrib/graph_runtime.py @@ -27,7 +27,7 @@ def create(graph_json_str, libmod, ctx): Parameters ---------- graph_json_str : str or graph class - The graph to be deployed in json format output by nnvm graph. 
+ The graph to be deployed in json format output by json graph. The graph can only contain one operator(tvm_op) that points to the name of PackedFunc in the libmod. libmod : tvm.Module diff --git a/python/tvm/relay/frontend/keras.py b/python/tvm/relay/frontend/keras.py index 57ee227694db..f0468e7c232b 100644 --- a/python/tvm/relay/frontend/keras.py +++ b/python/tvm/relay/frontend/keras.py @@ -362,7 +362,7 @@ def _convert_flatten(inexpr, keras_layer, _): def _convert_pooling(inexpr, keras_layer, etab): _check_data_format(keras_layer) pool_type = type(keras_layer).__name__ - # global pool in keras = global pool + flatten in nnvm/relay + # global pool in keras = global pool + flatten in relay if pool_type == 'GlobalMaxPooling2D': return _convert_flatten(_op.nn.global_max_pool2d(inexpr), keras_layer, etab) if pool_type == 'GlobalAveragePooling2D': diff --git a/tests/scripts/task_python_frontend.sh b/tests/scripts/task_python_frontend.sh index fdb7ef6e5790..68d861baa5e0 100755 --- a/tests/scripts/task_python_frontend.sh +++ b/tests/scripts/task_python_frontend.sh @@ -19,7 +19,7 @@ set -e set -u -export PYTHONPATH=nnvm/python:python:topi/python +export PYTHONPATH=python:topi/python # to avoid openblas threading error export TVM_BIND_THREADS=0 export OMP_NUM_THREADS=1 diff --git a/tests/scripts/task_python_legacy.sh b/tests/scripts/task_python_legacy.sh deleted file mode 100755 index db3e882e5a3f..000000000000 --- a/tests/scripts/task_python_legacy.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Test cases for legacy code, will be deprecated in the future. -set -e -set -u - -export PYTHONPATH=nnvm/python:python:topi/python -export OMP_NUM_THREADS=1 - -# Rebuild cython -make cython3 - -echo "Running nnvm unittest..." -python3 -m pytest -v nnvm/tests/python/unittest - - -echo "Running nnvm compiler test..." -python3 -m pytest -v nnvm/tests/python/compiler - -echo "Running nnvm ONNX frontend test..." -python3 -m pytest -v nnvm/tests/python/frontend/onnx - -echo "Running nnvm MXNet frontend test..." -python3 -m pytest -v nnvm/tests/python/frontend/mxnet - -echo "Running nnvm DarkNet frontend test..." -python3 -m pytest -v nnvm/tests/python/frontend/darknet - -echo "Running nnvm Keras frontend test..." -python3 -m pytest -v nnvm/tests/python/frontend/keras - -echo "Running nnvm Tensorflow frontend test..." -python3 -m pytest -v nnvm/tests/python/frontend/tensorflow - -echo "Running nnvm CoreML frontend test..." -python3 -m pytest -v nnvm/tests/python/frontend/coreml - -echo "Running nnvm Caffe2 frontend test..." 
-python3 -m pytest -v nnvm/tests/python/frontend/caffe2 diff --git a/tests/scripts/task_python_vta.sh b/tests/scripts/task_python_vta.sh index 397efa139770..07f680dffbe3 100755 --- a/tests/scripts/task_python_vta.sh +++ b/tests/scripts/task_python_vta.sh @@ -19,7 +19,7 @@ set -e set -u -export PYTHONPATH=python:nnvm/python:vta/python:topi/python +export PYTHONPATH=python:vta/python:topi/python rm -rf python/tvm/*.pyc python/tvm/*/*.pyc python/tvm/*/*/*.pyc python/tvm/*/*/*/*.pyc rm -rf ~/.tvm diff --git a/topi/python/topi/arm_cpu/conv2d.py b/topi/python/topi/arm_cpu/conv2d.py index cbb60857e9ad..6e95de579c19 100644 --- a/topi/python/topi/arm_cpu/conv2d.py +++ b/topi/python/topi/arm_cpu/conv2d.py @@ -478,19 +478,19 @@ def _alter_conv2d_layout_arm(attrs, inputs, tinfos, F): Parameters ---------- - attrs : nnvm.top.AttrDict or tvm.attrs.Attrs + attrs : tvm.attrs.Attrs Attributes of current convolution - inputs : nnvm.symbol or tvm.relay.Expr + inputs : tvm.relay.Expr Grouped input symbols tinfos : list Input shape and dtype F: symbol - The context, can be either nnvm.sym or relay.op + The context, can be either relay.op Note ---- Unlike other TOPI functions, this function operates on both graph level and operator level, - so we have to pass 'F' to make it support our two versions of graph IR, NNVM and Relay. + so we have to pass 'F' to make it support our two versions of graph IR, Relay. 
""" copy_inputs = [s for s in inputs] diff --git a/topi/python/topi/cuda/conv2d_winograd.py b/topi/python/topi/cuda/conv2d_winograd.py index f6f8640b495a..d7e4ebb4bffd 100644 --- a/topi/python/topi/cuda/conv2d_winograd.py +++ b/topi/python/topi/cuda/conv2d_winograd.py @@ -163,7 +163,7 @@ def schedule_winograd_cuda(cfg, s, output, pre_computed): eps, nu, ci, co = s[kernel_pack].op.axis if autotvm.GLOBAL_SCOPE.in_tuning: # skip this part during tuning to make recrods accurate - # this part will be pre-computed during NNVM's pre-compute optimization pass + # this part will be pre-computed during pre-compute optimization pass s[G].pragma(s[G].op.axis[0], 'debug_skip_region') s[kernel_pack].pragma(eps, 'debug_skip_region') else: @@ -311,19 +311,19 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, F): Parameters ---------- - attrs : nnvm.top.AttrDict or tvm.attrs.Attrs + attrs : tvm.attrs.Attrs Attributes of current convolution - inputs : nnvm.symbol or tvm.relay.Expr + inputs : tvm.relay.Expr Grouped input symbols tinfos : list Input shape and dtype F: symbol - The context, can be either nnvm.sym or relay.op + The context, can be relay.op Note ---- Unlike other TOPI functions, this function operates on both graph level and operator level, - so we have to pass 'F' to make it support our two versions of graph IR, NNVM and Relay. + so we have to pass 'F' to make it support our two versions of graph IR, Relay. """ if 'cudnn' in tvm.target.current_target().libs or 'miopen' in tvm.target.current_target().libs: return None @@ -331,9 +331,8 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, F): copy_inputs = [s for s in inputs] new_attrs = {k: attrs[k] for k in attrs.keys()} - if F.__name__ == 'tvm.relay.op': - # Derive channels for frontends (e.g ONNX) that miss "channel" field. 
- new_attrs["channels"] = inputs[1].checked_type.shape[attrs['kernel_layout'].index('O')] + + new_attrs["channels"] = inputs[1].checked_type.shape[attrs['kernel_layout'].index('O')] strides = attrs.get_int_tuple("strides") padding = attrs.get_int_tuple("padding") diff --git a/topi/python/topi/generic/nn.py b/topi/python/topi/generic/nn.py index 953be584ddd1..77f8cadb991e 100644 --- a/topi/python/topi/generic/nn.py +++ b/topi/python/topi/generic/nn.py @@ -159,7 +159,7 @@ def schedule_conv2d_winograd_weight_transform(outs): sch: Schedule The computation schedule for the op. """ - # Typically this is computed in nnvm PreCompute pass + # Typically this is computed in PreCompute pass # so we make a schedule here for cpu llvm s = tvm.create_schedule([x.op for x in outs]) output = outs[0] @@ -205,7 +205,7 @@ def schedule_conv2d_winograd_nnpack_weight_transform(outs): sch: Schedule The computation schedule for the op. """ - # Typically this is computed in nnvm PreCompute pass + # Typically this is computed in PreCompute pass s = tvm.create_schedule([x.op for x in outs]) return s diff --git a/topi/python/topi/intel_graphics/conv2d.py b/topi/python/topi/intel_graphics/conv2d.py index 9b0ad0c4ce8c..1cda266929b8 100644 --- a/topi/python/topi/intel_graphics/conv2d.py +++ b/topi/python/topi/intel_graphics/conv2d.py @@ -189,8 +189,6 @@ def __topi_nn_conv2d_NCHWc(*args, **kwargs): @conv2d_alter_layout.register(["intel_graphics"]) def _alter_conv2d_layout(attrs, inputs, tinfo, F): - import nnvm.symbol as sym - copy_inputs = [s for s in inputs] new_attrs = {k : attrs[k] for k in attrs.keys()} @@ -208,7 +206,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfo, F): dilation = attrs.get_int_tuple("dilation") out_dtype = attrs["out_dtype"] - layout_name = 'layout' if F == sym else 'data_layout' + layout_name = 'data_layout' layout = attrs[layout_name] kh, kw = attrs.get_int_tuple("kernel_size") @@ -258,8 +256,6 @@ def _alter_conv2d_layout(attrs, inputs, tinfo, F): 
new_attrs['out_layout'], out_dtype], conv2d_NCHWc) dispatch_ctx.update(target, new_workload, cfg) - if F == sym: - return F.contrib.conv2d_NCHWc(*copy_inputs, **new_attrs) return F.nn.contrib_conv2d_nchwc(*copy_inputs, **new_attrs) @autotvm.register_topi_compute(conv2d_NCHWc, 'intel_graphics', 'direct') diff --git a/topi/python/topi/nn/conv2d.py b/topi/python/topi/nn/conv2d.py index 130632fd08a9..5af30335a9c5 100644 --- a/topi/python/topi/nn/conv2d.py +++ b/topi/python/topi/nn/conv2d.py @@ -99,19 +99,19 @@ def conv2d_alter_layout(attrs, inputs, tinfos, F): Parameters ---------- - attrs : nnvm.top.AttrDict or tvm.attrs.Attrs + attrs : tvm.attrs.Attrs Attributes of current convolution - inputs : nnvm.symbol or tvm.relay.Expr + inputs : tvm.relay.Expr Grouped input symbols tinfos : list Input shape and dtype F: symbol - The context, can be either nnvm.sym or relay.op + The context, can be either relay.op Note ---- Unlike other TOPI functions, this function operates on both graph level and operator level, - so we have to pass 'F' to make it support our two versions of graph IR, NNVM and Relay. + so we have to pass 'F' to make it support our two versions of graph IR, Relay. 
""" # not to change by default return None diff --git a/topi/python/topi/x86/conv2d_alter_op.py b/topi/python/topi/x86/conv2d_alter_op.py index a02f919cba0e..55e0d0d0596c 100644 --- a/topi/python/topi/x86/conv2d_alter_op.py +++ b/topi/python/topi/x86/conv2d_alter_op.py @@ -39,7 +39,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfo, F): strides = attrs.get_int_tuple("strides") dilation = attrs.get_int_tuple("dilation") out_dtype = attrs["out_dtype"] - layout_name = 'layout' if F.__name__ == 'nnvm.symbol' else 'data_layout' + layout_name = 'data_layout' data_layout = attrs[layout_name] kh, kw = attrs.get_int_tuple("kernel_size") @@ -109,9 +109,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfo, F): [new_data, new_kernel, strides, padding, dilation, new_attrs[layout_name], new_attrs['out_layout'], out_dtype], depthwise_conv2d_NCHWc) dispatch_ctx.update(target, new_workload, cfg) - if F.__name__ == 'nnvm.symbol': - logging.warning("Use native layout for depthwise convolution on NNVM.") - return None + return F.nn.contrib_depthwise_conv2d_nchwc(*copy_inputs, **new_attrs) if _is_int8_hw_support(data_dtype, kernel_dtype): @@ -153,9 +151,6 @@ def _alter_conv2d_layout(attrs, inputs, tinfo, F): out_dtype], conv2d_NCHWc_int8) dispatch_ctx.update(target, new_workload, cfg) - if F.__name__ == 'nnvm.symbol': - logging.warning("Use native layout for int8 convolution on NNVM.") - return None return F.nn.contrib_conv2d_nchwc_int8(*copy_inputs, **new_attrs) # (oc, ic, h, w) -> (OC, IC, h, w, ic, oc) @@ -168,8 +163,6 @@ def _alter_conv2d_layout(attrs, inputs, tinfo, F): new_attrs['out_layout'], out_dtype], conv2d_NCHWc) dispatch_ctx.update(target, new_workload, cfg) - if F.__name__ == 'nnvm.symbol': - return F.contrib.conv2d_NCHWc(*copy_inputs, **new_attrs) return F.nn.contrib_conv2d_nchwc(*copy_inputs, **new_attrs)