diff --git a/CMakeLists.txt b/CMakeLists.txt index 292bd6780c52..afcb73041469 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,9 +118,8 @@ else(MSVC) endif(MSVC) # add source group -FILE(GLOB_RECURSE GROUP_SOURCE "src/*.cc" "nnvm/src/*.cc") -FILE(GLOB_RECURSE GROUP_INCLUDE "src/*.h" "include/*.h" - "nnvm/src/*.h" "nnvm/include/*.h") +FILE(GLOB_RECURSE GROUP_SOURCE "src/*.cc") +FILE(GLOB_RECURSE GROUP_INCLUDE "src/*.h" "include/*.h") assign_source_group("Source" ${GROUP_SOURCE}) assign_source_group("Include" ${GROUP_INCLUDE}) @@ -174,13 +173,6 @@ if(NOT MSVC) list(APPEND COMPILER_SRCS ${COMPILER_VERILOG_SRCS}) endif() -file(GLOB_RECURSE NNVM_COMPILER_SRCS - nnvm/src/c_api/*.cc - nnvm/src/core/*.cc - nnvm/src/pass/*.cc - nnvm/src/compiler/*.cc - nnvm/src/top/*.cc - ) file(GLOB TOPI_SRCS topi/src/*.cc @@ -294,7 +286,6 @@ if(NOT USE_SGX STREQUAL "OFF") add_dependencies(tvm_runtime sgx_edl tvm_t) install(TARGETS tvm_t ARCHIVE DESTINATION lib${LIB_SUFFIX}) endif() -add_library(nnvm_compiler SHARED ${NNVM_COMPILER_SRCS}) if(USE_THREADS) message(STATUS "Build with thread support...") @@ -304,13 +295,11 @@ if(USE_THREADS) target_link_libraries(tvm Threads::Threads) target_link_libraries(tvm_topi Threads::Threads) target_link_libraries(tvm_runtime Threads::Threads) - target_link_libraries(nnvm_compiler Threads::Threads) endif(USE_THREADS) target_link_libraries(tvm ${TVM_LINKER_LIBS} ${TVM_RUNTIME_LINKER_LIBS}) target_link_libraries(tvm_topi tvm ${TVM_LINKER_LIBS} ${TVM_RUNTIME_LINKER_LIBS}) target_link_libraries(tvm_runtime ${TVM_RUNTIME_LINKER_LIBS}) -target_link_libraries(nnvm_compiler tvm) if (HIDE_PRIVATE_SYMBOLS AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") set(HIDE_SYMBOLS_LINKER_FLAGS "-Wl,--exclude-libs,ALL") @@ -320,7 +309,6 @@ if (HIDE_PRIVATE_SYMBOLS AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") target_link_libraries(tvm ${HIDE_SYMBOLS_LINKER_FLAGS}) target_link_libraries(tvm_topi ${HIDE_SYMBOLS_LINKER_FLAGS}) target_link_libraries(tvm_runtime ${HIDE_SYMBOLS_LINKER_FLAGS}) - target_link_libraries(nnvm_compiler ${HIDE_SYMBOLS_LINKER_FLAGS}) endif() # Related headers @@ -330,10 +318,7 @@ target_include_directories( target_include_directories( tvm_topi PUBLIC "topi/include") -target_include_directories( - nnvm_compiler - PUBLIC "nnvm/include" - PUBLIC "topi/include") + # Tests set(TEST_EXECS "") @@ -372,7 +357,6 @@ add_custom_target(runtime DEPENDS tvm_runtime) install(TARGETS tvm DESTINATION lib${LIB_SUFFIX}) install(TARGETS tvm_topi DESTINATION lib${LIB_SUFFIX}) install(TARGETS tvm_runtime DESTINATION lib${LIB_SUFFIX}) -install(TARGETS nnvm_compiler DESTINATION lib${LIB_SUFFIX}) if (INSTALL_DEV) install( @@ -395,11 +379,6 @@ if (INSTALL_DEV) FILES_MATCHING PATTERN "*.h" ) - install( - DIRECTORY "nnvm/include/." DESTINATION "include" - FILES_MATCHING - PATTERN "*.h" - ) else(INSTALL_DEV) install( DIRECTORY "include/tvm/runtime/." 
DESTINATION "include/tvm/runtime" @@ -412,5 +391,4 @@ endif(INSTALL_DEV) if(MSVC) target_compile_definitions(tvm PRIVATE -DTVM_EXPORTS) target_compile_definitions(tvm_runtime PRIVATE -DTVM_EXPORTS) - target_compile_definitions(nnvm_compiler PRIVATE -DNNVM_EXPORTS) endif() diff --git a/Makefile b/Makefile index d3ad1030b9f2..d34fbe4c9d88 100644 --- a/Makefile +++ b/Makefile @@ -69,14 +69,12 @@ build/libtvm_web_runtime.js: build/libtvm_web_runtime.bc cpplint: python3 3rdparty/dmlc-core/scripts/lint.py vta cpp vta/include vta/src python3 3rdparty/dmlc-core/scripts/lint.py topi cpp topi/include; - python3 3rdparty/dmlc-core/scripts/lint.py nnvm cpp nnvm/include nnvm/src; python3 3rdparty/dmlc-core/scripts/lint.py tvm cpp include src \ examples/extension/src examples/graph_executor/src pylint: python3 -m pylint python/tvm --rcfile=$(ROOTDIR)/tests/lint/pylintrc python3 -m pylint topi/python/topi --rcfile=$(ROOTDIR)/tests/lint/pylintrc - python3 -m pylint nnvm/python/nnvm --rcfile=$(ROOTDIR)/tests/lint/pylintrc python3 -m pylint vta/python/vta --rcfile=$(ROOTDIR)/tests/lint/pylintrc jnilint: diff --git a/conda/tvm/build.sh b/conda/tvm/build.sh index 494f90f0afa0..358e0b91798a 100644 --- a/conda/tvm/build.sh +++ b/conda/tvm/build.sh @@ -6,9 +6,9 @@ # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -26,7 +26,3 @@ cd .. cd topi/python $PYTHON setup.py install --single-version-externally-managed --record=/tmp/record.txt cd ../.. - -cd nnvm/python -$PYTHON setup.py install --single-version-externally-managed --record=/tmp/record.txt -cd ../.. 
diff --git a/conda/tvm/meta.yaml b/conda/tvm/meta.yaml index 12f9a9698d70..3ce0f5e4726c 100644 --- a/conda/tvm/meta.yaml +++ b/conda/tvm/meta.yaml @@ -48,7 +48,6 @@ test: imports: - tvm - topi - - nnvm requires: - pytest - scipy diff --git a/docker/Dockerfile.demo_android b/docker/Dockerfile.demo_android index 4d52411444f7..13d1a2175b88 100644 --- a/docker/Dockerfile.demo_android +++ b/docker/Dockerfile.demo_android @@ -70,5 +70,5 @@ RUN cd /usr && \ make -j10 # Environment variables -ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/nnvm/python/:/usr/tvm/vta/python:${PYTHONPATH} +ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/vta/python:${PYTHONPATH} ENV ANDROID_HOME=/opt/android-sdk-linux/ diff --git a/docker/Dockerfile.demo_cpu b/docker/Dockerfile.demo_cpu index 63dc3a15d088..6700579bc41b 100644 --- a/docker/Dockerfile.demo_cpu +++ b/docker/Dockerfile.demo_cpu @@ -30,4 +30,4 @@ COPY install/install_tvm_cpu.sh /install/install_tvm_cpu.sh RUN bash /install/install_tvm_cpu.sh # Environment variables -ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/nnvm/python/:/usr/tvm/vta/python:${PYTHONPATH} +ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/vta/python:${PYTHONPATH} diff --git a/docker/Dockerfile.demo_gpu b/docker/Dockerfile.demo_gpu index 9be8c00f941c..0591050c5270 100644 --- a/docker/Dockerfile.demo_gpu +++ b/docker/Dockerfile.demo_gpu @@ -28,7 +28,7 @@ COPY install/install_tvm_gpu.sh /install/install_tvm_gpu.sh RUN bash /install/install_tvm_gpu.sh # Environment variables -ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/nnvm/python/:/usr/tvm/vta/python:${PYTHONPATH} +ENV PYTHONPATH=/usr/tvm/python:/usr/tvm/topi/python:/usr/tvm/vta/python:${PYTHONPATH} ENV PATH=/usr/local/nvidia/bin:${PATH} ENV PATH=/usr/local/cuda/bin:${PATH} ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH} diff --git a/docker/Dockerfile.demo_opencl b/docker/Dockerfile.demo_opencl index 7afb2243cb52..bf27eef862f5 100644 --- a/docker/Dockerfile.demo_opencl +++ b/docker/Dockerfile.demo_opencl @@ -76,7 +76,6 @@ RUN mkdir -p ${TVM_BUILD_DIR} && \ make -j6 RUN echo "Building Python package" -ENV PYTHONPATH=${TVM_HOME}/python:${TVM_HOME}/topi/python:${TVM_HOME}/nnvm/python:${PYTHONPATH} +ENV PYTHONPATH=${TVM_HOME}/python:${TVM_HOME}/topi/python:${PYTHONPATH} RUN cd ${TVM_HOME}/python && python3 setup.py install --user RUN cd ${TVM_HOME}/topi/python && python3 setup.py install --user -RUN cd ${TVM_HOME}/nnvm/python && python3 setup.py install --user diff --git a/jvm/core/src/main/java/org/apache/tvm/contrib/GraphRuntime.java b/jvm/core/src/main/java/org/apache/tvm/contrib/GraphRuntime.java index 03695dc9045b..c31c67f283af 100644 --- a/jvm/core/src/main/java/org/apache/tvm/contrib/GraphRuntime.java +++ b/jvm/core/src/main/java/org/apache/tvm/contrib/GraphRuntime.java @@ -32,7 +32,7 @@ public class GraphRuntime { /** * Create a runtime executor module given a graph and module. - * @param graphJson The graph deployed in json format output by nnvm graph. + * @param graphJson The graph deployed in json format output by compiler. * @param libmod The module of the corresponding function. * @param ctx The local or remote context to deploy the module. * @return Runtime graph module that can be used to execute the graph. diff --git a/nnvm/Makefile b/nnvm/Makefile index 39763cb59db8..14af3b294e73 100644 --- a/nnvm/Makefile +++ b/nnvm/Makefile @@ -30,7 +30,6 @@ TVMPATH = .. 
export LDFLAGS = -pthread -lm export CFLAGS = -std=c++11 -Wall -O2 -Iinclude -fPIC -CFLAGS += -I$(TVMPATH)/include -I$(TVMPATH)/3rdparty/dlpack/include -I$(TVMPATH)/3rdparty/HalideIR/src -I$(TVMPATH)/topi/include ifdef DMLC_CORE_PATH CFLAGS += -I$(DMLC_CORE_PATH)/include @@ -66,7 +65,7 @@ else NO_WHOLE_ARCH= --no-whole-archive endif -all: lib/libnnvm.a lib/libnnvm_compiler.$(SHARED_LIBRARY_SUFFIX) +all: lib/libnnvm.a lib/libnnvm.$(SHARED_LIBRARY_SUFFIX) SRC = $(wildcard src/*.cc src/c_api/*.cc src/core/*.cc src/pass/*.cc) SRC_COMPILER = $(wildcard src/top/*/*.cc wildcard src/top/vision/*/*.cc src/compiler/*.cc src/compiler/*/*.cc) @@ -87,7 +86,7 @@ lib/libnnvm.a: $(ALL_DEP) @mkdir -p $(@D) $(AR) crv $@ $(filter %.o, $?) -lib/libnnvm_compiler.$(SHARED_LIBRARY_SUFFIX): lib/libnnvm.a ${TOP_OBJ} +lib/libnnvm.$(SHARED_LIBRARY_SUFFIX): lib/libnnvm.a ${TOP_OBJ} @mkdir -p $(@D) $(CXX) $(CFLAGS) -shared -o $@ $(filter %.o, $^) $(LDFLAGS) -Wl,${WHOLE_ARCH} lib/libnnvm.a -Wl,${NO_WHOLE_ARCH} diff --git a/nnvm/README.md b/nnvm/README.md index e3b451d63dcd..54caa17e2ce3 100644 --- a/nnvm/README.md +++ b/nnvm/README.md @@ -15,38 +15,8 @@ -# NNVM Compiler Module of TVM Stack +# NNVM -```python -import tvm -from tvm.contrib import graph_runtime, rpc -import nnvm.frontend -import nnvm.compiler - -# GET model from frameworks -# change xyz to supported framework name. -graph, params = nnvm.frontend.from_xyz(...) - -# OPTIMIZE and COMPILE the graph to get a deployable module -# target can be "opencl", "llvm", "metal" or any target supported by tvm -target = "cuda" -graph, lib, params = nnvm.compiler.build(graph, target, {"data", data_shape}, params=params) - -# DEPLOY and run on gpu(0) -module = graph_runtime.create(graph, lib, tvm.gpu(0)) -module.set_input(**params) -module.run(data=data_array) -output = tvm.nd.empty(out_shape, ctx=tvm.gpu(0)) -module.get_output(0, output) - -# DEPLOY to REMOTE mobile/rasp/browser with minimum tvm rpc runtime -# useful for quick experiments on mobile devices -remote = rpc.connect(remote_host, remote_port) -lib.export_library("mylib.so") -remote.upload("mylib.so") -rlib = rpc.load_module("mylib.so") -# run on remote device -rmodule = graph_runtime.create(graph, rlib, remote.gpu(0)) -rmodule.set_input(**params) -rmodule.run() -``` +NNVM is a graph-level IR for neural networks. +We are moving towards Relay IR, a better unified IR that supports a wider range of programs. +Please use Relay instead. diff --git a/nnvm/include/nnvm/base.h b/nnvm/include/nnvm/base.h index 2fd71c7d087e..678ed4d4a942 100644 --- a/nnvm/include/nnvm/base.h +++ b/nnvm/include/nnvm/base.h @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License.
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -46,6 +46,24 @@ using dmlc::get; /*!\brief "unsafe" getter function of any type */ using dmlc::unsafe_get; +enum TypeFlag { + kFloat32 = 0, + kFloat64 = 1, + kFloat16 = 2, + kUint8 = 3, + kInt32 = 4, + kInt8 = 5, + kInt64 = 6, + // kBool = 7, + // 7 is reserved for kBool, in order to keep consistency with MXNet TypeFlag defined in + // https://github.com/apache/incubator-mxnet/blob/master/3rdparty/mshadow/mshadow/base.h#L314 + kInt16 = 8, + kUint16 = 9, + kUint32 = 10, + kUint64 = 11, + kBfloat16 = 12, +}; + } // namespace nnvm // describe op registration point diff --git a/nnvm/include/nnvm/compiler/op_attr_types.h b/nnvm/include/nnvm/compiler/op_attr_types.h deleted file mode 100644 index 12b4415850d4..000000000000 --- a/nnvm/include/nnvm/compiler/op_attr_types.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file nnvm/compiler/op_attr_types.h - * \brief The Expr and related elements in DataFlow construction. - */ -#ifndef NNVM_COMPILER_OP_ATTR_TYPES_H_ -#define NNVM_COMPILER_OP_ATTR_TYPES_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "packed_func_ext.h" - -namespace nnvm { -namespace compiler { - -using ::tvm::Array; -using ::tvm::Tensor; -using ::tvm::Schedule; - -/*! \brief operator pattern used in graph fusion */ -enum OpPatternKind { - // Elementwise operation - kElemWise = 0, - // Broadcasting operator, can always map output axis to the input in order. - // for example :code:`out[i, ax1, j, ax2] = input[i, j]`. - // Note that the axis need to be in order so transpose is not a bcast operator. - kBroadcast = 1, - // Injective operator, can always injectively map output axis to a single input axis. - // All injective operator can still be safely fused to injective and reduction. - kInjective = 2, - // Communicative reduction operator. - kCommReduce = 3, - // Complex operation, can still fuse elemwise operations into its output. - // but cannot chain another complex op - kOutEWiseFusable = 4, - // Opaque operation, cannot fuse anything. - kOpaque = 8 -}; - -/*! \brief the operator pattern */ -using TOpPattern = int; - -/*! - * \brief Computation description interface - * \param attrs The attribute of the node. - * \param inputs The input tensors(placeholders) - * \param out_info Tensors holding shape/type information about output, - & these are always placeholders. - * \return The output description of the tensor. 
- */ -using FTVMCompute = std::function< - Array(const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info)>; - -/*! - * \brief Build the computation schedule for - * op whose root is at current op. - * \param attrs The attribute of the node. - * \param outs The output tensors. - * \param target The build target. - * \return schedule The computation schedule. - */ -using FTVMSchedule = std::function< - Schedule(const NodeAttrs& attrs, - const Array& outs, - const std::string& target)>; - -/*! - * \brief Modify the op node to alter its input layout. - * it is invoked in AlterOpLayout pass. - * \param attrs The attribute of the original node. - * \param inputs The input symbols of the original node. - * \param tinfos The inferred shape and dtype of the inputs. - * \param ret The replaced operator. - * \return Whether to replace current operator. - */ -using FTVMAlterOpLayout = std::function< - bool(const NodeAttrs& attrs, - const Symbol& inputs, - const Array& tinfos, - Symbol* ret)>; - -/*! - * \brief Transform from normal operator to vectorized operator - * \param node The source node. - * \return Transformed vectorized op. - */ -using FTVMVectorizedOp = std::function; - -} // namespace compiler -} // namespace nnvm -#endif // NNVM_COMPILER_OP_ATTR_TYPES_H_ diff --git a/nnvm/include/nnvm/compiler/packed_func_ext.h b/nnvm/include/nnvm/compiler/packed_func_ext.h deleted file mode 100644 index 67a43a7b4104..000000000000 --- a/nnvm/include/nnvm/compiler/packed_func_ext.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file nnvm/compiler/packed_func_ext.h - * \brief Extension to enable packed functionn for nnvm types - */ -#ifndef NNVM_COMPILER_PACKED_FUNC_EXT_H_ -#define NNVM_COMPILER_PACKED_FUNC_EXT_H_ - -#include -#include -#include -#include -#include -#include -#include - -namespace nnvm { -namespace compiler { - -using tvm::runtime::PackedFunc; - -using AttrDict = std::unordered_map; - -/*! - * \brief Get PackedFunction from global registry and - * report error if it does not exist - * \param name The name of the function. - * \return The created PackedFunc. - */ -inline const PackedFunc& GetPackedFunc(const std::string& name) { - const PackedFunc* pf = tvm::runtime::Registry::Get(name); - CHECK(pf != nullptr) << "Cannot find function " << name << " in registry"; - return *pf; -} -} // namespace compiler -} // namespace nnvm - -// Enable the graph and symbol object exchange. 
-namespace tvm { -namespace runtime { - -template<> -struct extension_type_info { - static const int code = 16; -}; - -template<> -struct extension_type_info { - static const int code = 17; -}; - -template<> -struct extension_type_info { - static const int code = 18; -}; - -} // namespace runtime -} // namespace tvm -#endif // NNVM_COMPILER_PACKED_FUNC_EXT_H_ diff --git a/nnvm/include/nnvm/compiler/util.h b/nnvm/include/nnvm/compiler/util.h deleted file mode 100644 index 63d065576213..000000000000 --- a/nnvm/include/nnvm/compiler/util.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! -* \file nnvm/compiler/util.h -* \brief Utility functions for nnvm compiler -*/ -#ifndef NNVM_COMPILER_UTIL_H_ -#define NNVM_COMPILER_UTIL_H_ - -#include -#include - -namespace nnvm { -namespace compiler { - -/* - * \brief Helper function to convert TShape to TVM array. Useful for - * passing data from NNVM param structures to TOPI ops. - * - * \param shape The shape to convert - * - * \return An Array of Expr, where each element is a constant int32 - */ -inline tvm::Array ShapeToArray(TShape shape) { - tvm::Array result; - for (auto i : shape) { - result.push_back(tvm::make_const(tvm::DataType::Int(32), i)); - } - return result; -} - -/* - * \brief Helper function to convert TShape to TVM array. Useful for - * passing data from NNVM param structures to TOPI ops. - * - * \param shape The shape to convert - * - * \return An Array of Expr, where each element is a constant int32 - */ -inline tvm::Array ShapeToIntArray(TShape shape) { - return tvm::Downcast >(ShapeToArray(shape)); -} -} // namespace compiler -} // namespace nnvm -#endif // NNVM_COMPILER_UTIL_H_ diff --git a/nnvm/include/nnvm/top/README b/nnvm/include/nnvm/top/README deleted file mode 100644 index 09a4d6fc387f..000000000000 --- a/nnvm/include/nnvm/top/README +++ /dev/null @@ -1 +0,0 @@ -NNVM Core Operator and Compiler diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h deleted file mode 100644 index f2a3e81472e1..000000000000 --- a/nnvm/include/nnvm/top/nn.h +++ /dev/null @@ -1,555 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file nnvm/top/nn.h - * \brief Auxiliary param for tensor primitive. - */ -#ifndef NNVM_TOP_NN_H_ -#define NNVM_TOP_NN_H_ - -#include -#include -#include -#include -#include -#include "tensor.h" - -namespace nnvm { -namespace top { - -struct DenseParam : public dmlc::Parameter { - int units; - bool use_bias; - - DMLC_DECLARE_PARAMETER(DenseParam) { - DMLC_DECLARE_FIELD(units).set_lower_bound(1) - .describe("Number of hidden units of the dense transformation."); - DMLC_DECLARE_FIELD(use_bias).set_default(true) - .describe("Whether to use bias parameter"); - } - // constants - static const constexpr int kData = 0; - static const constexpr int kWeight = 1; - static const constexpr int kBias = 2; -}; - -struct DropoutParam : public dmlc::Parameter { - float rate; - - DMLC_DECLARE_PARAMETER(DropoutParam) { - DMLC_DECLARE_FIELD(rate).set_default(0.5) - .set_range(0, 1) - .describe("Fraction of the input that gets dropped out during training time."); - } -}; - -struct BatchNormParam : public dmlc::Parameter { - int axis; - double epsilon; - double momentum; - bool center; - bool scale; - - DMLC_DECLARE_PARAMETER(BatchNormParam) { - DMLC_DECLARE_FIELD(axis).set_default(1) - .describe("Specify which shape axis the channel is specified."); - DMLC_DECLARE_FIELD(epsilon).set_default(1e-5) - .describe("Small float added to variance to avoid dividing by zero."); - DMLC_DECLARE_FIELD(center).set_default(true) - .describe("If True, add offset of `beta` to normalized tensor." - "If False, `beta` is ignored."); - DMLC_DECLARE_FIELD(scale).set_default(true) - .describe("If True, multiply by `gamma`. If False, `gamma` is not used." - "When the next layer is piecewise linear (also e.g. `nn.relu`)," - "this can be disabled since the scaling" - "will be done by the next layer."); - } - // constants - static const constexpr int kData = 0; - static const constexpr int kGamma = 1; - static const constexpr int kBeta = 2; - static const constexpr int kMovingMean = 3; - static const constexpr int kMovingVariance = 4; -}; - - -// Shared by softmax and log_softmax -struct SoftmaxParam : public dmlc::Parameter { - int axis; - - DMLC_DECLARE_PARAMETER(SoftmaxParam) { - DMLC_DECLARE_FIELD(axis).set_default(-1) - .describe("The axis to sum over when computing softmax."); - } -}; - -struct LeakyReLUParam : public dmlc::Parameter { - double alpha; - - DMLC_DECLARE_PARAMETER(LeakyReLUParam) { - DMLC_DECLARE_FIELD(alpha).set_lower_bound(0.0).set_default(0.25) - .describe("slope coefficient for the negative half axis."); - } -}; - -struct PReLUParam : public dmlc::Parameter { - int axis; - DMLC_DECLARE_PARAMETER(PReLUParam) { - DMLC_DECLARE_FIELD(axis).set_default(1) - .describe("Specify which shape axis the channel is specified."); - } -}; - -struct PadParam : public dmlc::Parameter { - float pad_value; - Tuple > pad_width; - - DMLC_DECLARE_PARAMETER(PadParam) { - DMLC_DECLARE_FIELD(pad_value).set_default(0.0) - .describe("The value to be padded."); - DMLC_DECLARE_FIELD(pad_width) - .describe("Number of values padded to the edges of each axis, " - "in the format of ((before_1, after_1), ... 
(before_N, after_N))"); - } -}; - - -struct Conv2DParam : public dmlc::Parameter { - int channels; - TShape kernel_size; - TShape strides; - TShape padding; - TShape dilation; - int groups; - std::string layout; - std::string kernel_layout; - std::string out_layout; - int out_dtype; - bool use_bias; - - DMLC_DECLARE_PARAMETER(Conv2DParam) { - DMLC_DECLARE_FIELD(channels) - .describe("The dimensionality of the output space" - "i.e. the number of output channels in the convolution."); - DMLC_DECLARE_FIELD(kernel_size) - .describe("Specifies the dimensions of the convolution window."); - DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1})) - .describe("Specifies the strides of the convolution."); - DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0})) - .describe("If padding is non-zero, then the input is implicitly zero-padded" - "on both sides for padding number of points"); - DMLC_DECLARE_FIELD(dilation).set_default(TShape({1, 1})) - .describe("Specifies the dilation rate to use for dilated convolution."); - DMLC_DECLARE_FIELD(groups).set_default(1) - .describe("Controls the connections between inputs and outputs." - "At groups=1, all inputs are convolved to all outputs." - "At groups=2, the operation becomes equivalent to having two convolution" - "layers side by side, each seeing half the input channels, and producing" - "half the output channels, and both subsequently concatenated."); - DMLC_DECLARE_FIELD(layout).set_default("NCHW") - .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc." - "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" - "dimensions respectively. Convolution is applied on the 'H' and" - "'W' dimensions."); - DMLC_DECLARE_FIELD(out_layout).set_default("__undef__") - .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc." - "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" - "dimensions respectively. Default to be same as input layout."); - DMLC_DECLARE_FIELD(kernel_layout).set_default("OIHW") - .describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc." - "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width" - "dimensions respectively."); - DMLC_DECLARE_DTYPE_FIELD(out_dtype) - .add_enum("same", -1) - .set_default(-1) - .describe("Output data type, set to explicit type under mixed precision setting"); - - DMLC_DECLARE_FIELD(use_bias).set_default(true) - .describe("Whether the layer uses a bias vector."); - } - // constants - static const constexpr int kData = 0; - static const constexpr int kWeight = 1; - static const constexpr int kBias = 2; -}; - -struct WinogradWeightTransformParam : public dmlc::Parameter { - int tile_size; - - DMLC_DECLARE_PARAMETER(WinogradWeightTransformParam) { - DMLC_DECLARE_FIELD(tile_size) - .describe("Tile size of winograd. E.g. 2 for F(2x2, 3x3) and 4 for F(4x4, 3x3)"); - } - - static const constexpr int kWeight = 0; -}; - -struct WinogradNNPACKWeightTransformParam - : public dmlc::Parameter { - int convolution_algorithm; - int out_dtype; - - DMLC_DECLARE_PARAMETER(WinogradNNPACKWeightTransformParam) { - DMLC_DECLARE_FIELD(convolution_algorithm) - .describe( - "The convolution algorithm for Winograd NNPACK. " - "E.g. 
tvm.contrib.nnpack.ConvolutionAlgorithm.WT_8x8 for WT_8x8, " - "tvm.contrib.nnpack.ConvolutionAlgorithm.WT_8x8_FP16 for WT_8x8_FP16"); - DMLC_DECLARE_DTYPE_FIELD(out_dtype) - .add_enum("same", -1) - .set_default(-1) - .describe("Output data type, set to explicit type under mixed precision setting"); - } - - static const constexpr int kWeight = 0; -}; - -struct WinogradConv2DParam : public dmlc::Parameter { - int channels; - TShape kernel_size; - TShape strides; - TShape padding; - TShape dilation; - int groups; - std::string layout; - std::string kernel_layout; - std::string out_layout; - int out_dtype; - bool use_bias; - int tile_size; - - DMLC_DECLARE_PARAMETER(WinogradConv2DParam) { - DMLC_DECLARE_FIELD(channels) - .describe("The dimensionality of the output space" - "i.e. the number of output channels in the convolution."); - DMLC_DECLARE_FIELD(kernel_size) - .describe("Specifies the dimensions of the convolution window."); - DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1})) - .describe("Specifies the strides of the convolution."); - DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0})) - .describe("If padding is non-zero, then the input is implicitly zero-padded" - "on both sides for padding number of points"); - DMLC_DECLARE_FIELD(dilation).set_default(TShape({1, 1})) - .describe("Specifies the dilation rate to use for dilated convolution."); - DMLC_DECLARE_FIELD(groups).set_default(1) - .describe("Controls the connections between inputs and outputs." - "At groups=1, all inputs are convolved to all outputs." - "At groups=2, the operation becomes equivalent to having two convolution" - "layers side by side, each seeing half the input channels, and producing" - "half the output channels, and both subsequently concatenated."); - DMLC_DECLARE_FIELD(layout).set_default("NCHW") - .describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc." - "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" - "dimensions respectively. Convolution is applied on the 'H' and" - "'W' dimensions."); - DMLC_DECLARE_FIELD(out_layout).set_default("__undef__") - .describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc." - "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" - "dimensions respectively. Default to be same as input layout."); - DMLC_DECLARE_FIELD(kernel_layout).set_default("OIHW") - .describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc." - "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width" - "dimensions respectively."); - DMLC_DECLARE_DTYPE_FIELD(out_dtype) - .add_enum("same", -1) - .set_default(-1) - .describe("Output data type, set to explicit type under mixed precision setting"); - DMLC_DECLARE_FIELD(use_bias).set_default(true) - .describe("Whether the layer uses a bias vector."); - DMLC_DECLARE_FIELD(tile_size) - .describe("Tile size of winograd. E.g. 2 for F(2x2, 3x3) and 4 for F(4x4, 3x3)"); - } - // constants - static const constexpr int kData = 0; - static const constexpr int kWeight = 1; - static const constexpr int kBias = 2; -}; - -struct Conv2DTransposeParam : public dmlc::Parameter { - int channels; - TShape kernel_size; - TShape strides; - TShape padding; - TShape output_padding; - TShape dilation; - int groups; - std::string layout; - std::string kernel_layout; - int out_dtype; - bool use_bias; - - DMLC_DECLARE_PARAMETER(Conv2DTransposeParam) { - DMLC_DECLARE_FIELD(channels) - .describe("The dimensionality of the output space" - "i.e. 
the number of output channels in the convolution."); - DMLC_DECLARE_FIELD(kernel_size) - .describe("Specifies the dimensions of the convolution window."); - DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1})) - .describe("Specifies the strides of the convolution."); - DMLC_DECLARE_FIELD(output_padding).set_default(TShape({0, 0})) - .describe("Zero-padding added to one side of the output."); - DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0})) - .describe("If padding is non-zero, then the input is implicitly zero-padded" - "on both sides for padding number of points"); - DMLC_DECLARE_FIELD(dilation).set_default(TShape({1, 1})) - .describe("Specifies the dilation rate to use for dilated convolution."); - DMLC_DECLARE_FIELD(groups).set_default(1) - .describe("Controls the connections between inputs and outputs." - "At groups=1, all inputs are convolved to all outputs." - "At groups=2, the operation becomes equivalent to having two convolution" - "layers side by side, each seeing half the input channels, and producing" - "half the output channels, and both subsequently concatenated."); - DMLC_DECLARE_FIELD(layout).set_default("NCHW") - .describe("Dimension ordering of data. Can be 'NCHW', 'NHWC', etc." - "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" - "dimensions respectively. Convolution is applied on the 'H' and" - "'W' dimensions."); - DMLC_DECLARE_FIELD(kernel_layout).set_default("OIHW") - .describe("Dimension ordering of data and weight. Can be 'OIHW', 'OIHW16o16i', etc." - "'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width" - "dimensions respectively."); - DMLC_DECLARE_DTYPE_FIELD(out_dtype) - .add_enum("same", -1) - .set_default(-1) - .describe("Output data type, set to explicit type under mixed precision setting"); - DMLC_DECLARE_FIELD(use_bias).set_default(true) - .describe("Whether the layer uses a bias vector."); - } - // constants - static const constexpr int kData = 0; - static const constexpr int kWeight = 1; - static const constexpr int kBias = 2; -}; - - -struct MaxPool2DParam : public dmlc::Parameter { - TShape pool_size; - TShape strides; - TShape padding; - std::string layout; - bool ceil_mode; - - DMLC_DECLARE_PARAMETER(MaxPool2DParam) { - DMLC_DECLARE_FIELD(pool_size) - .describe("Size of the pooling windows.."); - DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1})) - .describe("Specifies the strides of the convolution."); - DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0})) - .describe("If padding is non-zero, then the input is implicitly zero-padded" - "Padding support both symmetric and asymmetric as" - "one int : same padding used on all sides" - "two int : bottom, right will use same padding as top, left" - "four int : padding width in the order of (top, left, bottom, right)"); - DMLC_DECLARE_FIELD(layout).set_default("NCHW") - .describe("Dimension ordering of data and weight. Can be 'NCHW', 'NHWC', etc." - "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" - "dimensions respectively. 
Convolution is applied on the 'H' and" - "'W' dimensions."); - DMLC_DECLARE_FIELD(ceil_mode).set_default(false) - .describe("When true, will use ceil instead of floor to compute the output shape."); - } -}; - - -struct AvgPool2DParam : public dmlc::Parameter { - TShape pool_size; - TShape strides; - TShape padding; - std::string layout; - bool ceil_mode; - bool count_include_pad; - - DMLC_DECLARE_PARAMETER(AvgPool2DParam) { - DMLC_DECLARE_FIELD(pool_size) - .describe("Size of the pooling windows.."); - DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1})) - .describe("Specifies the strides of the convolution."); - DMLC_DECLARE_FIELD(padding).set_default(TShape({0, 0})) - .describe("If padding is non-zero, then the input is implicitly zero-padded" - "Padding support both symmetric and asymmetric as" - "one int : same padding used on all sides" - "two int : bottom, right will use same padding as top, left" - "four int : padding width in the order of (top, left, bottom, right)"); - DMLC_DECLARE_FIELD(layout).set_default("NCHW") - .describe("Dimension ordering of data and weight. Can be 'NCHW', 'NHWC', etc." - "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" - "dimensions respectively. Convolution is applied on the 'H' and" - "'W' dimensions."); - DMLC_DECLARE_FIELD(ceil_mode).set_default(false) - .describe("When true, will use ceil instead of floor to compute the output shape."); - DMLC_DECLARE_FIELD(count_include_pad).set_default(false) - .describe("When true, will include padding to compute the average"); - } -}; - - -struct GlobalPool2DParam : public dmlc::Parameter { - std::string layout; - - DMLC_DECLARE_PARAMETER(GlobalPool2DParam) { - DMLC_DECLARE_FIELD(layout).set_default("NCHW") - .describe("Dimension ordering of data and weight. Can be 'NCHW', 'NHWC', etc." - "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" - "dimensions respectively. Convolution is applied on the 'H' and" - "'W' dimensions."); - } -}; - -struct UpSamplingParam : public dmlc::Parameter { - int scale; - std::string layout; - std::string method; - - DMLC_DECLARE_PARAMETER(UpSamplingParam) { - DMLC_DECLARE_FIELD(scale) - .describe("upsampling scaling factor"); - DMLC_DECLARE_FIELD(layout) - .set_default("NCHW") - .describe("Dimension ordering of data. Can be 'NCHW', 'NHWC', etc." - "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" - "dimensions respectively. Upsampling is applied on the 'H' and" - "'W' dimensions."); - DMLC_DECLARE_FIELD(method) - .set_default("NEAREST_NEIGHBOR") - .describe("Specify the mode to use for scaling." 
- "NEAREST_NEIGHBOR - Nearest Neighbor" - "BILINEAR - Bilinear Interpolation"); - } -}; - -struct LayoutTransformParam : public dmlc::Parameter { - std::string src_layout; - std::string dst_layout; - - DMLC_DECLARE_PARAMETER(LayoutTransformParam) { - DMLC_DECLARE_FIELD(src_layout).set_default("__undef__") - .describe("Dimension ordering of data"); - DMLC_DECLARE_FIELD(dst_layout).set_default("__undef__") - .describe("Dimension ordering of data."); - } -}; - -struct MultiBoxPriorParam : public dmlc::Parameter { - Tuple sizes; - Tuple ratios; - Tuple steps; - Tuple offsets; - bool clip; - - DMLC_DECLARE_PARAMETER(MultiBoxPriorParam) { - DMLC_DECLARE_FIELD(sizes).set_default(Tuple({1.0})) - .describe("List of sizes of generated MultiBoxPriores."); - DMLC_DECLARE_FIELD(ratios).set_default(Tuple({1.0})) - .describe("List of aspect ratios of generated MultiBoxPriores."); - DMLC_DECLARE_FIELD(steps).set_default(Tuple({-1.0, -1.0})) - .describe("Priorbox step across y and x, -1 for auto calculation."); - DMLC_DECLARE_FIELD(offsets).set_default(Tuple({0.5, 0.5})) - .describe("Priorbox center offsets, y and x respectively."); - DMLC_DECLARE_FIELD(clip).set_default(false) - .describe("Whether to clip out-of-boundary boxes."); - } -}; - -struct MultiBoxTransformLocParam : public dmlc::Parameter { - bool clip; - float threshold; - Tuple variances; - DMLC_DECLARE_PARAMETER(MultiBoxTransformLocParam) { - DMLC_DECLARE_FIELD(clip).set_default(true) - .describe("Clip out-of-boundary boxes."); - DMLC_DECLARE_FIELD(threshold).set_default(0.01) - .describe("Threshold to be a positive prediction."); - DMLC_DECLARE_FIELD(variances).set_default(Tuple({0.1f, 0.1f, 0.2f, 0.2f})) - .describe("Variances to be decoded from box regression output."); - } -}; - -struct NonMaximumSuppressionParam : public dmlc::Parameter { - bool return_indices; - float iou_threshold; - bool force_suppress; - int top_k; - int id_index; - int coord_start; - int score_index; - int max_output_size; - bool invalid_to_bottom; - DMLC_DECLARE_PARAMETER(NonMaximumSuppressionParam) { - DMLC_DECLARE_FIELD(max_output_size).set_default(-1) - .describe("Max number of output valid boxes for each instance." 
- "By default all valid boxes are returned."); - DMLC_DECLARE_FIELD(iou_threshold).set_default(0.5) - .describe("Non-maximum suppression threshold."); - DMLC_DECLARE_FIELD(force_suppress).set_default(false) - .describe("Suppress all detections regardless of class_id."); - DMLC_DECLARE_FIELD(top_k).set_default(-1) - .describe("Keep maximum top k detections before nms, -1 for no limit."); - DMLC_DECLARE_FIELD(coord_start).set_default(2) - .describe("Start index of the consecutive 4 coordinates."); - DMLC_DECLARE_FIELD(score_index).set_default(1) - .describe("Index of the scores/confidence of boxes."); - DMLC_DECLARE_FIELD(id_index).set_default(0) - .describe("Axis index of id."); - DMLC_DECLARE_FIELD(return_indices).set_default(true) - .describe("Whether to return box indices in input data."); - DMLC_DECLARE_FIELD(invalid_to_bottom).set_default(false) - .describe("Whether to move all invalid bounding boxes to the bottom."); - } -}; - -struct LRNParam : public dmlc::Parameter { - int size; - int axis; - float alpha; - float beta; - float bias; - - DMLC_DECLARE_PARAMETER(LRNParam) { - DMLC_DECLARE_FIELD(size) - .describe("The size of the local region to be considered for normalization."); - DMLC_DECLARE_FIELD(axis) - .describe("input data layout channel axis"); - DMLC_DECLARE_FIELD(alpha) - .describe("The scaling parameter."); - DMLC_DECLARE_FIELD(beta) - .describe("The exponent parameter."); - DMLC_DECLARE_FIELD(bias) - .describe("The offset parameter."); - } - // constants - static const constexpr int kData = 0; -}; - -struct L2NormalizeParam : public dmlc::Parameter { - float eps; - Tuple axis; - - DMLC_DECLARE_PARAMETER(L2NormalizeParam) { - DMLC_DECLARE_FIELD(eps) - .describe("float type epsilon value."); - DMLC_DECLARE_FIELD(axis) - .describe("axis over the normalization applied"); - } -}; - -} // namespace top -} // namespace nnvm - -#endif // NNVM_TOP_NN_H_ diff --git a/nnvm/include/nnvm/top/tensor.h b/nnvm/include/nnvm/top/tensor.h deleted file mode 100644 index 51cb6e7edfb9..000000000000 --- a/nnvm/include/nnvm/top/tensor.h +++ /dev/null @@ -1,327 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file nnvm/top/tensor.h - * \brief Auxiliary param for tensor primitive. 
- */ -#ifndef NNVM_TOP_TENSOR_H_ -#define NNVM_TOP_TENSOR_H_ - -#include -#include -#include - -namespace nnvm { -namespace top { - -struct ConcatenateParam : public dmlc::Parameter { - int axis; - DMLC_DECLARE_PARAMETER(ConcatenateParam) { - DMLC_DECLARE_FIELD(axis).set_default(1) - .describe("the axis to be concated."); - } -}; - -struct ExpandDimsParam : public dmlc::Parameter { - int axis; - int num_newaxis; - DMLC_DECLARE_PARAMETER(ExpandDimsParam) { - DMLC_DECLARE_FIELD(axis) - .describe("the axis to be expanded."); - DMLC_DECLARE_FIELD(num_newaxis).set_lower_bound(1).set_default(1) - .describe("Number of new axis to be inserted."); - } -}; - -struct SplitParam : public dmlc::Parameter { - // numpy convention, only support indices, not support list. - Tuple indices_or_sections; - int axis; - // additional hint whether it is equal_split mode - // deduced from indices_or_sections - bool equal_split; - - DMLC_DECLARE_PARAMETER(SplitParam) { - DMLC_DECLARE_FIELD(indices_or_sections) - .describe("Number of outputs to be splitted"); - DMLC_DECLARE_FIELD(axis).set_default(1) - .describe("the axis to be splitted."); - } -}; - - -struct TakeParam : public dmlc::Parameter { - dmlc::optional axis; - - DMLC_DECLARE_PARAMETER(TakeParam) { - DMLC_DECLARE_FIELD(axis).set_default(dmlc::optional()) - .describe("the axis over which to select values."); - } -}; - -struct StridedSliceParam : public dmlc::Parameter { - // numpy convention, only support indices, not support list. - Tuple begin; - Tuple end; - Tuple stride; - - DMLC_DECLARE_PARAMETER(StridedSliceParam) { - DMLC_DECLARE_FIELD(begin) - .describe("Indices for begin of slice"); - DMLC_DECLARE_FIELD(end) - .describe("Indices for end of the slice"); - DMLC_DECLARE_FIELD(stride).set_default(Tuple()) - .describe("Stride values of the slice"); - } -}; - -enum TypeFlag { - kFloat32 = 0, - kFloat64 = 1, - kFloat16 = 2, - kUint8 = 3, - kInt32 = 4, - kInt8 = 5, - kInt64 = 6, - // kBool = 7, - // 7 is reserved for kBool, in order to keep consistency with MXNet TypeFlag defined in - // https://github.com/apache/incubator-mxnet/blob/master/3rdparty/mshadow/mshadow/base.h#L314 - kInt16 = 8, - kUint16 = 9, - kUint32 = 10, - kUint64 = 11, - kBfloat16 = 12, -}; - -enum IndicatorRuleFlag { - kGT0 = 0, - kLT0 = 1, - kMax = 2, - kMin = 3, -}; - -#define DMLC_DECLARE_DTYPE_FIELD(name) \ - DMLC_DECLARE_FIELD(name) \ - .add_enum("float16", kFloat16) \ - .add_enum("float32", kFloat32) \ - .add_enum("float64", kFloat64) \ - .add_enum("uint8", kUint8) \ - .add_enum("uint16", kUint16) \ - .add_enum("uint32", kUint32) \ - .add_enum("uint64", kUint64) \ - .add_enum("int8", kInt8) \ - .add_enum("int16", kInt16) \ - .add_enum("int32", kInt32) \ - .add_enum("int64", kInt64) \ - .add_enum("bfloat16", kBfloat16) - -struct CastParam : public dmlc::Parameter { - int dtype; - DMLC_DECLARE_PARAMETER(CastParam) { - DMLC_DECLARE_DTYPE_FIELD(dtype) - .describe("Output data type."); - } -}; - -struct IndicatorParam : public dmlc::Parameter { - TShape axis; - bool exclude; - DMLC_DECLARE_PARAMETER(IndicatorParam) { - DMLC_DECLARE_FIELD(axis).set_default(TShape()) - .describe(R"code(The axis or axes along which to perform the indicator rule. - - The default, `axis=()`, will compute over all elements into a - scalar array with shape `(1,)`. - - If `axis` is int, rule is applied on a particular axis. - - If `axis` is a tuple of ints, rule is applied on all the axes - specified in the tuple. 
- - If `exclude` is true, rule will be applied on the axes that are - NOT in axis instead.)code"); - DMLC_DECLARE_FIELD(exclude).set_default(false) - .describe("Whether to apply rule on axis that are NOT in axis instead."); - } -}; - -struct ReshapeParam : public dmlc::Parameter { - Tuple shape; - - DMLC_DECLARE_PARAMETER(ReshapeParam) { - DMLC_DECLARE_FIELD(shape); - } -}; - -struct SqueezeParam : public dmlc::Parameter { - TShape axis; - - DMLC_DECLARE_PARAMETER(SqueezeParam) { - DMLC_DECLARE_FIELD(axis).set_default(TShape()) - .describe("The axis to squeeze in the input tensor."); - } -}; - -struct ScalarParam : public dmlc::Parameter { - double scalar; - - DMLC_DECLARE_PARAMETER(ScalarParam) { - DMLC_DECLARE_FIELD(scalar); - } -}; - -struct FillValueParam : public dmlc::Parameter { - double fill_value; - - DMLC_DECLARE_PARAMETER(FillValueParam) { - DMLC_DECLARE_FIELD(fill_value) - .describe("Scalar value to be filled"); - } -}; - -struct TransposeParam : public dmlc::Parameter { - TShape axes; - - DMLC_DECLARE_PARAMETER(TransposeParam) { - DMLC_DECLARE_FIELD(axes).set_default(TShape()) - .describe("Target axis order. By default the axes will be inverted."); - } -}; - -struct FlipParam : public dmlc::Parameter { - int axis; - DMLC_DECLARE_PARAMETER(FlipParam) { - DMLC_DECLARE_FIELD(axis).set_default(0) - .describe("the axis to be reveresed."); - } -}; - -struct BroadcastToParam : public dmlc::Parameter { - TShape shape; - - DMLC_DECLARE_PARAMETER(BroadcastToParam) { - DMLC_DECLARE_FIELD(shape).set_default(TShape()) - .describe("The shape of the desired array." - " We can set the dim to zero if it's same as the original." - " E.g `A = broadcast_to(B, shape=(10, 0, 0))` "); - } -}; - -struct ReduceParam : public dmlc::Parameter { - TShape axis; - bool keepdims; - bool exclude; - int dtype; - - DMLC_DECLARE_PARAMETER(ReduceParam) { - DMLC_DECLARE_FIELD(axis).set_default(TShape()) - .describe(R"code(The axis or axes along which to perform the reduction. - - The default, `axis=()`, will compute over all elements into a - scalar array with shape `(1,)`. - - If `axis` is int, a reduction is performed on a particular axis. - - If `axis` is a tuple of ints, a reduction is performed on all the axes - specified in the tuple. 
- - If `exclude` is true, reduction will be performed on the axes that are - NOT in axis instead.)code"); - - DMLC_DECLARE_FIELD(keepdims).set_default(false) - .describe("If this is set to `True`, the reduced axes are left " - "in the result as dimension with size one."); - DMLC_DECLARE_FIELD(exclude).set_default(false) - .describe("Whether to perform reduction on axis that are NOT in axis instead."); - DMLC_DECLARE_DTYPE_FIELD(dtype).set_default(kInt32) - .describe("Target data type."); - } -}; - -struct InitOpWithScalarParam : public dmlc::Parameter { - TShape shape; - int dtype; - double fill_value; - - DMLC_DECLARE_PARAMETER(InitOpWithScalarParam) { - DMLC_DECLARE_FIELD(shape).set_default(TShape()); - DMLC_DECLARE_DTYPE_FIELD(dtype).set_default(kFloat32) - .describe("Target data type."); - DMLC_DECLARE_FIELD(fill_value).describe("Scalar value to fill"); - } -}; - -struct InitOpParam : public dmlc::Parameter { - TShape shape; - int dtype; - - DMLC_DECLARE_PARAMETER(InitOpParam) { - DMLC_DECLARE_FIELD(shape).set_default(TShape()); - DMLC_DECLARE_DTYPE_FIELD(dtype).set_default(kFloat32) - .describe("Target data type."); - } -}; - -struct ElementWiseReduceParam : public dmlc::Parameter { - int num_args; - DMLC_DECLARE_PARAMETER(ElementWiseReduceParam) { - DMLC_DECLARE_FIELD(num_args).set_lower_bound(1) - .describe("Number of inputs to be reduced."); - } -}; - -struct MatMulParam : public dmlc::Parameter { - bool transpose_a; - bool transpose_b; - - DMLC_DECLARE_PARAMETER(MatMulParam) { - DMLC_DECLARE_FIELD(transpose_a) - .describe("If true then transpose the first input before dot.") - .set_default(false); - DMLC_DECLARE_FIELD(transpose_b) - .describe("If true then transpose the second input before dot.") - .set_default(false); - } -}; - -struct ClipParam : public dmlc::Parameter { - double a_min, a_max; - DMLC_DECLARE_PARAMETER(ClipParam) { - DMLC_DECLARE_FIELD(a_min) - .describe("Minimum value such that value smaller then this will be clipped."); - DMLC_DECLARE_FIELD(a_max) - .describe("Maximum value such that value larger then this will be clipped."); - } -}; - -struct SliceLikeParam : public dmlc::Parameter { - Tuple axis; - DMLC_DECLARE_PARAMETER(SliceLikeParam) { - DMLC_DECLARE_FIELD(axis).set_default(Tuple()) - .describe("List of axes on which input data will be sliced according to the " - "corresponding size of the second input. By default will slice " - "on all axes. Negative axes are supported."); - } -}; - -} // namespace top -} // namespace nnvm - -#endif // NNVM_TOP_TENSOR_H_ diff --git a/nnvm/python/.gitignore b/nnvm/python/.gitignore deleted file mode 100644 index 40d7cb4cc13a..000000000000 --- a/nnvm/python/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.c -*.cpp diff --git a/nnvm/python/nnvm/__init__.py b/nnvm/python/nnvm/__init__.py deleted file mode 100644 index 450058449e3a..000000000000 --- a/nnvm/python/nnvm/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -#!/usr/bin/env python -# coding: utf-8 -"""NNVM python API for ease of use and help new framework establish python API. """ -from __future__ import absolute_import as _abs -import warnings - -from . import _base -from . import symbol as sym -from . import symbol -from ._base import NNVMError -from . import frontend - -__version__ = _base.__version__ - -warnings.warn("NNVM is deprecated and will be removed in a future version. Use Relay instead.", - FutureWarning) diff --git a/nnvm/python/nnvm/_base.py b/nnvm/python/nnvm/_base.py deleted file mode 100644 index 420392f17e92..000000000000 --- a/nnvm/python/nnvm/_base.py +++ /dev/null @@ -1,215 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# coding: utf-8 -# pylint: disable=invalid-name, unused-import -""" ctypes library of nnvm and helper functions """ -from __future__ import absolute_import - -import os -import sys -import ctypes -import numpy as np -from . import libinfo - -try: - import tvm -except ImportError: - pass - -#---------------------------- -# library loading -#---------------------------- -if sys.version_info[0] == 3: - string_types = str - numeric_types = (float, int, np.float32, np.int32) - # this function is needed for python3 - # to convert ctypes.char_p .value back to python str - py_str = lambda x: x.decode('utf-8') -else: - string_types = basestring - numeric_types = (float, int, long, np.float32, np.int32) - py_str = lambda x: x - - -class NNVMError(Exception): - """Error that will be throwed by all nnvm functions""" - - -def _load_lib(): - """Load libary by searching possible path.""" - lib_path = libinfo.find_lib_path() - lib = ctypes.CDLL(lib_path[0], ctypes.RTLD_LOCAL) - # DMatrix functions - lib.NNGetLastError.restype = ctypes.c_char_p - return lib - -# version number -__version__ = libinfo.__version__ -# library instance of nnvm -_LIB = _load_lib() -# The FFI mode of TVM -_FFI_MODE = os.environ.get("TVM_FFI", "auto") - -# type definitions -nn_uint = ctypes.c_uint -OpHandle = ctypes.c_void_p -SymbolHandle = ctypes.c_void_p -GraphHandle = ctypes.c_void_p - -# Global dict of str to symbol to initialize variables -_all_var_init = {} - -#---------------------------- -# helper function definition -#---------------------------- -def check_call(ret): - """Check the return value of C API call - - This function will raise exception when error occurs. 
- Wrap every API call with this function - - Parameters - ---------- - ret : int - return value from API calls - """ - if ret != 0: - raise NNVMError(py_str(_LIB.NNGetLastError())) - -def c_str(string): - """Create ctypes char * from a python string - Parameters - ---------- - string : string type - python string - - Returns - ------- - str : c_char_p - A char pointer that can be passed to C API - """ - return ctypes.c_char_p(string.encode('utf-8')) - - -def c_array(ctype, values): - """Create ctypes array from a python array - - Parameters - ---------- - ctype : ctypes data type - data type of the array we want to convert to - - values : tuple or list - data content - - Returns - ------- - out : ctypes array - Created ctypes array - """ - return (ctype * len(values))(*values) - -def ctypes2buffer(cptr, length): - """Convert ctypes pointer to buffer type. - - Parameters - ---------- - cptr : ctypes.POINTER(ctypes.c_char) - pointer to the raw memory region - length : int - the length of the buffer - - Returns - ------- - buffer : bytearray - The raw byte memory buffer - """ - if not isinstance(cptr, ctypes.POINTER(ctypes.c_char)): - raise TypeError('expected char pointer') - res = bytearray(length) - rptr = (ctypes.c_char * length).from_buffer(res) - if not ctypes.memmove(rptr, cptr, length): - raise RuntimeError('memmove failed') - return res - -def ctypes2numpy_shared(cptr, shape): - """Convert a ctypes pointer to a numpy array - - The result numpy array shares the memory with the pointer - - Parameters - ---------- - cptr : ctypes.POINTER(mx_float) - pointer to the memory region - - shape : tuple - shape of target ndarray - - Returns - ------- - out : numpy_array - A numpy array : numpy array - """ - if not isinstance(cptr, ctypes.POINTER(mx_float)): - raise RuntimeError('expected float pointer') - size = 1 - for s in shape: - size *= s - dbuffer = (mx_float * size).from_address(ctypes.addressof(cptr.contents)) - return np.frombuffer(dbuffer, dtype=np.float32).reshape(shape) - - -def ctypes2docstring(num_args, arg_names, arg_types, arg_descs, remove_dup=True): - """Convert ctypes returned doc string information into parameters docstring. - - num_args : nn_uint - Number of arguments. - - arg_names : ctypes.POINTER(ctypes.c_char_p) - Argument names. - - arg_types : ctypes.POINTER(ctypes.c_char_p) - Argument type information. - - arg_descs : ctypes.POINTER(ctypes.c_char_p) - Argument description information. - - remove_dup : boolean, optional - Whether remove duplication or not. - - Returns - ------- - docstr : str - Python docstring of parameter sections. 
- """ - param_keys = set() - param_str = [] - for i in range(num_args.value): - key = py_str(arg_names[i]) - if key in param_keys and remove_dup: - continue - param_keys.add(key) - type_info = py_str(arg_types[i]) - ret = '%s : %s' % (key, type_info) - if arg_descs[i]: - ret += '\n ' + py_str(arg_descs[i]) - param_str.append(ret) - doc_str = ('Parameters\n' + - '----------\n' + - '%s\n') - doc_str = doc_str % ('\n'.join(param_str)) - return doc_str diff --git a/nnvm/python/nnvm/_ctypes/README b/nnvm/python/nnvm/_ctypes/README deleted file mode 100644 index 6e82cb962f99..000000000000 --- a/nnvm/python/nnvm/_ctypes/README +++ /dev/null @@ -1 +0,0 @@ -Ctypes specific implementation of certain modules \ No newline at end of file diff --git a/nnvm/python/nnvm/_ctypes/__init__.py b/nnvm/python/nnvm/_ctypes/__init__.py deleted file mode 100644 index ea196643ae2f..000000000000 --- a/nnvm/python/nnvm/_ctypes/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""""ctypes implementation of the Symbol""" diff --git a/nnvm/python/nnvm/_ctypes/symbol.py b/nnvm/python/nnvm/_ctypes/symbol.py deleted file mode 100644 index 8c7d58a65920..000000000000 --- a/nnvm/python/nnvm/_ctypes/symbol.py +++ /dev/null @@ -1,242 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# coding: utf-8 -# pylint: disable=invalid-name, protected-access, too-many-arguments, too-many-lines, -# pylint: disable=len-as-condition, consider-iterating-dictionary -"""Symbolic configuration API.""" -from __future__ import absolute_import as _abs - -import copy -import ctypes -import sys -from .._base import _LIB -from .._base import c_array, c_str, nn_uint, py_str -from .._base import SymbolHandle, OpHandle -from .._base import check_call, ctypes2docstring -from ..name import NameManager -from ..attribute import AttrScope - -class SymbolBase(object): - """Symbol is symbolic graph.""" - __slots__ = ["handle"] - # pylint: disable=no-member - def __init__(self, handle): - """Initialize the function with handle - - Parameters - ---------- - handle : SymbolHandle - the handle to the underlying C++ Symbol - """ - self.handle = handle - - def __del__(self): - check_call(_LIB.NNSymbolFree(self.handle)) - - def __call__(self, *args, **kwargs): - """Invoke symbol as function on inputs. - - Parameters - ---------- - args: - provide positional arguments - - kwargs: - provide keyword arguments - Returns - ------- - the resulting symbol - """ - s = copy.deepcopy(self) - s._compose(*args, **kwargs) - return s - - def _compose(self, *args, **kwargs): - """Compose symbol on inputs. - - This call mutates the current symbol. - - Parameters - ---------- - args: - provide positional arguments - - kwargs: - provide keyword arguments - - Returns - ------- - the resulting symbol - """ - name = kwargs.pop('name', None) - - if name: - name = c_str(name) - if len(args) != 0 and len(kwargs) != 0: - raise TypeError('compose only accept input Symbols \ - either as positional or keyword arguments, not both') - - for arg in args: - if not isinstance(arg, SymbolBase): - raise TypeError('Compose expect `Symbol` as arguments') - for val in kwargs.values(): - if not isinstance(val, SymbolBase): - raise TypeError('Compose expect `Symbol` as arguments') - - num_args = len(args) + len(kwargs) - if len(kwargs) != 0: - keys = c_array(ctypes.c_char_p, [c_str(key) for key in kwargs.keys()]) - args = c_array(SymbolHandle, [s.handle for s in kwargs.values()]) - else: - keys = None - args = c_array(SymbolHandle, [s.handle for s in args]) - check_call(_LIB.NNSymbolCompose( - self.handle, name, num_args, keys, args)) - - def _set_attr(self, **kwargs): - """Set the attribute of the symbol. 
- - Parameters - ---------- - **kwargs - The attributes to set - """ - keys = c_array(ctypes.c_char_p, - [c_str(key) for key in kwargs.keys()]) - vals = c_array(ctypes.c_char_p, - [c_str(str(val)) for val in kwargs.values()]) - num_args = nn_uint(len(kwargs)) - check_call(_LIB.NNSymbolSetAttrs( - self.handle, num_args, keys, vals)) - - -_symbol_cls = SymbolBase - -def _set_symbol_class(cls): - global _symbol_cls - _symbol_cls = cls - - -def _make_atomic_symbol_function(handle, name): - """Create an atomic symbol function by handle and funciton name.""" - real_name = ctypes.c_char_p() - desc = ctypes.c_char_p() - num_args = nn_uint() - arg_names = ctypes.POINTER(ctypes.c_char_p)() - arg_types = ctypes.POINTER(ctypes.c_char_p)() - arg_descs = ctypes.POINTER(ctypes.c_char_p)() - ret_type = ctypes.c_char_p() - - check_call(_LIB.NNGetOpInfo( - handle, ctypes.byref(real_name), ctypes.byref(desc), - ctypes.byref(num_args), - ctypes.byref(arg_names), - ctypes.byref(arg_types), - ctypes.byref(arg_descs), - ctypes.byref(ret_type))) - param_str = ctypes2docstring(num_args, arg_names, arg_types, arg_descs) - func_name = name - desc = py_str(desc.value) - - doc_str = ('%s\n\n' + - '%s\n' + - 'Returns\n' + - '-------\n' + - 'result: Tensor\n' + - ' The result Tensor.') - doc_str = doc_str % (desc, param_str) - - def creator(*args, **kwargs): - """Activation Operator of Neural Net. - The parameters listed below can be passed in as keyword arguments. - - Parameters - ---------- - name : string, required. - Name of the resulting symbol. - - Returns - ------- - symbol: Symbol - the resulting symbol - """ - param_keys = [] - param_vals = [] - symbol_kwargs = {} - name = kwargs.pop('name', None) - attr = kwargs.pop('attr', None) - - for k, v in kwargs.items(): - if isinstance(v, SymbolBase): - symbol_kwargs[k] = v - else: - param_keys.append(c_str(k)) - param_vals.append(c_str(str(v))) - # create atomic symbol - param_keys = c_array(ctypes.c_char_p, param_keys) - param_vals = c_array(ctypes.c_char_p, param_vals) - sym_handle = SymbolHandle() - check_call(_LIB.NNSymbolCreateAtomicSymbol( - handle, - nn_uint(len(param_keys)), - param_keys, param_vals, - ctypes.byref(sym_handle))) - - if len(args) != 0 and len(symbol_kwargs) != 0: - raise TypeError( - '%s can only accept input' - 'Symbols either as positional or keyword arguments, not both' % func_name) - s = _symbol_cls(sym_handle) - attr = AttrScope.current.get(attr) - if attr: - s._set_attr(**attr) - hint = func_name.lower() - name = NameManager.current.get(name, hint) - s._compose(*args, name=name, **symbol_kwargs) - return s - - creator.__name__ = func_name - creator.__doc__ = doc_str - return creator - - -def _init_symbol_module(symbol_class, root_namespace): - """List and add all the atomic symbol functions to current module.""" - _set_symbol_class(symbol_class) - plist = ctypes.POINTER(ctypes.c_char_p)() - size = ctypes.c_uint() - - check_call(_LIB.NNListAllOpNames(ctypes.byref(size), - ctypes.byref(plist))) - op_names = [] - for i in range(size.value): - op_names.append(py_str(plist[i])) - - module_obj = sys.modules["%s.symbol" % root_namespace] - module_obj_contrib = sys.modules["%s.contrib" % root_namespace] - module_internal = sys.modules["%s._symbol_internal" % root_namespace] - for name in op_names: - hdl = OpHandle() - check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl))) - function = _make_atomic_symbol_function(hdl, name) - if function.__name__.startswith('_contrib_'): - setattr(module_obj_contrib, 
function.__name__.split('_contrib_')[1], function) - elif function.__name__.startswith('_'): - setattr(module_internal, function.__name__, function) - setattr(module_obj, function.__name__, function) - else: - setattr(module_obj, function.__name__, function) diff --git a/nnvm/python/nnvm/_cy2/README b/nnvm/python/nnvm/_cy2/README deleted file mode 100644 index ed4639b674a0..000000000000 --- a/nnvm/python/nnvm/_cy2/README +++ /dev/null @@ -1 +0,0 @@ -This folder is by default empty and will hold DLLs generated by cython. diff --git a/nnvm/python/nnvm/_cy2/__init__.py b/nnvm/python/nnvm/_cy2/__init__.py deleted file mode 100644 index 1961cd9ff613..000000000000 --- a/nnvm/python/nnvm/_cy2/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Namespace for cython generated modules for python2""" diff --git a/nnvm/python/nnvm/_cy3/README b/nnvm/python/nnvm/_cy3/README deleted file mode 100644 index dc3a57603782..000000000000 --- a/nnvm/python/nnvm/_cy3/README +++ /dev/null @@ -1 +0,0 @@ -This folder is by default empty and will hold DLLs generated by cython. \ No newline at end of file diff --git a/nnvm/python/nnvm/_cy3/__init__.py b/nnvm/python/nnvm/_cy3/__init__.py deleted file mode 100644 index c9a495225351..000000000000 --- a/nnvm/python/nnvm/_cy3/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Cython generated modules""" diff --git a/nnvm/python/nnvm/_symbol_internal.py b/nnvm/python/nnvm/_symbol_internal.py deleted file mode 100644 index de2f85aa2f29..000000000000 --- a/nnvm/python/nnvm/_symbol_internal.py +++ /dev/null @@ -1,17 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Module space to register internal functions. Leave empty""" diff --git a/nnvm/python/nnvm/attribute.py b/nnvm/python/nnvm/attribute.py deleted file mode 100644 index 14341794bb64..000000000000 --- a/nnvm/python/nnvm/attribute.py +++ /dev/null @@ -1,76 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# coding: utf-8 -"""Attribute scoping support for symbolic API.""" -from __future__ import absolute_import - -from ._base import string_types - -class AttrScope(object): - """Attribute manager for scoping. - - User can also inherit this object to change naming behavior. - - Parameters - ---------- - kwargs - The attributes to set for all symbol creations in the scope. - """ - current = None - - def __init__(self, **kwargs): - self._old_scope = None - for value in kwargs.values(): - if not isinstance(value, string_types): - raise ValueError("Attributes need to be string") - self._attr = kwargs - - def get(self, attr): - """ - Get the attribute dict given the attribute set by the symbol. - - Parameters - ---------- - attr : dict of string to string - The attribute passed in by user during symbol creation. - - Returns - ------- - attr : dict of string to string - Updated attributes to add other scope related attributes. - """ - if self._attr: - ret = self._attr.copy() - if attr: - ret.update(attr) - return ret - return attr - - def __enter__(self): - # pylint: disable=protected-access - self._old_scope = AttrScope.current - attr = AttrScope.current._attr.copy() - attr.update(self._attr) - self._attr = attr - AttrScope.current = self - return self - - def __exit__(self, ptype, value, trace): - assert self._old_scope - AttrScope.current = self._old_scope - -AttrScope.current = AttrScope() diff --git a/nnvm/python/nnvm/compiler/__init__.py b/nnvm/python/nnvm/compiler/__init__.py deleted file mode 100644 index 6a3e846c4496..000000000000 --- a/nnvm/python/nnvm/compiler/__init__.py +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""NNVM compiler toolchain. - -User only need to use :any:`build` and :any:`build_config` to do the compilation, -and :any:`save_param_dict` to save the parameters into bytes. -The other APIs are for more advanced interaction with the compiler toolchain. -""" -from __future__ import absolute_import - -import tvm - -from . import build_module -from . build_module import build, optimize, build_config -from . compile_engine import engine, graph_key -from . param_dict import save_param_dict, load_param_dict - -from .. import symbol as _symbol -from .. import graph as _graph - -from .. import top as _top - - -tvm.register_extension(_symbol.Symbol, _symbol.Symbol) -tvm.register_extension(_graph.Graph, _graph.Graph) diff --git a/nnvm/python/nnvm/compiler/build_module.py b/nnvm/python/nnvm/compiler/build_module.py deleted file mode 100644 index c52265f1adc4..000000000000 --- a/nnvm/python/nnvm/compiler/build_module.py +++ /dev/null @@ -1,466 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -"""Namespace for building operators.""" -from __future__ import absolute_import as _abs - -import logging -import tvm - -from tvm.contrib import graph_runtime -from tvm import autotvm -from . import graph_attr, graph_util -from .. import graph as _graph -from .. import symbol as sym -from .._base import _all_var_init - -OPT_PASS_LEVEL = { - "SimplifyInference": 0, - "PrecomputePrune": 2, - "OpFusion": 1, - "FoldScaleAxis": 3, - "AlterOpLayout": 3, -} - -# List of optimization pass and level when switch on -class BuildConfig(object): - """Configuration scope to set a build config option. - - Parameters - ---------- - kwargs - Keyword arguments of configurations to set. 
- """ - current = None - defaults = { - "opt_level": 2, - "add_pass": None, - } - def __init__(self, **kwargs): - self._old_scope = None - for k, _ in kwargs.items(): - if k not in BuildConfig.defaults: - raise ValueError( - "invalid argument %s, candidates are %s" % (k, BuildConfig.defaults.keys())) - self._attr = kwargs - - def __getattr__(self, name): - if name not in self._attr: - return BuildConfig.defaults[name] - return self._attr[name] - - def __enter__(self): - # pylint: disable=protected-access - self._old_scope = BuildConfig.current - attr = BuildConfig.current._attr.copy() - attr.update(self._attr) - self._attr = attr - BuildConfig.current = self - return self - - def __exit__(self, ptype, value, trace): - assert self._old_scope - BuildConfig.current = self._old_scope - - def pass_enabled(self, pass_name): - """Get whether pass is enabled. - - Parameters - ---------- - pass_name : str - The optimization pass name - - Returns - ------- - enabled : bool - Whether pass is enabled. - """ - if self.add_pass and pass_name in self.add_pass: - return True - return self.opt_level >= OPT_PASS_LEVEL[pass_name] - - -BuildConfig.current = BuildConfig() - -def build_config(**kwargs): - """Configure the build behavior by setting config variables. - - Parameters - ---------- - opt_level: int, default=2 - Optimization level. See OPT_PASS_LEVEL for level of each pass. - - add_pass: set of str - Optimization pass to be added regardless of optimization level. - - Returns - ------- - config: BuildConfig - The build configuration - """ - return BuildConfig(**kwargs) - - -@tvm.register_func("nnvm.compiler.lower") -def _lower(sch, inputs, func_name, graph): - import traceback - # pylint: disable=broad-except - try: - f = tvm.lower(sch, inputs, name=func_name) - logging.debug("lower function %s", func_name) - logging.debug("%s", tvm.lower(sch, inputs, simple_mode=True)) - except Exception: - msg = traceback.format_exc() - msg += "Error during compile graph\n" - msg += "--------------------------\n" - msg += graph.ir(join_entry_attrs=["shape"]) - raise RuntimeError(msg) - return f if isinstance( - f, (tvm.container.Array, tuple, list)) else [f] - - -@tvm.register_func("nnvm.compiler.build_target") -def _build(funcs, target, target_host): - if target_host == "": - target_host = None - return tvm.build(funcs, target=target, target_host=target_host) - - -def _update_shape_dtype(shape, dtype, params): - """Update shape dtype given params information""" - if not params: - return shape, dtype - shape = shape.copy() - shape.update({k : v.shape for k, v in params.items()}) - if isinstance(dtype, str): - for k, v in params.items(): - if v.dtype != dtype and v.shape: - raise ValueError( - "%s: dtype not expected %s vs %s" % (k, dtype, v.dtype)) - else: - dtype = dtype.copy() - dtype.update({k : str(v.dtype) for k, v in params.items()}) - return shape, dtype - - -def optimize(graph, shape, dtype="float32", layout=None): - """Perform target and parameter invariant graph optimization. - - This is an advanced function that usually do not need to be called. - Call build instead. - - Parameters - ---------- - graph : Graph - The graph to be used in optimized. - - Returns - ------- - graph : Graph - The optimized graph. 
- """ - # pylint: disable=unused-argument - cfg = BuildConfig.current - - if cfg.pass_enabled("AlterOpLayout"): - layout = layout if layout else {} - graph = graph_attr.set_layout_inputs(graph, layout) - graph = graph.apply(["CorrectLayout"]) - - graph = graph_attr.set_shape_inputs(graph, shape) - graph = graph_attr.set_dtype_inputs(graph, dtype) - graph = graph.apply(["InferShape", "InferType", "AlterOpLayout"]) - graph = graph_attr.set_layout_inputs(graph, layout) - graph = graph.apply(["CorrectLayout"]) - - if cfg.pass_enabled("SimplifyInference"): - graph = graph_attr.set_shape_inputs(graph, shape) - graph = graph.apply(["InferShape", "SimplifyInference"]) - - if cfg.pass_enabled("FoldScaleAxis"): - graph = graph_attr.set_shape_inputs(graph, shape) - graph = graph.apply(["InferShape", "FoldScaleAxis"]) - return graph - - -def build(graph, target=None, shape=None, dtype="float32", - params=None, target_host=None, layout=None): - """Build graph into runtime library. - - The build function will optimize the graph and do the compilation. - - When params is provided, the compiler might split the graph to - pre-compute certain values, so the final execution graph can - be different from the original one. - - Parameters - ---------- - graph : Graph - The graph to be used in lowering - - target : str or :any:`tvm.target.Target`, optional - The build target - - shape : dict of str to tuple, optional - The input shape to the graph - - dtype : str or dict of str to str - The input types to the graph - - params : dict of str to NDArray - Input parameters to the graph that do not change - during inference time. Used for pre-compute - folding optimization. - - target_host : str or :any:`tvm.target.Target` optional - Host compilation target, if target is device. - When TVM compiles device specific program such as CUDA, - we also need host(CPU) side code to interact with the driver - setup the dimensions and parameters correctly. - target_host is used to specify the host side codegen target. - By default, llvm is used if it is enabled, - otherwise a stackvm intepreter is used. - - layout : dict of str to str or str optional - The input layout - - Returns - ------- - graph : Graph - The final execution graph. - - libmod : tvm.Module - The module that comes with the execution graph - - params : dict of str to NDArray - The updated parameters of graph if params is passed. - This can be different from the params passed in. 
- """ - target = target if target else tvm.target.current_target() - if target is None: - raise ValueError("Target is not set in env or passed as argument.") - target = tvm.target.create(target) - - # If current dispatch context is fallback context (the default root context), - # then load pre-tuned parameters from TopHub - if isinstance(autotvm.DispatchContext.current, autotvm.FallbackContext): - tophub_context = autotvm.tophub.context(target) - else: - tophub_context = autotvm.util.EmptyContext() - - with tophub_context: - shape = shape if shape else {} - if not isinstance(shape, dict): - raise TypeError("require shape to be dict") - for value in shape.values(): - if not all(isinstance(x, tvm._ffi.base.integer_types) for x in value): - raise TypeError("shape value must be Integer types iterator") - - cfg = BuildConfig.current - graph = graph if isinstance(graph, _graph.Graph) else _graph.create(graph) - shape, dtype = _update_shape_dtype(shape, dtype, params) - - # correct layout if necessary - layout = layout if layout else {} - graph = graph_attr.set_layout_inputs(graph, layout) - graph = graph.apply("CorrectLayout") - index = graph.index - layouts = graph.json_attr("layout") - layout = {x: layouts[index.entry_id(x)] for x in index.input_names} - - # Initial pass do shape type inference - ishape, _ = graph_util.infer_shape(graph, **shape) - shape.update(zip(graph.index.input_names, ishape)) - if not isinstance(dtype, str): - idtype, _ = graph_util.infer_dtype(graph, **dtype) - dtype.update(zip(graph.index.input_names, idtype)) - # Initialize all variables specified in _all_var_init - init_var = {} - if _all_var_init: - init_var = initialize_variables(shape, dtype) - # Apply optimization - with target: - graph = optimize(graph, shape, dtype, layout) - - # Clear extra params without nodes. - _remove_noref_params(params, graph) - - # Precompute prune - if params and cfg.pass_enabled("PrecomputePrune"): - graph, params = precompute_prune(graph, params) - shape, dtype = _update_shape_dtype(shape, dtype, params) - # Operator Fusion and generation - graph = graph_attr.set_shape_inputs(graph, shape) - graph = graph.apply("InferShape") - graph = graph_attr.set_dtype_inputs(graph, dtype) - graph._set_json_attr("target", str(target), "str") - if target_host is not None: - graph._set_json_attr("target_host", str(target_host), "str") - if cfg.pass_enabled("OpFusion"): - graph._set_json_attr("opt_level", 1, "int") - else: - graph._set_json_attr("opt_level", 0, "int") - graph = graph.apply("InferShape").apply("InferType") - graph = graph.apply("GraphFindFusibleGroups") - graph = graph.apply("GraphFuse") - with target: - graph = graph.apply("GraphCompile") - libmod = graph_attr._move_out_module(graph, "module") - # Write variable initial values into params - if init_var: - if params is None: - params = {} - params.update(init_var) - return graph, libmod, params - -def _remove_noref_params(params, graph): - """ Helper to clear non referenced params - - Parameters - ---------- - graph : Graph - The input graph - - params: dict of str to ndarray - The parameter dictionary - """ - arg_list = set(graph.symbol.list_input_names()) - - if params: - param_keys = list(params.keys()) - for key in param_keys: - if key not in arg_list: - params.pop(key) - -def _run_graph(graph, params): - """Helper utility to build and run and get outputs, only use cpu mode. - - Parameters - ---------- - graph : Graph - The graph to be executed. - - params: dict of str to ndarray - The parameter dictionary. 
- - Returns - ------- - out_dict: dict of str to tvm.NDArray - The output dictionaries. - """ - graph = graph if isinstance(graph, _graph.Graph) else _graph.create(graph) - shape = {k : v.shape for k, v in params.items()} - dtype = {k : v.dtype for k, v in params.items()} - target = "llvm" - ctx = tvm.cpu(0) - _, oshape = graph_util.infer_shape(graph, **shape) - _, odtype = graph_util.infer_dtype(graph, **dtype) - graph, libmod, _ = build(graph, target, shape, dtype) - m = graph_runtime.create(graph, libmod, ctx) - set_input, run, get_output = m["set_input"], m["run"], m["get_output"] - kset = set(graph.symbol.list_input_names()) - for k, v in params.items(): - if k in kset: - set_input(k, tvm.nd.array(v)) - run() - out_data = [] - for i, kv in enumerate(zip(oshape, odtype)): - shape, dtype = kv - arr = tvm.nd.empty(shape, dtype, ctx) - get_output(i, arr) - out_data.append(arr) - return out_data - - -def precompute_prune(graph, params): - """Precompute the part of graph that can be pre-computed. - - This will create a new graph that only contains the ops - that need to be computed depending on input as well as - updated version of param dict that pre-computes some of - intermediate results. - - Parameters - ---------- - graph : Graph - The input graph - - params : dict of str -> tvm.NDArray - The parameter dictionary of the graph - - Returns - ------- - pruned_graph : Graph - The pruned graph - - new_params : dict of str-> tvm.NDArray - The updated dictionary of parameters. - """ - graph = graph if isinstance(graph, _graph.Graph) else _graph.create(graph) - graph._set_json_attr("param_name_list", list(params.keys()), "list_str") - graph = graph.apply("PrecomputePrune") - pre_graph = graph_attr._move_out_graph(graph, "precompute_graph") - if pre_graph is None: - return graph, params - out_names = pre_graph.json_attr("output_names") - if not pre_graph.symbol.list_output_names(): - return graph, params - with tvm.build_config(auto_unroll_max_step=0): - out_arrs = _run_graph(pre_graph, params) - return graph, dict(zip(out_names, out_arrs)) - - -def initialize_variables(ishape, idtype): - """ Initialize variables stored in _all_var_init dictionary. - - Parameters - ---------- - ishape : dict of str to tuple of int - The input shape to the graph - - idtype : str or dict of str to str - The input types to the graph - - Returns - ------- - init_var : dict of str to tvm.ndarray - """ - symbol_init_dict = {} - const_init_dict = {} - init_var = {} - for key, value in _all_var_init.items(): - if isinstance(value, sym.Symbol): - symbol_init_dict[key] = value - else: - const_init_dict[key] = tvm.nd.array(value) - # Make sure variables are initialized only once. 
- _all_var_init.clear() - if symbol_init_dict: - # Create dummy params to run initialization graph - params = {} - for name, shape in ishape.items(): - dtype = idtype if isinstance(idtype, str) else idtype[name] - params[name] = tvm.nd.empty(shape, dtype, ctx=tvm.cpu()) - init_group_sym = sym.Group(symbol_init_dict.values()) - graph = _graph.create(init_group_sym) - with tvm.build_config(auto_unroll_max_step=0): - init_values = _run_graph(graph, params) - init_var.update(dict(zip(symbol_init_dict.keys(), init_values))) - init_var.update(const_init_dict) - for name, data in init_var.items(): - ishape[name] = data.shape - return init_var diff --git a/nnvm/python/nnvm/compiler/compile_engine.py b/nnvm/python/nnvm/compiler/compile_engine.py deleted file mode 100644 index d7799bf7b0e7..000000000000 --- a/nnvm/python/nnvm/compiler/compile_engine.py +++ /dev/null @@ -1,119 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -"""Compiler engine interface to internal engine - -You can get the engine singleton at ``nnvm.compiler.engine`` -""" -import tvm - -_list_cache_items = tvm.get_global_func("nnvm.compiler.ListCacheItems") -_clear_cache = tvm.get_global_func("nnvm.compiler.ClearCache") -_get_cache_item = tvm.get_global_func("nnvm.compiler.GetCacheItem") -_set_cache_item = tvm.get_global_func("nnvm.compiler.SetCacheItem") -_graph_key_get_graph = tvm.get_global_func("nnvm.compiler.GraphKeyGetGraph") -_make_graph_key = tvm.get_global_func("nnvm.compiler.MakeGraphKey") - -@tvm.register_node -class GraphKey(tvm.node.NodeBase): - """Key of a graph compilation context""" - @property - def graph(self): - return _graph_key_get_graph(self) - - -@tvm.register_node -class GraphCacheEntry(tvm.node.NodeBase): - """CacheEntry of compilation into a TVM Function""" - - -@tvm.register_node -class GraphFunc(tvm.node.NodeBase): - """Compiled result of a graph into a TVM Function""" - - -class Engine(object): - """Global singleton compilation engine. - - You can get the singleton at ``nnvm.compiler.engine`` - """ - def items(self): - """List the available cache key value pairs. 
- - Returns - ------- - item_list : list of (GraphKey, GraphCacheEntry) - The existing cache items - """ - res = _list_cache_items() - assert len(res) % 2 == 0 - return [(res[2*i], res[2*i+1]) for i in range(len(res) // 2)] - - def clear_cache(self): - """Clear the existing cached functions.""" - _clear_cache() - - def __setitem__(self, key, value): - """Clear the existing cached functions.""" - if isinstance(value, GraphCacheEntry): - _set_cache_item(key, value.graph_func) - else: - _set_cache_item(key, value) - - def __getitem__(self, key): - """Clear the existing cached functions.""" - return _get_cache_item(key) - - def dump(self): - """Return a string representation of engine dump - - Returns - ------- - dump : str - The dumped string representation - """ - items = self.items() - res = "====================================\n" - res += "CompilerEngine dump, %d items cached\n" % len(items) - for key, value in items: - res += "------------------------------------\n" - res += "target={}\n".format(key.target) - res += "inputs={}\n".format(key.inputs) - res += "use_count={}\n".format(value.use_count) - res += "func_name={}\n".format(value.graph_func.func_name) - res += key.graph.ir() + "\n" - res += "===================================\n" - return res - -engine = Engine() - - -def graph_key(graph, inputs, target): - """Construct a new graph key. - - Parameters - ---------- - graph : Graph - The computation graph structure - - inputs : list of Tensor(placeholder) - The input requirement to the graph. - - target : str - The target of compilation. - """ - return _make_graph_key(graph, inputs, target) diff --git a/nnvm/python/nnvm/compiler/graph_attr.py b/nnvm/python/nnvm/compiler/graph_attr.py deleted file mode 100644 index de557cce78b3..000000000000 --- a/nnvm/python/nnvm/compiler/graph_attr.py +++ /dev/null @@ -1,136 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -"""Utilities to access graph attributes""" -from __future__ import absolute_import as _abs - -import tvm - -def set_shape_inputs(g, shape): - """Set the shape of input graph nodes in the graph attribute. - - Parameters - ---------- - g : Graph - The input graph - - shape : dict of str to tuple - The input shape - - Returns - ------- - g : Graph - The updated graph with updated shape. 
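A short sketch of inspecting the compile-engine cache through the engine singleton defined above, assuming at least one graph has already been built:

from nnvm.compiler import engine

for key, entry in engine.items():
    # key is a GraphKey, entry a GraphCacheEntry
    print(key.target, entry.use_count, entry.graph_func.func_name)
print(engine.dump())   # human-readable summary of every cached function
engine.clear_cache()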
- """ - list_shape = [ - shape.get(name, ()) for name in g.index.input_names] - g._set_json_attr("shape_inputs", list_shape, 'list_shape') - return g - - -DTYPE_TO_TCODE = { - "default": -1, - "float32": 0, - "float64": 1, - "float16": 2, - "uint8": 3, - "int32": 4, - "int8": 5, - "int64": 6, - "int16": 7, - "uint16": 8, - "uint32": 9, - "uint64": 10, - "bool": 11, -} - -TCODE_TO_DTYPE = { - -1: None, - 0: "float32", - 1: "float64", - 2: "float16", - 3: "uint8", - 4: "int32", - 5: "int8", - 6: "int64", - 7: "int16", - 8: "uint16", - 9: "uint32", - 10: "uint64", - 11: "bool", -} - -def set_dtype_inputs(g, dtype): - """Set the dtype inputs of graph nodes - - Parameters - ---------- - g : Graph - The input graph - - dtype : dict of str to str or str - The input dtype - - Returns - ------- - g : Graph - The updated graph with updated dtype. - """ - if isinstance(dtype, dict): - list_dtype = [ - DTYPE_TO_TCODE[str(dtype.get(name, "default"))] - for name in g.index.input_names] - else: - list_dtype = [DTYPE_TO_TCODE[dtype]] * len(g.index.input_names) - g._set_json_attr("dtype_inputs", list_dtype, "list_int") - return g - - -def set_layout_inputs(g, layout): - """Set the layout inputs of graph nodes - - Parameters - ---------- - g : Graph - The input graph - - layout : dict of str to str or str - The input layout - - Returns - ------- - g : Graph - The updated graph with updated layout. - """ - if isinstance(layout, dict): - list_layout = [ - layout.get(name, "__undef__") for name in g.index.input_names] - elif isinstance(layout, str): - list_layout = ["__undef__"] * len(g.index.input_names) - list_layout[0] = layout - else: - raise ValueError("Input layout must be str or dict") - last_inferred_layouts = g.json_attr("layout") - if last_inferred_layouts: - input_layout = [last_inferred_layouts[g.index.entry_id(x)] for x in g.index.input_names] - for i, layout_stored in enumerate(input_layout): - list_layout[i] = list_layout[i] if list_layout[i] != '__undef__' else layout_stored - g._set_json_attr("layout_inputs", list_layout, 'list_layout') - return g - -_move_out_module = tvm.get_global_func("nnvm.graph._move_module") -_move_out_graph = tvm.get_global_func("nnvm.graph._move_graph") diff --git a/nnvm/python/nnvm/compiler/graph_pass.py b/nnvm/python/nnvm/compiler/graph_pass.py deleted file mode 100644 index a11a80e43fe4..000000000000 --- a/nnvm/python/nnvm/compiler/graph_pass.py +++ /dev/null @@ -1,24 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -"""Namespace of graph pass. - -Principle: -- Graph in, graph out: always takes in graph as first argument and returns a graph -- Composable API: break graph transformation pass as segments of small transformations. 
-""" -from __future__ import absolute_import as _abs diff --git a/nnvm/python/nnvm/compiler/graph_util.py b/nnvm/python/nnvm/compiler/graph_util.py deleted file mode 100644 index 3ce38dacacc3..000000000000 --- a/nnvm/python/nnvm/compiler/graph_util.py +++ /dev/null @@ -1,164 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -"""Utility function to get information from graph.""" -from __future__ import absolute_import as _abs - -import tvm -from . import graph_attr - -from ..graph import create -from ..symbol import Group, ones_like - -def infer_shape(graph, **shape): - """Infer the shape given the shape of inputs. - - Parameters - ---------- - graph : Graph - The graph to perform shape inference from - - shape : dict of str to tuple - The specific input shape. - - Returns - ------- - in_shape : list of tuple - Shape of inputs - - out_shape: list of tuple - Shape of outputs - """ - graph = graph_attr.set_shape_inputs(graph, shape) - graph = graph.apply("InferShape") - shape = graph.json_attr("shape") - index = graph.index - input_shape = [shape[index.entry_id(x)] for x in index.input_names] - output_shape = [shape[index.entry_id(x)] for x in index.output_entries] - return input_shape, output_shape - - -def infer_dtype(graph, **dtype): - """Infer the type given the typeS of inputs. - - Parameters - ---------- - graph : Graph - The graph to perform type inference from - - dtype : dict of str to dtype - The specific input data type. - - Returns - ------- - in_dtype : list of tuple - Dtype of inputs - - out_dtype: list of tuple - Dtype of outputs - """ - graph = graph_attr.set_dtype_inputs(graph, dtype) - graph = graph.apply("InferType") - dtype = graph.json_attr("dtype") - index = graph.index - input_dtype = [graph_attr.TCODE_TO_DTYPE[dtype[index.entry_id(x)]] - for x in index.input_names] - output_dtype = [graph_attr.TCODE_TO_DTYPE[dtype[index.entry_id(x)]] - for x in index.output_entries] - return input_dtype, output_dtype - - -_deep_compare = tvm.get_global_func("nnvm.graph.DeepCompare") - -def check_graph_equal(grapha, graphb, compare_variable_attrs=False): - """Check if two graphs have equal structure. - - Parameters - ---------- - grapha : Graph - The first graph - - graphb : Graph - The second graph - - compare_variable_attrs : bool, optional - Whether we want to compare attributes(names) on variables. - Usually it is safe to skip it unless we want input name - to exactly match - - Raises - ------ - ValueError - ValueError is raised with error message when graph not equal - """ - err = _deep_compare(grapha, graphb, compare_variable_attrs) - if err: - raise ValueError("Graph compare error: " + err) - -def get_gradient_graph(ys, xs, grad_ys=None): - """Create gradient graph of ys with respect to xs. 
- - Parameters - ---------- - ys : Symbol or list of Symbol - Symbols from which the gradient is calculated. - xs : Symbol or list of Symbol - Symbols the gradient respect to. - For group symbol, gradients for all outputs will be calculated. - grad_ys : Symbol or list of Symbol - Head gradients for ys. - - Returns - ------- - ret : Graph - Generated gradient graph. - """ - if isinstance(ys, list): - ys = Group(ys) - g = create(ys) - g._set_symbol_list_attr('grad_ys', ys) - g._set_symbol_list_attr('grad_xs', xs) - ny = len(ys.list_output_names()) - if grad_ys is None: - grad_ys = [ones_like(ys[i]) for i in range(ny)] - g._set_symbol_list_attr('grad_ys_out_grad', grad_ys) - return g.apply('Gradient') - -def gradients(ys, xs, grad_ys=None): - """Create gradient symbol of ys respect to xs. - - Parameters - ---------- - ys : Symbol or list of Symbol - Symbols from which the gradient is calculated. - xs : Symbol or list of Symbol - Symbols the gradient respect to. - For group symbol, gradients for all outputs will be calculated. - grad_ys : Symbol or list of Symbol - Head gradients for ys. - - Returns - ------- - ret : list of Symbol - Generated gradient symbol. For each xs, - all gradients from ys are merged into a single symbol. - """ - grad_g = get_gradient_graph(ys, xs, grad_ys) - nx = len(Group(xs).list_output_names()) \ - if isinstance(xs, list) else len(xs.list_output_names()) - ret = [grad_g.symbol[i] for i in range(nx)] - return ret diff --git a/nnvm/python/nnvm/compiler/lr_scheduler.py b/nnvm/python/nnvm/compiler/lr_scheduler.py deleted file mode 100644 index 3a33f390b6f4..000000000000 --- a/nnvm/python/nnvm/compiler/lr_scheduler.py +++ /dev/null @@ -1,74 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=too-few-public-methods, no-member -"""API for scheduling learning rate.""" -from .. import symbol as sym - -class LRScheduler(object): - """Base class of a learning rate scheduler. - - A scheduler returns a new learning rate based on the number of updates that have - been performed. - - Parameters - ---------- - base_lr : float, optional - The initial learning rate. - """ - def __init__(self, base_lr=0.01, name='LRScheduler'): - self.name = name - self.base_lr = base_lr - - def __call__(self, num_update): - """Return a new learning rate based on number of updates. - - Parameters - ---------- - num_update: nnvm Symbol - the number of updates applied to weight. - """ - raise NotImplementedError("__call__ method must be overridden.") - -class FactorScheduler(LRScheduler): - """Reduce the learning rate by a factor for every *n* steps. - - It returns a new learning rate by:: - - base_lr * pow(factor, num_update/step) - - Parameters - ---------- - step : int - Changes the learning rate for every n updates. 
- factor : float, optional - The factor to change the learning rate. - stop_factor_lr : float, optional - Stop updating the learning rate if it is less than this value. - """ - def __init__(self, step, factor=1, stop_factor_lr=1e-8, name='FactorScheduler', **kwargs): - super(FactorScheduler, self).__init__(name=name, **kwargs) - if step < 1: - raise ValueError("Schedule step must be greater or equal than 1 round") - if factor > 1.0: - raise ValueError("Factor must be no more than 1 to make lr reduce") - self.step = step - self.factor = factor - self.stop_factor_lr = stop_factor_lr - - def __call__(self, num_update): - updated_lr = self.base_lr * self.factor ** (num_update / self.step) - return sym.clip(updated_lr, a_min=self.stop_factor_lr, a_max=self.base_lr) diff --git a/nnvm/python/nnvm/compiler/optimizer.py b/nnvm/python/nnvm/compiler/optimizer.py deleted file mode 100644 index ba739b8c7056..000000000000 --- a/nnvm/python/nnvm/compiler/optimizer.py +++ /dev/null @@ -1,147 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, no-member, too-few-public-methods, too-many-arguments, too-many-locals, protected-access -"""Optimizer API""" -from . import graph_util -from .. import symbol as sym - -class Optimizer(object): - """Base class inherited by all optimizers. - - Parameters - ---------- - learning_rate : float, optional - The initial learning rate. - - lr_scheduler : LRScheduler, optional - The learning rate scheduler. - - rescale_grad : float, optional - Multiply the gradient with `rescale_grad` before updating. Often - choose to be ``1.0/batch_size``. - - clip_gradient : float, optional - Clip the gradient by projecting onto the box ``[-clip_gradient, clip_gradient]``. - - wd : float, optional - The weight decay (or L2 regularization) coefficient. Modifies objective - by adding a penalty for having large weights. - - name : string, optional - The name of optimizer. - """ - def __init__(self, learning_rate=0.01, lr_scheduler=None, - rescale_grad=1, clip_gradient=None, wd=0, name="Optimizer"): - self.name = name - self.lr = learning_rate - self.lr_scheduler = lr_scheduler - self.rescale_grad = rescale_grad - self.clip_gradient = clip_gradient - self.wd = wd - init_update_t = sym.Variable(name+'_t', init=sym.zeros(shape=(1,), dtype="int32")) - self.update_t = sym._assign(init_update_t, init_update_t + 1) - - def minimize(self, obj, var=None): - """Minimize given obj symbol respect to var. If var is not set, all input - variables of obj will be used. - - Parameters - ---------- - obj : nnvm Symbol or list of nnvm Symbols - Symbols to be minimized. - var : nnvm Symbol or list of nnvm Symbols, optional - Symbols the gradient respect to. 
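A hedged sketch of how the scheduler above and the Optimizer.minimize interface fit together (SGD, defined further down, is the concrete subclass); loss stands in for a scalar nnvm Symbol and the hyper-parameters are arbitrary:

from nnvm.compiler.lr_scheduler import FactorScheduler
from nnvm.compiler.optimizer import SGD

scheduler = FactorScheduler(step=100, factor=0.9)
opt = SGD(learning_rate=0.1, lr_scheduler=scheduler, wd=1e-4)
update_sym = opt.minimize(loss)   # a Group symbol of _assign update ops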
- - Returns - ------- - group_sym : nnvm Symbol - Group symbol represents update symbols. - """ - raise NotImplementedError() - - def _get_lr(self): - """Gets the learning rate with learning rate scheduler. - - Returns - ------- - lr : float - Learning rate. - """ - if self.lr_scheduler is not None: - lr = self.lr_scheduler(self.update_t) - else: - lr = self.lr - return lr - - -class SGD(Optimizer): - """The SGD optimizer - """ - def __init__(self, name='SGD', **kwargs): - super(SGD, self).__init__(name=name, **kwargs) - - def minimize(self, obj, var=None): - variables = var or obj.list_input_variables() - if not isinstance(variables, list): - variables = [variables] - grads = graph_util.gradients(obj, variables) - updates = [] - lr_t = self._get_lr() - for v, g in zip(variables, grads): - g = self.rescale_grad * g - if self.clip_gradient is not None: - g = sym.clip(g, a_min=-1 * self.clip_gradient, a_max=self.clip_gradient) - updates.append(sym._assign(v, v - lr_t * (g + self.wd * v))) - return sym.Group(updates) - - -class Adam(Optimizer): - """The Adam optimizer. - - This class implements the optimizer described in *Adam: A Method for - Stochastic Optimization*, available at http://arxiv.org/abs/1412.6980. - """ - def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, - epsilon=1e-8, name='Adam', **kwargs): - super(Adam, self).__init__(learning_rate=learning_rate, name=name, **kwargs) - self.beta1 = beta1 - self.beta2 = beta2 - self.epsilon = epsilon - self.m = [] - self.v = [] - - def minimize(self, obj, var=None): - variables = var or obj.list_input_variables() - if not isinstance(variables, list): - variables = [variables] - grads = graph_util.gradients(obj, variables) - updates = [] - for i, v in enumerate(variables): - self.m.append(sym.Variable(self.name + '_m' + str(i), init=sym.zeros_like(v))) - self.v.append(sym.Variable(self.name + '_v' + str(i), init=sym.zeros_like(v))) - rate = sym.sqrt(1 - self.beta2 ** self.update_t) / (1 - self.beta1 ** self.update_t) - lr_t = self._get_lr() * rate - for variable, g, m, v in zip(variables, grads, self.m, self.v): - g = self.rescale_grad * g - if self.clip_gradient is not None: - g = sym.clip(g, a_min=-1 * self.clip_gradient, a_max=self.clip_gradient) - update_m = sym._assign(m, self.beta1 * m + (1 - self.beta1) * g) - update_v = sym._assign(v, self.beta2 * v + (1 - self.beta2) * g * g) - update_var = sym._assign(variable, variable - lr_t * (update_m / (sym.sqrt(update_v) \ - + self.epsilon) + self.wd * variable)) - updates.append(update_var) - return sym.Group(updates) diff --git a/nnvm/python/nnvm/compiler/param_dict.py b/nnvm/python/nnvm/compiler/param_dict.py deleted file mode 100644 index a543e0a827b3..000000000000 --- a/nnvm/python/nnvm/compiler/param_dict.py +++ /dev/null @@ -1,77 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -"""Helper utility to save parameter dict""" -import tvm - -_save_param_dict = tvm.get_global_func("nnvm.compiler._save_param_dict") -_load_param_dict = tvm.get_global_func("nnvm.compiler._load_param_dict") - -def save_param_dict(params): - """Save parameter dictionary to binary bytes. - - The result binary bytes can be loaded by the - GraphModule with API "load_params". - - Parameters - ---------- - params : dict of str to NDArray - The parameter dictionary. - - Returns - ------- - param_bytes: bytearray - Serialized parameters. - - Examples - -------- - .. code-block:: python - - # compile and save the modules to file. - graph, lib, params = nnvm.compiler.build( - graph, target, shape={"data", data_shape}, params=params) - module = graph_runtime.create(graph, lib, tvm.gpu(0)) - # save the parameters as byte array - param_bytes = nnvm.compiler.save_param_dict(params) - # We can serialize the param_bytes and load it back later. - # Pass in byte array to module to directly set parameters - module["load_params"](param_bytes) - """ - args = [] - for k, v in params.items(): - args.append(k) - args.append(tvm.nd.array(v)) - return _save_param_dict(*args) - - -def load_param_dict(param_bytes): - """Load parameter dictionary to binary bytes. - - Parameters - ---------- - param_bytes: bytearray - Serialized parameters. - - Returns - ------- - params : dict of str to NDArray - The parameter dictionary. - """ - if isinstance(param_bytes, (bytes, str)): - param_bytes = bytearray(param_bytes) - load_arr = _load_param_dict(param_bytes) - return {v.name : v.array for v in load_arr} diff --git a/nnvm/python/nnvm/contrib.py b/nnvm/python/nnvm/contrib.py deleted file mode 100644 index c3e943682db5..000000000000 --- a/nnvm/python/nnvm/contrib.py +++ /dev/null @@ -1,17 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Module space to register contrib functions. Leave empty""" diff --git a/nnvm/python/nnvm/cython/README b/nnvm/python/nnvm/cython/README deleted file mode 100644 index d9deab1abca9..000000000000 --- a/nnvm/python/nnvm/cython/README +++ /dev/null @@ -1 +0,0 @@ -Cython specific implementation of certain modules \ No newline at end of file diff --git a/nnvm/python/nnvm/cython/base.pyi b/nnvm/python/nnvm/cython/base.pyi deleted file mode 100644 index 40ef71a20546..000000000000 --- a/nnvm/python/nnvm/cython/base.pyi +++ /dev/null @@ -1,106 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ctypedef void* SymbolHandle -ctypedef void* OpHandle -ctypedef unsigned nn_uint - -cdef py_str(const char* x): - if PY_MAJOR_VERSION < 3: - return x - else: - return x.decode("utf-8") - - -cdef c_str(pystr): - """Create ctypes char * from a python string - Parameters - ---------- - string : string type - python string - - Returns - ------- - str : c_char_p - A char pointer that can be passed to C API - """ - return pystr.encode("utf-8") - - -cdef CALL(int ret): - if ret != 0: - raise NNVMError(NNGetLastError()) - - -cdef const char** CBeginPtr(vector[const char*]& vec): - if (vec.size() != 0): - return &vec[0] - else: - return NULL - -cdef vector[const char*] SVec2Ptr(vector[string]& vec): - cdef vector[const char*] svec - svec.resize(vec.size()) - for i in range(vec.size()): - svec[i] = vec[i].c_str() - return svec - - -cdef BuildDoc(nn_uint num_args, - const char** arg_names, - const char** arg_types, - const char** arg_descs, - remove_dup=True): - """Convert ctypes returned doc string information into parameters docstring. - - num_args : nn_uint - Number of arguments. - - arg_names : ctypes.POINTER(ctypes.c_char_p) - Argument names. - - arg_types : ctypes.POINTER(ctypes.c_char_p) - Argument type information. - - arg_descs : ctypes.POINTER(ctypes.c_char_p) - Argument description information. - - remove_dup : boolean, optional - Whether remove duplication or not. - - Returns - ------- - docstr : str - Python docstring of parameter sections. - """ - param_keys = set() - param_str = [] - for i in range(num_args): - key = arg_names[i] - if key in param_keys and remove_dup: - continue - param_keys.add(key) - type_info = arg_types[i] - ret = '%s : %s' % (key, type_info) - if len(arg_descs[i]) != 0: - ret += '\n ' + py_str(arg_descs[i]) - param_str.append(ret) - doc_str = ('Parameters\n' + - '----------\n' + - '%s\n') - doc_str = doc_str % ('\n'.join(param_str)) - return doc_str diff --git a/nnvm/python/nnvm/cython/symbol.pyx b/nnvm/python/nnvm/cython/symbol.pyx deleted file mode 100644 index eedf2afbbc2a..000000000000 --- a/nnvm/python/nnvm/cython/symbol.pyx +++ /dev/null @@ -1,233 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from __future__ import absolute_import as _abs - -import sys as _sys -import ctypes as _ctypes -from numbers import Number as _Number -from .._base import NNVMError -from ..name import NameManager -from ..attribute import AttrScope -from libcpp.vector cimport vector -from libcpp.string cimport string -from cpython.version cimport PY_MAJOR_VERSION - -include "./base.pyi" - -cdef extern from "nnvm/c_api.h": - const char* NNGetLastError(); - int NNListAllOpNames(nn_uint *out_size, - const char ***out_array); - int NNGetOpHandle(const char *op_name, - OpHandle *handle); - int NNGetOpInfo(OpHandle op, - const char **name, - const char **description, - nn_uint *num_doc_args, - const char ***arg_names, - const char ***arg_type_infos, - const char ***arg_descriptions, - const char **return_type); - int NNListOpNames(nn_uint *out_size, - const char ***out_array); - int NNSymbolCreateAtomicSymbol(OpHandle op, - nn_uint num_param, - const char **keys, - const char **vals, - SymbolHandle *out); - int NNSymbolFree(SymbolHandle symbol); - int NNSymbolSetAttrs(SymbolHandle symbol, - nn_uint num_param, - const char** keys, - const char** values); - int NNSymbolCompose(SymbolHandle sym, - const char* name, - nn_uint num_args, - const char** keys, - SymbolHandle* args); - -cdef class SymbolBase: - """Symbol is symbolic graph.""" - # handle for symbolic operator. - cdef SymbolHandle handle - - def __init__(self, handle): - cdef unsigned long ptr - if handle is None: - self.handle = NULL - else: - ptr = handle.value - self.handle = (ptr) - - def __dealloc__(self): - CALL(NNSymbolFree(self.handle)) - - @property - def handle(self): - return _ctypes.cast(self.handle, _ctypes.c_void_p) - - def _set_attr(self, **kwargs): - """Set the attribute of the symbol. - - Parameters - ---------- - **kwargs - The attributes to set - """ - SymbolSetAttr(self.handle, kwargs) - - -cdef SymbolSetAttr(SymbolHandle handle, dict kwargs): - cdef vector[string] sparam_keys - cdef vector[string] sparam_vals - cdef nn_uint num_args - for k, v in kwargs.items(): - sparam_keys.push_back(c_str(k)) - sparam_vals.push_back(c_str(str(v))) - # keep strings in vector - cdef vector[const char*] param_keys = SVec2Ptr(sparam_keys) - cdef vector[const char*] param_vals = SVec2Ptr(sparam_vals) - num_args = param_keys.size() - CALL(NNSymbolSetAttrs( - handle, num_args, CBeginPtr(param_keys), CBeginPtr(param_vals))) - - -_symbol_cls = SymbolBase - -cdef _set_symbol_class(cls): - global _symbol_cls - _symbol_cls = cls - -cdef NewSymbol(SymbolHandle handle): - """Create a new symbol given handle""" - sym = _symbol_cls(None) - (sym).handle = handle - return sym - -cdef _make_atomic_symbol_function(OpHandle handle, string name): - """Create an atomic symbol function by handle and funciton name.""" - cdef const char *real_name - cdef const char *desc - cdef nn_uint num_args - cdef const char** arg_names - cdef const char** arg_types - cdef const char** arg_descs - cdef const char* return_type - - CALL(NNGetOpInfo( - handle, &real_name, &desc, - &num_args, &arg_names, - &arg_types, &arg_descs, - &return_type)) - - param_str = BuildDoc(num_args, arg_names, arg_types, arg_descs) - func_name = py_str(name.c_str()) - doc_str = ('%s\n\n' + - '%s\n' + - 'Returns\n' + - '-------\n' + - 'result: Tensor\n' + - ' The result Tensor.') - doc_str = doc_str % (desc, param_str) - func_hint = func_name.lower() - - def creator(*args, **kwargs): - cdef vector[string] sparam_keys - cdef vector[string] sparam_vals - cdef vector[SymbolHandle] symbol_args - cdef 
vector[string] ssymbol_keys - cdef SymbolHandle ret_handle - - name = kwargs.pop("name", None) - attr = kwargs.pop("attr", None) - - if len(kwargs) != 0: - for k, v in kwargs.items(): - if isinstance(v, SymbolBase): - ssymbol_keys.push_back(c_str(k)) - symbol_args.push_back((v).handle) - else: - sparam_keys.push_back(c_str(k)) - sparam_vals.push_back(c_str(str(v))) - - if len(args) != 0: - if symbol_args.size() != 0: - raise TypeError("compose only accept input Symbols\ - either as positional or keyword arguments, not both") - for v in args: - if not isinstance(v, SymbolBase): - raise TypeError('Compose expect `Symbol` as arguments') - symbol_args.push_back((v).handle) - - cdef vector[const char*] param_keys = SVec2Ptr(sparam_keys) - cdef vector[const char*] param_vals = SVec2Ptr(sparam_vals) - cdef vector[const char*] symbol_keys = SVec2Ptr(ssymbol_keys) - - CALL(NNSymbolCreateAtomicSymbol( - handle, - param_keys.size(), - CBeginPtr(param_keys), - CBeginPtr(param_vals), - &ret_handle)) - num_args = (symbol_args.size()) - - attr = AttrScope.current.get(attr) - if attr: - SymbolSetAttr(ret_handle, attr) - name = NameManager.current.get(name, func_hint) - - cdef const char* c_name = NULL - - if name: - name = c_str(name) - c_name = name - - CALL(NNSymbolCompose( - ret_handle, - c_name, - num_args, - &symbol_keys[0] if symbol_keys.size() != 0 else NULL, - &symbol_args[0] if symbol_args.size() != 0 else NULL)) - return NewSymbol(ret_handle) - - creator.__name__ = func_name - creator.__doc__ = doc_str - return creator - - -def _init_symbol_module(symbol_class, root_namespace): - """List and add all the atomic symbol functions to current module.""" - cdef const char** op_name_ptrs - cdef nn_uint size - cdef vector[string] op_names - cdef OpHandle handle - - _set_symbol_class(symbol_class) - CALL(NNListAllOpNames(&size, &op_name_ptrs)) - for i in range(size): - op_names.push_back(string(op_name_ptrs[i])); - module_obj = _sys.modules["%s.symbol" % root_namespace] - module_internal = _sys.modules["%s._symbol_internal" % root_namespace] - for i in range(op_names.size()): - CALL(NNGetOpHandle(op_names[i].c_str(), &handle)) - function = _make_atomic_symbol_function(handle, op_names[i]) - if function.__name__.startswith('_'): - setattr(module_internal, function.__name__, function) - setattr(module_obj, function.__name__, function) - else: - setattr(module_obj, function.__name__, function) diff --git a/nnvm/python/nnvm/frontend/__init__.py b/nnvm/python/nnvm/frontend/__init__.py deleted file mode 100644 index 61c294f2606f..000000000000 --- a/nnvm/python/nnvm/frontend/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""NNVM frontends.""" -from __future__ import absolute_import -from .mxnet import from_mxnet -from .onnx import from_onnx -from .coreml import from_coreml -from .keras import from_keras -from .darknet import from_darknet -from .tensorflow import from_tensorflow -from .caffe2 import from_caffe2 diff --git a/nnvm/python/nnvm/frontend/caffe2.py b/nnvm/python/nnvm/frontend/caffe2.py deleted file mode 100644 index f951db66b5a6..000000000000 --- a/nnvm/python/nnvm/frontend/caffe2.py +++ /dev/null @@ -1,471 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=import-self, invalid-name, line-too-long, unused-argument -"""Caffe2 frontend""" -from __future__ import absolute_import as _abs -import tvm -from nnvm import symbol as _sym -from .common import get_nnvm_op, Renamer, AttrConverter as AttrCvt -from .onnx_caffe2_utils import dimension_picker, dimension_constraint, infer_channels, revert_caffe2_pad -from . import onnx - -__all__ = ['from_caffe2'] - - -def _clean_up_pool_args(args): - """ A helper function to clean up common arguments in conv and pooling ops. - """ - assert isinstance(args, dict) - - if 'stride_h' in args and 'stride_w' in args: - assert 'stride' not in args and 'strides' not in args - args['strides'] = [args['stride_h'], args['stride_w']] - args.pop('stride_h') - args.pop('stride_w') - elif 'stride' in args: - args['strides'] = [args['stride'], args['stride']] - args.pop('stride') - - # rename 'kernel', 'kernels', to 'kernel_shape' - if 'kernel_h' in args and 'kernel_w' in args: - assert 'kernel' not in args and 'kernels' not in args - args['kernel_shape'] = [args['kernel_h'], args['kernel_w']] - args.pop('kernel_h') - args.pop('kernel_w') - elif 'kernel' in args: - args['kernel_shape'] = [args['kernel'], args['kernel']] - args.pop('kernel') - elif 'kernels' in args: - args['kernel_shape'] = args['kernels'] - args.pop('kernels') - - if 'pad_t' in args and 'pad_l' in args and 'pad_b' in args and 'pad_r' in args: - assert 'pad' not in args and 'pads' not in args - args['pads'] = [ - args['pad_t'], args['pad_l'], args['pad_b'], args['pad_r'] - ] - for pad in ['pad_t', 'pad_l', 'pad_b', 'pad_r']: - args.pop(pad) - elif 'pad' in args: - args['pads'] = [args['pad'], args['pad']] - args.pop('pad') - - if 'dilation_h' in args and 'dilation_w' in args: - assert 'dilation' not in args and 'dilations' not in args - args['dilations'] = [args['dilation_h'], args['dilation_w']] - args.pop('dilation_h') - args.pop('dilation_w') - elif 'dilation' in args: - args['dilations'] = [args['dilation'], args['dilation']] - args.pop('dilation') - - return args - - -class Caffe2OpConverter(object): - """ A helper class for holding Caffe2 op converters. - """ - - @classmethod - def get_converter(cls): - """ Get converter. 
- - :return: converter, which should be `_impl`. - """ - - if hasattr(cls, '_impl'): - return getattr(cls, '_impl') - raise tvm.error.OpNotImplemented( - 'Operator {} is not implemented in frontend Caffe2.'.format(cls.__name__)) - - -_caffe2_internal_args = { - # nnpack args - 'algo', - 'convolution_transform_strategy', - 'float16_compute', - 'shared_buffer', - - # training args - 'init_params', - 'cudnn_exhaustive_search', - 'exhaustive_search', - - # training args - 'adj', - 'hwgq', - - # args that we don't care - 'legacy_pad', -} - - -class Pool(Caffe2OpConverter): - """ A helper class for pool op converters. - """ - - name = '' - - @classmethod - def _impl(cls, inputs, args, params): - _clean_up_pool_args(args) - if 'global_pooling' in args and args['global_pooling'] == 1: - op_name = dimension_picker('global_' + cls.name) - return get_nnvm_op(op_name(args))(*inputs) - - return AttrCvt( - op_name=dimension_picker(cls.name), - transforms={ - 'kernel_shape': 'pool_size', - 'pads': ('padding', (0, 0), revert_caffe2_pad), - 'strides': 'strides', - }, - excludes={ - # TVM poolop does not support dilation - 'dilations', - }, - ignores=_caffe2_internal_args | {'global_pooling', 'order'}, - custom_check=dimension_constraint())(inputs, args, params) - - -class AveragePool(Pool): - name = 'avg_pool' - - -class MaxPool(Pool): - name = 'max_pool' - - -class Conv(Caffe2OpConverter): - """ Operator converter for Conv. - """ - - @classmethod - def _impl(cls, inputs, args, params): - # get number of channels - channels = infer_channels(inputs[1], params) - args['channels'] = channels - _clean_up_pool_args(args) - return AttrCvt( - op_name=dimension_picker('conv'), - transforms={ - 'group': ('groups', 1), - 'kernel_shape': - 'kernel_size', - 'pads': ('padding', (0, 0), revert_caffe2_pad), - 'strides': - 'strides', - 'dilations': ('dilation', (1, 1)), - 'order': - ('layout', ("NCHW"), - lambda x: x if isinstance(x, str) else x.decode('UTF-8')), - }, - excludes={}, - ignores=_caffe2_internal_args, - extras={'use_bias': len(inputs) == 3}, - custom_check=dimension_constraint())(inputs, args, params) - - -class Concat(Caffe2OpConverter): - """ Operator converter for Concat. - """ - - @classmethod - def _impl(cls, inputs, args, params): - def _get_axis_from_order_str(order): - order = order if isinstance(order, str) else order.decode('UTF-8') - if order == 'NCHW': - return 1 - if order == 'NHWC': - return 3 - raise tvm.error.OpAttributeInvalid('Value {} in attribute {} of operator {} is not valid.'.format(order, 'order', 'Concat')) - - return AttrCvt( - op_name='concatenate', - transforms={ - 'order': ('axis', (1), _get_axis_from_order_str), - }, - excludes={ - 'add_axis', - })(inputs, args, params) - - -class NormalizePlanarYUV(Caffe2OpConverter): - """ Operator converter for NormalizePlanarYUV. - caffe2 definition: https://github.com/pytorch/pytorch/blob/master/caffe2/operators/norm_planar_yuv_op.cc - """ - - @classmethod - def _impl(cls, inputs, args, params): - assert len(inputs) == 3 - mean = _sym.expand_dims(inputs[1], axis=2, num_newaxis=2) - std = _sym.expand_dims(inputs[2], axis=2, num_newaxis=2) - - return _sym.broadcast_div(_sym.broadcast_sub(inputs[0], mean), std) - - -class ResizeNearest(Caffe2OpConverter): - """ Operator converter for Upsample (nearest mode). 
- """ - - @classmethod - def _impl(cls, inputs, args, params): - width_scale = args['width_scale'] if 'width_scale' in args else 1 - height_scale = args['height_scale'] if 'height_scale' in args else 1 - assert width_scale == height_scale - - return _sym.upsampling( - inputs[0], scale=int(width_scale), method="NEAREST_NEIGHBOR") - - -class FC(Caffe2OpConverter): - """ Operator converter for FC. - """ - - @classmethod - def _impl(cls, inputs, args, params): - inputs[0] = _sym.flatten(inputs[0]) - args['units'] = infer_channels(inputs[1], params) - return AttrCvt( - 'dense', - ignores=['axis', 'axis_w'], - extras={'use_bias': len(inputs) == 3}, - )(inputs, args, params) - - -class SpatialBN(Caffe2OpConverter): - """ Operator converter for SpatialBN. - """ - - @classmethod - def _impl(cls, inputs, args, params): - return AttrCvt( - op_name='batch_norm', - disables=['momentum'], - ignores=[ - 'order', 'spatial', 'is_test', 'consumed_inputs', 'num_batches' - ])(inputs, args, params) - - -# compatible operators that do NOT require any conversion. -_identity_list = [] - -# _convert_map defines maps of name to converter functor(callable) -# for 1 to 1 mapping, use Renamer if nothing but name is different -# use AttrCvt if attributes need to be converted -# for 1 to N mapping(composed), use custom callable functions -# for N to 1 mapping, currently not supported(?) - -# Minimal set of ops for squeezenet and resnet50 -def _get_convert_map(): - return { - # caffe2/onnx common operators - 'Add': onnx.Add.get_converter(opset=1), - 'Sum': onnx.Sum.get_converter(opset=1), - 'Softmax': onnx.Softmax.get_converter(opset=1), - - # nn - 'AveragePool': AveragePool.get_converter(), - 'MaxPool': MaxPool.get_converter(), - 'Conv': Conv.get_converter(), - 'Concat': Concat.get_converter(), - 'FC': FC.get_converter(), - 'SpatialBN': SpatialBN.get_converter(), - 'ResizeNearest': ResizeNearest.get_converter(), - 'Relu': AttrCvt('relu', {}, ignores=['order']), - 'Sigmoid': Renamer('sigmoid'), - 'Dropout': AttrCvt('dropout', {'ratio': 'rate'}, ignores=['is_test']), - - # c2 image preprocessing ops - 'NormalizePlanarYUV': NormalizePlanarYUV.get_converter(), - } - - -class Caffe2NetDef(object): - """A helper class for handling nnvm graph copying from pb2.GraphProto. - Definition: https://github.com/pytorch/pytorch/blob/master/caffe2/proto/caffe2.proto - """ - - def __init__(self): - self._nodes = {} - self._params = {} - self._visited_nodes = set() - self._ops = {} - - def from_caffe2(self, init_net, predict_net): - """Construct nnvm nodes from caffe2 graph. 
- - Parameters - ---------- - workspace : Caffe2 workspace - predict_net : protobuf object - - Returns - ------- - sym : nnvm.sym.Symbol - The returned nnvm symbol - params : dict - A dict of name: tvm.nd.array pairs, used as pretrained weights - """ - from caffe2.python import workspace - workspace.RunNetOnce(init_net) - - # Input - input_name = predict_net.op[0].input[0] - - # Params - self._params = {} - used_blobs = set() - for c2_op in predict_net.op: - for i in c2_op.input: - used_blobs.add(i) - for blob in workspace.Blobs(): - if blob in used_blobs and blob != input_name: - self._params[blob] = tvm.nd.array(workspace.FetchBlob(blob)) - - # Variables - self._nodes = {} - for blob in predict_net.external_input: - self._nodes[blob] = _sym.Variable(name=blob) - - # Ops - for c2_op in predict_net.op: - for blob in c2_op.output: - self._ops[blob] = c2_op - for c2_op in predict_net.op: - self._process_op(c2_op) - - # Outputs - out = [] - for blob in predict_net.external_output: - out.append(self._nodes[blob]) - - if len(out) > 1: - sym = _sym.Group(out) - else: - sym = out[0] - - return sym, self._params - - def _get_node(self, blob): - """Get the nnvm Symbol of blob and detect cyclic dependency in the graph.""" - if blob in self._nodes: - return self._nodes[blob] - - assert blob not in self._visited_nodes, 'Cyclic dependency in the graph (in {})'.format( - blob) - self._visited_nodes.add(blob) - - self._process_op(self._ops[blob]) - return self._nodes[blob] - - def _process_op(self, c2_op): - op_type = c2_op.type - args = self._parse_arg(c2_op.arg) - inputs = [self._get_node(i) for i in c2_op.input] - tvm_op = self._convert_operator(op_type, inputs, args) - # Ignore all outputs except the first one - self._nodes[c2_op.output[0]] = tvm_op[0] - - def _parse_arg(self, arg): - """Convert a list of Argument to a dict, with names as keys.""" - args = {} - for a in arg: - for f in ['f', 'i', 's']: - if a.HasField(f): - args[a.name] = getattr(a, f) - for f in ['floats', 'ints', 'strings']: - if list(getattr(a, f)): - assert a.name not in args, "Only one type of attr is allowed" - args[a.name] = tuple(getattr(a, f)) - for f in ['n']: - if a.HasField(f): - raise NotImplementedError( - "Field {} is not supported in nnvm.".format(f)) - for f in ['nets']: - if list(getattr(a, f)): - raise NotImplementedError( - "Field {} is not supported in nnvm.".format(f)) - if a.name not in args: - raise ValueError("Cannot parse attribute: \n{}\n.".format(a)) - return args - - def _convert_operator(self, - op_type, - inputs, - args, - identity_list=None, - convert_map=None): - """Convert from Caffe2 operator to nnvm operator. - The converter must specify conversions explicitly for incompatible name, and - apply handlers to operator attributes. - - Parameters - ---------- - op_type : str - Operator name, such as Convolution, FullyConnected - inputs : list of nnvm.Symbol - List of input symbols. 
- args : dict - Dict of operator attributes - identity_list : list - List of operators that don't require conversion - convert_map : dict - Dict of name : callable, where name is the op's name that - require conversion to nnvm, callable are functions which - take args and return (new_op_type, new_args) - - Returns - ------- - sym : nnvm.Symbol - Converted nnvm Symbol - """ - identity_list = identity_list if identity_list else _identity_list - convert_map = convert_map if convert_map else _get_convert_map() - if op_type in identity_list: - sym = get_nnvm_op(op_type)(*inputs, **args) - elif op_type in convert_map: - # Add a sanitizing step to convert all byte strings in args to strings - sym = convert_map[op_type](inputs, args, self._params) - else: - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend Caffe2.'.format(op_type)) - return sym - - -def from_caffe2(init_net, predict_net): - """Load caffe2 graph which contains init_net and predict_net into nnvm graph. - - Parameters - ---------- - init_net : protobuf object - Caffe2 NetDef containing the weights - - predict_net : protobuf object - Caffe2 NetDef containing the graph - - Returns - ------- - sym : nnvm.Symbol - Compatible nnvm symbol - - params : dict of str to tvm.ndarray - Dict of converted parameters stored in tvm.ndarray format - """ - - caffe2 = Caffe2NetDef() - return caffe2.from_caffe2(init_net, predict_net) diff --git a/nnvm/python/nnvm/frontend/common.py b/nnvm/python/nnvm/frontend/common.py deleted file mode 100644 index 0e09a2c43323..000000000000 --- a/nnvm/python/nnvm/frontend/common.py +++ /dev/null @@ -1,204 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Shared functions and classes for frontends.""" -from __future__ import absolute_import as _abs -import logging -from nnvm import sym as _sym -from .._base import string_types - -def get_nnvm_op(op_name): - op = getattr(_sym, op_name) - if not op: - raise OpNotImplemented( - 'Operator {} is not supported.'.format(op)) - return op - -def required_attr(attr, key, op_name): - assert isinstance(attr, dict) - if key not in attr: - raise OpAttributeRequired( - 'Required attribute {} not found in operator {}'.format(key, op_name)) - return attr[key] - -def parse_tshape(tshape): - """Parse tshape in string.""" - return [int(x.strip()) for x in tshape.strip('()').split(',')] - -def parse_bool_str(attr, key, default='False'): - """Parse bool string to boolean.""" - return attr.get(key, default).strip().lower() in ['true', '1', 't', 'y', 'yes'] - -class Renamer(object): - """A simply renamer for operators. 
- - Parameters - ---------- - new_name : str - The new name for the operator - """ - def __init__(self, new_name): - self._new_name = new_name - - def __call__(self, inputs, attrs, *args): - return get_nnvm_op(self._new_name)(*inputs, **attrs) - - -class AttrConverter(object): - """Common attribute converter. An AttrConverter instance is a callable: - ``` - attr_converter = AttrConverter(op_name, transforms={'a':'b', 'c':('d', 1)}) - new_op_name, new_attr = attr_converter(attrs) - ``` - - Parameters - ---------- - op_name : str or callable - If set as str, returned operator name is the str. - If set as callable, returned operator is the str returned by calling: - `op_name = func(attr)` - transforms : dict of `new_name, or (new_name, default_value, transform function)` - If only a new_name is provided, it's like renaming the attribute name. - If default_value if provided, then the attribute is considered as optional. - If transform function is provided, the original attribute value is handled - by transform function. - excludes : list - A list of excluded attributes that should `NOT` appear. - Raise NotImplementedError if occurred. - disables : list - A list of attributes that is disabled in nnvm. Log warnings. - ignores : list - A list of attributes that is ignored in nnvm. Debug level logging. - extras : dict - A series of additional attributes should be added anyway to the returned - attribute dict. - custom_check : callable - A custom function takes attribute, and return True/False. - Raise RuntimeError if not bool(True) returned. - """ - def __init__(self, op_name, transforms=None, - excludes=None, disables=None, ignores=None, - extras=None, custom_check=None): - self._op_name = op_name - self._transforms = transforms if transforms else {} - self._excludes = excludes if excludes else [] - self._disables = disables if disables else [] - self._ignores = ignores if ignores else [] - self._extras = extras if extras else {} - self._custom_check = custom_check - - def __call__(self, inputs, attrs, *args): - # apply custom check - if self._custom_check: - func, msg = self._custom_check - if not func(attrs): - raise RuntimeError("Check failed: {}".format(msg)) - # get new op_name - if isinstance(self._op_name, string_types): - op_name = self._op_name - else: - assert callable(self._op_name), "op_name can either be string or callable" - op_name = self._op_name(attrs) - # convert attributes - new_attrs = {} - for k in attrs.keys(): - if k in self._excludes: - raise NotImplementedError("Attribute {} not supported yet.".format(k)) - elif k in self._disables: - logging.warning("Attribute %s is disabled in nnvm.sym.%s", k, op_name) - elif k in self._ignores: - logging.debug("Attribute %s is ignored in nnvm.sym.%s", k, op_name) - elif k in self._transforms: - new_name, defaults, transform = self._parse_default(self._transforms[k]) - if defaults is None: - new_attr = self._required_attr(attrs, k) - else: - new_attr = attrs.get(k, None) - if new_attr is None: - new_attrs[new_name] = defaults - else: - new_attrs[new_name] = transform(new_attr) - else: - # copy - new_attrs[k] = attrs[k] - # add extras - new_attrs.update(self._extras) - return get_nnvm_op(op_name)(*inputs, **new_attrs) - - def _parse_default(self, target): - """Helper function to parse default values.""" - if not isinstance(target, (list, tuple)): - k, v, t = target, None, lambda x: x - elif len(target) == 1: - k, v, t = target[0], None, lambda x: x - elif len(target) == 2: - k, v, t = target[0], target[1], lambda x: x - elif len(target) 
> 2: - k, v, t = target[0], target[1], target[2] - else: - k = None # should raise - if not isinstance(k, string_types): - msg = "{} is not a valid target, (name, default) expected.".format(target) - raise ValueError(msg) - return k, v, t - - def _parse_bool(self, value): - """Helper function to parse default boolean values.""" - if isinstance(value, string_types): - return value.strip().lower() in ['true', '1', 't', 'y', 'yes'] - return bool(value) - - def _required_attr(self, attr, key): - """Wrapper for getting required attributes.""" - assert isinstance(attr, dict) - if key not in attr: - raise AttributeError("Required attribute {} not found.".format(key)) - return attr[key] - - -class SymbolTable(object): - """Table storing symbols by names.""" - def __init__(self): - self.vars = {} - self.params = {} - self.const_ctr = 1 - self.in_padding = False - self.paddings = [0, 0] - - def new_const(self, value): - name = "_param_%d" % (self.const_ctr) - self.const_ctr += 1 - self.params[name] = value - self.vars[name] = _sym.Variable(name=name) - return self.vars[name] - - def get_var(self, name, must_contain=True): - if must_contain: - assert name in self.vars - if name not in self.vars: - self.vars[name] = _sym.Variable(name=name) - return self.vars[name] - - def set_var(self, name, sym): - assert isinstance(sym, _sym.Symbol) - self.vars[name] = sym - - def set_padding(self, paddings): - self.paddings = paddings - self.in_padding = True - - def clear_padding(self): - self.in_padding = False diff --git a/nnvm/python/nnvm/frontend/coreml.py b/nnvm/python/nnvm/frontend/coreml.py deleted file mode 100644 index c5b0c0a799ec..000000000000 --- a/nnvm/python/nnvm/frontend/coreml.py +++ /dev/null @@ -1,431 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""CoreML frontend.""" -from __future__ import absolute_import as _abs -import numpy as np -import tvm -from .common import SymbolTable -from .. 
import symbol as _sym -from .._base import string_types - -__all__ = ['from_coreml'] - - -def NeuralNetworkImageScaler(op, insym, symtab): - # this changes the symbol - biases = np.array([op.blueBias, op.greenBias, op.redBias]).reshape([3, 1, 1]) - bias = symtab.new_const(biases) - ret = _sym.__mul_scalar__(insym, scalar=op.channelScale) - ret = _sym.broadcast_add(ret, bias) - return ret - - -def NeuralNetworkMeanImage(op, insym, symtab): - # this changes the symbol - ret = _sym.elemwise_sub(insym, scalar=op.meanImage) - return ret - - -def ConvolutionLayerParams(op, insym, symtab): - """Convolution layer params.""" - weights = symtab.new_const(np.array(list(op.weights.floatValue)).reshape( - tuple([op.outputChannels, op.kernelChannels] + list(op.kernelSize)))) - if op.hasBias: - biases = symtab.new_const(list(op.bias.floatValue)) - dilation = list(op.dilationFactor) - if not dilation: - dilation = [1, 1] - params = {'channels':op.outputChannels, - 'kernel_size':list(op.kernelSize), - 'strides':list(op.stride), - 'dilation': dilation, - 'use_bias': op.hasBias, - 'groups':op.nGroups} - - if op.WhichOneof('ConvolutionPaddingType') == 'valid': - valid = op.valid - padding = [b.startEdgeSize for b in valid.paddingAmounts.borderAmounts] - padding2 = [b.endEdgeSize for b in valid.paddingAmounts.borderAmounts] - for i, j in zip(padding, padding2): - assert i == j, "Asymmetry padding not supported" - if padding: - params['padding'] = padding - elif op.WhichOneof('ConvolutionPaddingType') == 'same': - kernel = params['kernel_size'] - pad_h = kernel[0] - 1 - pad_w = kernel[1] - 1 - pad_t = pad_h // 2 - pad_l = pad_w // 2 - pad_b = pad_h - pad_t - pad_r = pad_w - pad_l - assert pad_t == pad_r and pad_l == pad_b, "Asymmetry padding not supported" - params['padding'] = [pad_t, pad_l] - else: - raise NotImplementedError("Valid/Same convolution padding implemented") - - if op.hasBias: - pos = [insym, weights, biases] - else: - pos = [insym, weights] - - # consume padding layer - if symtab.in_padding: - params['padding'] = [sum(x) for x in zip(params.get('padding', [0, 0]), symtab.paddings)] - symtab.clear_padding() - - if op.isDeconvolution: - ret = _sym.conv2d_transpose(*pos, **params) - else: - ret = _sym.conv2d(*pos, **params) - return ret - -def BatchnormLayerParams(op, insym, symtab): - """Get layer of batchnorm parameter""" - # this changes the symbol - if op.instanceNormalization: - msg = 'Operator "instance normalization" is not supported in frontend CoreML.' 
- raise tvm.error.OpNotImplemented(msg) - else: - params = {'gamma':symtab.new_const(list(op.gamma.floatValue)), - 'beta':symtab.new_const(list(op.beta.floatValue)), - 'moving_mean':symtab.new_const(list(op.mean.floatValue)), - 'moving_var': symtab.new_const(list(op.variance.floatValue)), - 'epsilon': op.epsilon} - return _sym.batch_norm(data=insym, **params) - -def ActivationParams(op, insym, symtab): - """Get activation parameters""" - whichActivation = op.WhichOneof('NonlinearityType') - par = getattr(op, whichActivation) - if whichActivation == 'linear': - return _sym.__add_scalar__(_sym.__mul_scalar__(insym, scalar=par.alpha), scalar=par.beta) - if whichActivation == 'ReLU': - return _sym.relu(insym) - if whichActivation == 'leakyReLU': - return _sym.leaky_relu(insym, alpha=par.alpha) - if whichActivation == 'thresholdedReLU': - alpha_tensor = _sym.full_like(insym, fill_value=float(par.alpha)) - return _sym.elemwise_mul(insym, _sym.greater(insym, alpha_tensor)) - if whichActivation == 'PReLU': - return _sym.prelu(insym, alpha=par.alpha) - if whichActivation == 'tanh': - return _sym.tanh(insym) - if whichActivation == 'scaledTanh': - return _sym.__mul_scalar__(_sym.tanh(_sym.__mul_scalar__( - insym, scalar=par.beta)), scalar=par.alpha) - if whichActivation == 'sigmoid': - return _sym.sigmoid(insym) - if whichActivation == 'sigmoidHard': - transformX = (par.alpha * insym) + par.beta - return _sym.clip(transformX, a_min=0, a_max=1) - if whichActivation == 'ELU': - return _sym.__mul_scalar__(_sym.__add_scalar__( - _sym.exp(insym), scalar=-1), scalar=par.alpha) - if whichActivation == 'softsign': - return insym / (1 + (_sym.relu(insym) + _sym.relu(_sym.negative(insym)))) - if whichActivation == 'softplus': - return _sym.log(_sym.__add_scalar__(_sym.exp(insym), scalar=1)) - if whichActivation == 'parametricSoftplus': - alpha = list(par.alpha.floatValue) - beta = list(par.alpha.floatValue) - if len(alpha) == 1: - return _sym.__mul_scalar__(_sym.log(_sym.__add_scalar__( - _sym.exp(insym), scalar=beta[0])), scalar=alpha[0]) - alpha = np.array(alpha).reshape((len(alpha), 1, 1)) - beta = np.array(beta).reshape((len(beta), 1, 1)) - alphasym = symtab.new_const(alpha) - betasym = symtab.new_const(beta) - return _sym.broadcast_mul(_sym.log(_sym.broadcast_add( - _sym.exp(insym), betasym)), alphasym) - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend CoreML.'.format(whichActivation)) - -def ScaleLayerParams(op, insym, symtab): - """Scale layer params.""" - scale = symtab.new_const(np.array(list(op.scale.floatValue)).reshape( - tuple(list(op.shapeScale) + [1, 1]))) - # scale = _sym.reshape(scale, shape=tuple(list(op.shapeScale) + [1,1])) - ret = _sym.broadcast_mul(insym, scale) - if op.hasBias: - bias = symtab.new_const(np.array(list(op.bias.floatValue)).reshape( - tuple(list(op.shapeBias) + [1, 1]))) - # bias = _sym.reshape(bias, shape=tuple(list(op.shapeBias) + [1,1])) - ret = _sym.broadcast_add(ret, bias) - return ret - -def PoolingLayerParams(op, insym, symtab): - """get pooling parameters""" - if op.globalPooling: - if op.type == 0: - return _sym.global_max_pool2d(insym) - if op.type == 1: - return _sym.global_avg_pool2d(insym) - raise tvm.error.OpNotImplemented( - 'Operator pooling (not max or average) is not supported in frontend CoreML.') - - else: - params = {'pool_size':list(op.kernelSize), - 'strides':list(op.stride)} - - if op.WhichOneof('PoolingPaddingType') == 'valid': - valid = op.valid - padding = [b.startEdgeSize for b in valid.paddingAmounts.borderAmounts] 
- padding2 = [b.endEdgeSize for b in valid.paddingAmounts.borderAmounts] - for i, j in zip(padding, padding2): - assert i == j - params['padding'] = padding - elif op.WhichOneof('PoolingPaddingType') == 'includeLastPixel': - # I don't know if this is correct - valid = op.includeLastPixel - padding = list(valid.paddingAmounts) - params['padding'] = padding - params['ceil_mode'] = True - else: - msg = 'Value {} in attribute PoolingPaddingType of operator Pooling is not valid.' - raise tvm.error.OpAttributeInvalid(msg.format(op.WhichOneof('PoolingPaddingType'))) - - # consume padding layer - if symtab.in_padding: - params['padding'] = [sum(x) for x in zip( - params.get('padding', [0, 0]), symtab.paddings)] - symtab.clear_padding() - - if op.type == 0: - return _sym.max_pool2d(insym, **params) - if op.type == 1: - return _sym.avg_pool2d(insym, **params) - msg = 'Operator pooling (not max or average) is not supported in frontend CoreML.' - raise tvm.error.OpNotImplemented(msg) - -def SoftmaxLayerParams(op, insym, symtab): - return _sym.softmax(_sym.flatten(insym)) - -def InnerProductLayerParams(op, insym, symtab): - weights = symtab.new_const(np.array(op.weights.floatValue).reshape( - (op.outputChannels, op.inputChannels))) - par = {'weight':weights, 'use_bias':False, 'units':op.outputChannels} - if op.hasBias: - bias = symtab.new_const(np.array(op.bias.floatValue)) - par['bias'] = bias - par['use_bias'] = True - return _sym.dense(data=insym, **par) - -def AddLayerParams(op, insyms, symtab): - if not isinstance(insyms, list): - insyms = [insyms] - ret = insyms[0] - for i in range(1, len(insyms)): - ret = _sym.elemwise_add(ret, insyms[i]) - if op.alpha > 0: - ret = _sym.__add_scalar__(ret, scalar=op.alpha) - return ret - -def MultiplyLayerParams(op, insyms, symtab): - if not isinstance(insyms, list): - insyms = [insyms] - ret = insyms[0] - for i in range(1, len(insyms)): - ret = _sym.elemwise_mul(ret, insyms[i]) - if op.alpha != 1: - ret = _sym.__mul_scalar__(ret, scalar=op.alpha) - return ret - -def ConcatLayerParams(op, insyms, symtab): - if not isinstance(insyms, list): - insyms = [insyms] - if op.sequenceConcat: - raise tvm.error.OpNotImplemented( - 'Operator Sequence Concat is not supported in frontend CoreML.') - ret = _sym.concatenate(*insyms, axis=1) - return ret - -def FlattenLayerParams(op, insym, symtab): - if op.mode == 1: - insym = _sym.transpose(_sym.reshape(insym, shape=(0, 0, -1)), axes=(0, 2, 1)) - return _sym.flatten(insym) - -def PaddingLayerParams(op, insym, symtab): - """Hacking for padding layer params.""" - if op.WhichOneof('PaddingType') == 'constant': - constant = op.constant - if constant.value != 0: - msg = 'Value {} in attribute "padding value" of operator Padding is not valid.' 
- raise tvm.error.OpAttributeInvalid(msg.format(constant.value)) - padding = [b.startEdgeSize for b in op.paddingAmounts.borderAmounts] - padding2 = [b.endEdgeSize for b in op.paddingAmounts.borderAmounts] - for i, j in zip(padding, padding2): - assert i == j - symtab.set_padding(padding) - else: - raise tvm.error.OpNotImplemented( - 'Operator "non-constant padding" is not supported in frontend CoreML.') - return insym - -def PermuteLayerParams(op, insym, symtab): - axes = tuple(op.axis) - return _sym.transpose(insym, axes=axes) - -def UpsampleLayerParams(op, insym, symtab): - if op.scalingFactor[0] != op.scalingFactor[1]: - raise tvm.error.OpAttributeInvalid( - 'Height and width scaling factors of Upsample operator must be equal.') - interpolationMode = 'NEAREST_NEIGHBOR' if op.mode == 0 else 'BILINEAR' - return _sym.upsampling(insym, scale=op.scalingFactor[0], method=interpolationMode) - -def L2NormalizeLayerParams(op, insym, symtab): - return _sym.l2_normalize(insym, eps=op.epsilon, axis=1) - -def LRNLayerParams(op, insym, symtab): - par = {} - par['size'] = op.localSize - par['bias'] = op.k - par['alpha'] = op.alpha - par['beta'] = op.beta - par['axis'] = 1 #default layout is nchw - return _sym.lrn(data=insym, **par) - -def AverageLayerParams(op, insyms, symtab): - if not isinstance(insyms, list) or len(insyms) < 2: - raise ValueError("Expect minimum 2 inputs") - count = len(insyms) - _sum = insyms[0] - for i in range(1, count): - _sum = _sym.broadcast_add(_sum, insyms[i]) - return _sum / count - -def MaxLayerParams(op, insyms, symtab): - if not isinstance(insyms, list) or len(insyms) < 2: - raise ValueError("Expect minimum 2 inputs") - _max = insyms[0] - for i in range(1, len(insyms)): - _max = _sym.broadcast_max(_max, insyms[i]) - return _max - -def MinLayerParams(op, insyms, symtab): - if not isinstance(insyms, list) or len(insyms) < 2: - raise ValueError("Expect minimum 2 inputs") - _min = insyms[0] - for i in range(1, len(insyms)): - _min = _sym.broadcast_min(_min, insyms[i]) - return _min - -_convert_map = { - 'NeuralNetworkMeanImage': NeuralNetworkMeanImage, - 'NeuralNetworkImageScaler': NeuralNetworkImageScaler, - 'ConvolutionLayerParams':ConvolutionLayerParams, - 'BatchnormLayerParams':BatchnormLayerParams, - 'ActivationParams':ActivationParams, - 'ScaleLayerParams':ScaleLayerParams, - 'PoolingLayerParams':PoolingLayerParams, - 'SoftmaxLayerParams':SoftmaxLayerParams, - 'InnerProductLayerParams':InnerProductLayerParams, - 'AddLayerParams':AddLayerParams, - 'MultiplyLayerParams':MultiplyLayerParams, - 'FlattenLayerParams':FlattenLayerParams, - 'ConcatLayerParams':ConcatLayerParams, - 'PaddingLayerParams':PaddingLayerParams, - 'PermuteLayerParams':PermuteLayerParams, - 'UpsampleLayerParams':UpsampleLayerParams, - 'L2NormalizeLayerParams':L2NormalizeLayerParams, - 'LRNLayerParams':LRNLayerParams, - 'AverageLayerParams':AverageLayerParams, - 'MaxLayerParams':MaxLayerParams, - 'MinLayerParams':MinLayerParams, -} - -def coreml_op_to_nnvm(op, inname, outname, symtab): - """Convert coreml layer to nnvm layer. 
- - Parameters - ---------- - coremlop: a coreml protobuf bit - - prevsym: previous nnvm symbol - - Returns: - ------- - nnvm.sym.Symbol - Converted symbol - """ - classname = type(op).__name__ - if classname not in _convert_map: - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend CoreML.'.format(classname)) - if isinstance(inname, string_types): - insym = symtab.get_var(inname) - else: - insym = [symtab.get_var(i) for i in inname] - ret = _convert_map[classname](op, insym, symtab) - if outname: - symtab.set_var(outname, ret) - if classname != 'PaddingLayerParams': - assert not symtab.in_padding, "Previous padding not consumed by conv/pool" - -def from_coreml(model): - """Convert from coreml model into NNVM format. - - Parameters - ---------- - model: - coremltools.models.MLModel of a NeuralNetworkClassifier - - Returns - ------- - sym : nnvm.Symbol - Compatible nnvm symbol - - params : dict of str to tvm.NDArray - The parameter dict to be used by nnvm - """ - try: - import coremltools as cm - except ImportError: - raise ImportError('The coremltools package must be installed') - - assert isinstance(model, cm.models.MLModel) - spec = model.get_spec() - modeltype = spec.WhichOneof('Type') - assert modeltype in ['neuralNetworkClassifier', 'neuralNetwork', 'neuralNetworkRegressor'] - cc = getattr(spec, modeltype) - - symtab = SymbolTable() - for i in spec.description.input: - symtab.get_var(i.name, must_contain=False) - - for pp in cc.preprocessing: - whichpp = pp.WhichOneof('preprocessor') - ppmethod = getattr(pp, whichpp) - # the NeuralNetworkImageScalar doesn't seem to have a featureName? - if whichpp == 'scaler': - for i in spec.description.input: - coreml_op_to_nnvm(ppmethod, i.name, i.name, symtab) - else: - coreml_op_to_nnvm(ppmethod, pp.featureName, pp.featureName, symtab) - - for l in cc.layers: - layertype = l.WhichOneof('layer') - layerop = getattr(l, layertype) - assert len(l.output) == 1 - if len(l.input) == 1: - coreml_op_to_nnvm(layerop, l.input[0], l.output[0], symtab) - else: - coreml_op_to_nnvm(layerop, list(l.input), l.output[0], symtab) - returns = [symtab.get_var(i.name, must_contain=False) for i in spec.description.output] - tvmparams = {k:tvm.nd.array(np.array(v, dtype=np.float32)) for k, v in symtab.params.items()} - # for now return first output - return returns[0], tvmparams diff --git a/nnvm/python/nnvm/frontend/darknet.py b/nnvm/python/nnvm/frontend/darknet.py deleted file mode 100644 index 8c6020500b45..000000000000 --- a/nnvm/python/nnvm/frontend/darknet.py +++ /dev/null @@ -1,979 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -DarkNet symbol frontend. -""" - -from __future__ import absolute_import as _abs -import numpy as np -import tvm -from .. 
import symbol as _sym -from .common import get_nnvm_op, required_attr, parse_tshape, parse_bool_str - -class LAYERTYPE(object): - """Darknet LAYERTYPE Class constant.""" - CONVOLUTIONAL = 0 - DECONVOLUTIONAL = 1 - CONNECTED = 2 - MAXPOOL = 3 - SOFTMAX = 4 - DETECTION = 5 - DROPOUT = 6 - CROP = 7 - ROUTE = 8 - COST = 9 - NORMALIZATION = 10 - AVGPOOL = 11 - LOCAL = 12 - SHORTCUT = 13 - ACTIVE = 14 - RNN = 15 - GRU = 16 - LSTM = 17 - CRNN = 18 - BATCHNORM = 19 - NETWORK = 20 - XNOR = 21 - REGION = 22 - YOLO = 23 - REORG = 24 - UPSAMPLE = 25 - LOGXENT = 26 - L2NORM = 27 - BLANK = 28 - -class ACTIVATION(object): - """Darknet ACTIVATION Class constant.""" - LOGISTIC = 0 - RELU = 1 - RELIE = 2 - LINEAR = 3 - RAMP = 4 - TANH = 5 - PLSE = 6 - LEAKY = 7 - ELU = 8 - LOGGY = 9 - STAIR = 10 - HARDTAN = 11 - LHTAN = 12 - -__all__ = ['from_darknet'] - -def _darknet_maxpooling(inputs, attrs): - """Process the max pool 2d operation.""" - kernel = parse_tshape(required_attr(attrs, 'kernel', 'maxpool')) - if len(kernel) != 1: - raise tvm.error.OpAttributeUnImplemented( - 'Non-2D kernels for Max Pooling are not supported in frontend Darknet.') - - op_name, new_attrs = 'max_pool2d', {} - strides = int(attrs.get('stride', (1, 1))) - pads = int(attrs.get('pad', (0, 0))) - new_attrs['pool_size'] = [kernel[0], kernel[0]] - new_attrs['strides'] = str((strides, strides)) - new_attrs['padding'] = str((pads, pads)) - extra_pad_size = attrs.get('extra_pad_size', 0) - if extra_pad_size: - pad_width = ((0, 0), (0, 0), (0, extra_pad_size), (0, extra_pad_size)) - inputs = _sym.pad(*inputs, pad_width=pad_width, pad_value=np.finfo(np.float32).min) - return get_nnvm_op(op_name)(*inputs, **new_attrs), None - -def _darknet_avgpooling(inputs, attrs): - """Process the average pool 2d operation.""" - kernel = parse_tshape(required_attr(attrs, 'kernel', 'avgpool')) - if len(kernel) != 1: - raise tvm.error.OpAttributeUnimplemented( - 'Non-2D kernels for Average Pooling are not supported in frontend Darknet.') - - op_name, new_attrs = 'avg_pool2d', {} - strides = int(attrs.get('stride', (1, 1))) - pads = int(attrs.get('pad', (0, 0))) - new_attrs['pool_size'] = [kernel[0], kernel[0]] - new_attrs['strides'] = str((strides, strides)) - new_attrs['padding'] = str((pads, pads)) - - return get_nnvm_op(op_name)(*inputs, **new_attrs), None - -def _darknet_batch_norm(inputs, attrs): - """Process the batchnormalization operation.""" - op_name, new_attrs = 'darknet_batch_norm', {} - new_attrs['axis'] = attrs.get('axis', 1) - new_attrs['epsilon'] = attrs.get('eps', 0.000001) - new_attrs['center'] = True - new_attrs['scale'] = True - return get_nnvm_op(op_name)(*inputs, **new_attrs), None - -def _darknet_conv2d(inputs, attrs): - """Process the convolution 2d operation.""" - kernel = parse_tshape(required_attr(attrs, 'kernel', 'conv2d')) - if len(kernel) != 1: - raise tvm.error.OpAttributeUnimplemented('Non-2D kernels for Conv2D are unsupported ' - 'in frontend Darknet.') - layout = attrs.get('layout', 'NCHW') - if layout not in ['NCHW', 'NHWC']: - raise tvm.error.OpAttributeInvalid( - 'Value {} in attribute "layout" of operator Conv2D is not valid.'.format(layout)) - strides = int(attrs.get('stride', (1, 1))) - pads = int(attrs.get('pad', (0, 0))) - - op_name, new_attrs = 'conv2d', {} - new_attrs['channels'] = required_attr(attrs, 'num_filter', 'conv2d') - new_attrs['kernel_size'] = [kernel[0], kernel[0]] - new_attrs['strides'] = (strides, strides) - new_attrs['padding'] = (pads, pads) - new_attrs['dilation'] = attrs.get('dilate', (1, 1)) - 
new_attrs['groups'] = attrs.get('num_group', 1) - new_attrs['layout'] = layout - if attrs.get('use_batchNorm', False) is True: - new_attrs['use_bias'] = False - else: - new_attrs['use_bias'] = True - out_name = {} - sym = get_nnvm_op(op_name)(*inputs, **new_attrs) - out_name[0] = sym.list_output_names()[0].replace('_output', '') - - if attrs.get('use_batchNorm', False) is True: - op_name, new_attrs = 'batch_norm', {} - new_attrs['epsilon'] = 0.000001 - sym = get_nnvm_op(op_name)(*sym, **new_attrs) - out_name[1] = sym.list_output_names()[0].replace('_output', '') - if 'activation' in attrs: - new_attrs = {} - new_attrs['activation'] = attrs['activation'] - new_attrs['slope'] = 0.1 - sym, _ = _darknet_activations(sym, new_attrs) - return sym, out_name - - -def _darknet_conv2d_transpose(inputs, attrs): - """Process the convolution 2d transpose operation.""" - if 'target_shape' in attrs: - raise tvm.error.OpAttributeUnimplemented( - 'Attribute "target_shape" is not supported in operator Conv2D-transpose.') - kernel = parse_tshape(required_attr(attrs, 'kernel', 'conv2d_transpose')) - if len(kernel) != 2: - raise tvm.error.OpAttributeUnimplemented( - 'Non-2D kernels are not supported in operator Conv2D-transpose.') - layout = attrs.get('layout', 'NCHW') - if layout not in ['NCHW', 'NHWC']: - msg = 'Value {} in attribute "layout" of operator Conv2D-transpose is not valid.' - raise tvm.error.OpAttributeInvalid(msg.format(layout)) - op_name, new_attrs = 'conv2d_transpose', {} - new_attrs['channels'] = required_attr(attrs, 'num_filter', 'conv2d_transpose') - new_attrs['kernel_size'] = kernel - new_attrs['strides'] = attrs.get('stride', (1, 1)) - new_attrs['output_padding'] = attrs.get('adj', (0, 0)) - new_attrs['padding'] = attrs.get('pad', (0, 0)) - new_attrs['dilation'] = attrs.get('dilate', (1, 1)) - new_attrs['groups'] = attrs.get('num_group', 1) - new_attrs['layout'] = layout - new_attrs['use_bias'] = not parse_bool_str(attrs, 'no_bias') - return get_nnvm_op(op_name)(*inputs, **new_attrs), None - -def _darknet_shortcut(inputs, attrs): - """Process the shortcut operation.""" - op_name, new_attrs = 'elemwise_add', {} - input_0 = inputs[0] - input_1 = inputs[1] - input_0_channel = int(attrs['out_channel']) - input_1_channel = int(attrs['add_out_channel']) - input_0_size = int(attrs['out_size']) - input_1_size = int(attrs['add_out_size']) - - if input_0_size > input_1_size: - scale = int(input_0_size/input_1_size) - input_1 = _sym.upsampling(input_1, scale=scale, name="_upsampling") - elif input_0_size < input_1_size: - stride = int(input_1_size/input_0_size) - input_1 = _sym.avg_pool2d(input_1, pool_size=(1, 1), - strides=(stride, stride), padding=(0, 0), name="_downsampling") - - if input_0_channel != input_1_channel: - pad_channel = input_0_channel - input_1_channel - input_1 = _sym.pad(input_1, pad_width=((0, 0), (0, pad_channel), (0, 0), (0, 0)), - pad_value=0.) 
- - new_inputs = _as_list([input_0, input_1]) - sym = get_nnvm_op(op_name)(*new_inputs, **new_attrs) - out_name = sym.list_output_names()[0].replace('_output', '') - if 'activation' in attrs: - new_attrs['activation'] = attrs['activation'] - sym, _ = _darknet_activations(sym, new_attrs) - return sym, out_name - -def _darknet_dense(inputs, attrs): - """Process the dense operation.""" - op_name, new_attrs = 'dense', {} - new_attrs['units'] = required_attr(attrs, 'num_hidden', 'dense') - out_name = {} - new_attrs['use_bias'] = attrs.get('use_bias', False) - if attrs.get('use_flatten', False) is True: - inputs[0] = _sym.flatten(inputs[0]) - sym = get_nnvm_op(op_name)(*inputs, **new_attrs) - out_name[0] = sym.list_output_names()[0].replace('_output', '') - if 'use_batchNorm' in attrs: - op_name, new_attrs = 'batch_norm', {} - new_attrs['epsilon'] = 0.000001 - sym = get_nnvm_op(op_name)(*sym, **new_attrs) - out_name[1] = sym.list_output_names()[0].replace('_output', '') - if 'activation' in attrs: - new_attrs = {} - new_attrs['activation'] = attrs['activation'] - sym, _ = _darknet_activations(sym, new_attrs) - return sym, out_name - -def _darknet_dropout(inputs, attrs): - """Process the dropout operation, its a blank operation.""" - op_name, new_attrs = 'dropout', {} - new_attrs['rate'] = attrs.get('p', 0.5) - return get_nnvm_op(op_name)(*inputs, **new_attrs), None - -def _darknet_reshape(inputs, attrs): - """Process the reshape operation.""" - if parse_bool_str(attrs, 'reverse'): - raise tvm.error.OpAttributeUnimplemented( - 'Attribute "reverse" is not supported in operator Reshape.') - op_name, new_attrs = 'reshape', {} - new_attrs['shape'] = required_attr(attrs, 'shape', 'reshape') - return get_nnvm_op(op_name)(*inputs, **new_attrs), None - -def _darknet_upsampling(inputs, attrs): - """Process the upsampling operation.""" - op_name, new_attrs = 'upsampling', {} - new_attrs['scale'] = attrs.get('scale', 1) - return get_nnvm_op(op_name)(*inputs, **new_attrs), None - -def _darknet_l2normalize(inputs, attrs): - """Process the l2 normalization operation.""" - op_name, new_attrs = 'l2_normalize', {} - new_attrs['eps'] = attrs.get('eps', 0) - new_attrs['axis'] = attrs.get('axis', 1) - return get_nnvm_op(op_name)(*inputs, **new_attrs), None - -def _darknet_softmax_output(inputs, attrs): - """Process the softmax operation.""" - temperature = attrs.get('temperature', 1) - if temperature != 1: - inputs[0] = inputs[0] / float(temperature) - op_name, new_attrs = 'softmax', {} - if parse_bool_str(attrs, 'multi_output'): - new_attrs['axis'] = 1 - - if attrs.get('use_flatten', False) is True: - inputs[0] = _sym.flatten(inputs[0]) - return get_nnvm_op(op_name)(*inputs, **new_attrs), None - -def _darknet_route(inputs, attrs): - """Process the route operation, which is equivalent to concat.""" - op_name = 'concatenate' - new_attrs = {'axis': attrs.get('dim', 1)} - return get_nnvm_op(op_name)(*inputs, **new_attrs), None - -def _darknet_reorg(inputs, attrs): - """Process the reorg operation.""" - op_name, new_attrs = 'yolo_reorg', {} - if 'stride' in attrs: - new_attrs = {'stride': attrs.get('stride', 1)} - return get_nnvm_op(op_name)(*inputs, **new_attrs), None - -def _darknet_region(inputs, attrs): - """Process the region operation.""" - num = attrs.get('n', 1) - classes = attrs.get('classes', 1) - coords = attrs.get('coords', 0) - background = attrs.get('background', 0) - softmax = attrs.get('softmax', True) - input_shape = attrs.get('shape') - - split_size = classes + coords + 1 - intermediate_shape = 
(input_shape[0], num, split_size, input_shape[2], input_shape[3]) - data_block = _sym.reshape(inputs[0], shape=intermediate_shape) - split_indices = (2, 4, 5) - split_res = _sym.split(data_block, indices_or_sections=split_indices, axis=2) - split_res0 = _sym.sigmoid(split_res[0]) - if not background: - split_res2 = _sym.sigmoid(split_res[2]) - else: - split_res2 = split_res[2] - if softmax: - split_res3 = _sym.softmax(split_res[3], axis=2) - concat_list = [split_res0, split_res[1], split_res2, split_res3] - out = _sym.concatenate(*concat_list, axis=2) - return _sym.reshape(out, shape=input_shape), None - - -def _darknet_yolo(inputs, attrs): - """Process the yolo operation.""" - num = attrs.get('n', 1) - classes = attrs.get('classes', 1) - input_shape = attrs.get('shape') - split_size = classes + 5 - intermediate_shape = (input_shape[0], num, split_size, input_shape[2], input_shape[3]) - data_block = _sym.reshape(inputs[0], shape=intermediate_shape) - split_indices = (2, 4) - split_res = _sym.split(data_block, indices_or_sections=split_indices, axis=2) - split_res0 = _sym.sigmoid(split_res[0]) - split_res2 = _sym.sigmoid(split_res[2]) - concat_list = [split_res0, split_res[1], split_res2] - out = _sym.concatenate(*concat_list, axis=2) - return _sym.reshape(out, shape=input_shape), None - -def _darknet_activations(inputs, attrs): - """Process the activation function.""" - act = required_attr(attrs, 'activation', 'activations') - if ACTIVATION.LOGISTIC == act: - act_type = 'sigmoid' - elif ACTIVATION.RELU == act: - act_type = 'relu' - elif ACTIVATION.TANH == act: - act_type = 'tanh' - elif ACTIVATION.LINEAR == act: - return inputs, None - elif ACTIVATION.LEAKY == act: - act_type = 'leaky_relu' - elif ACTIVATION.ELU == act: - act_type = 'elu' - else: - raise tvm.error.OpNotImplemented( - 'Operator act: {} is not supported in framework Darknet.'.format(act)) - - if act_type in ['relu', 'tanh']: - op_name, new_attrs = act_type, {} - sym = get_nnvm_op(op_name)(*inputs, **new_attrs) - elif act_type in ['leaky_relu']: - op_name, new_attrs = act_type, {} - new_attrs['alpha'] = attrs.get('slope', 0.1) - sym = get_nnvm_op(op_name)(*inputs, **new_attrs) - elif act_type in ['elu']: - sym = -1 * _sym.relu(1 - _sym.exp(*inputs)) + _sym.relu(*inputs) - elif act_type in ['sigmoid']: - op_name, new_attrs = act_type, {} - sym = get_nnvm_op(op_name)(*inputs, **new_attrs) - else: - raise tvm.error.OpNotImplemented( - 'Operator act: {} is not supported in framework Darknet.'.format(act)) - return sym, None - -def _darknet_op_not_support(inputs, attrs): - """Raise exception if the operation is not supported.""" - err = "{} is not supported in {}.".format(attrs, inputs) - raise NotImplementedError(err) - -_DARKNET_CONVERT_MAP = { - LAYERTYPE.CONVOLUTIONAL : _darknet_conv2d, - LAYERTYPE.DECONVOLUTIONAL : _darknet_conv2d_transpose, - LAYERTYPE.CONNECTED : _darknet_dense, - LAYERTYPE.MAXPOOL : _darknet_maxpooling, - LAYERTYPE.SOFTMAX : _darknet_softmax_output, - LAYERTYPE.DROPOUT : _darknet_dropout, - LAYERTYPE.AVGPOOL : _darknet_avgpooling, - LAYERTYPE.BATCHNORM : _darknet_batch_norm, - LAYERTYPE.ROUTE : _darknet_route, - LAYERTYPE.REORG : _darknet_reorg, - LAYERTYPE.REGION : _darknet_region, - LAYERTYPE.SHORTCUT : _darknet_shortcut, - LAYERTYPE.UPSAMPLE : _darknet_upsampling, - LAYERTYPE.L2NORM : _darknet_l2normalize, - LAYERTYPE.YOLO : _darknet_yolo, - LAYERTYPE.DETECTION : _darknet_op_not_support, - LAYERTYPE.CROP : _darknet_op_not_support, - LAYERTYPE.COST : _darknet_op_not_support, - LAYERTYPE.NORMALIZATION : 
_darknet_op_not_support, - LAYERTYPE.LOCAL : _darknet_op_not_support, - LAYERTYPE.ACTIVE : _darknet_op_not_support, - LAYERTYPE.RNN : _darknet_op_not_support, - LAYERTYPE.GRU : _darknet_op_not_support, - LAYERTYPE.LSTM : _darknet_op_not_support, - LAYERTYPE.CRNN : _darknet_op_not_support, - LAYERTYPE.NETWORK : _darknet_op_not_support, - LAYERTYPE.XNOR : _darknet_op_not_support, - LAYERTYPE.BLANK : _darknet_op_not_support, -} - -def _darknet_convert_symbol(op_name, inputs, attrs): - """Convert from darknet op to nnvm op. - The converter must specify some conversions explicitly to - support gluon format ops such as conv2d... - - Parameters - ---------- - op_name : str - Operator name, such as Convolution, Connected, etc - inputs : list of nnvm.Symbol - List of input symbols. - attrs : dict - Dict of operator attributes - - Returns - ------- - out_name : converted out name of operation - sym : nnvm.Symbol - Converted nnvm Symbol - """ - - if op_name in _DARKNET_CONVERT_MAP: - sym, out_name = _DARKNET_CONVERT_MAP[op_name](inputs, attrs) - else: - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend Darknet.'.format(op_name)) - if out_name is None: - out_name = sym.list_output_names()[0].replace('_output', '') - return out_name, sym - - -def _as_list(arr): - """Force being a list, ignore if already is.""" - if isinstance(arr, list): - return arr - return [arr] - - -class GraphProto(object): - """A helper class for handling nnvm graph copying from darknet model. - """ - - def __init__(self, net, dtype='float32'): - self.net = net - self.dtype = dtype - self._sym_array = {} - self._tvmparams = {} - self._outs = [] - self._state_ctr = {} - self._state_ctr['rnn'] = 0 - self._state_ctr['crnn'] = 0 - self._state_ctr['lstm'] = 0 - self._state_ctr['cell_state'] = 0 - self._state_ctr['gru'] = 0 - - def _read_memory_buffer(self, shape, data, dtype=None): - if dtype is None: - dtype = self.dtype - length = 1 - for x in shape: - length *= x - data_np = np.zeros(length, dtype=dtype) - for i in range(length): - data_np[i] = data[i] - return data_np.reshape(shape) - - def _get_convolution_weights(self, layer, opname): - """Get the convolution layer weights and biases.""" - if layer.nweights == 0: - return - - if layer.n * layer.c * layer.size * layer.size != layer.nweights: - msg = 'nweights ({}) != n * c * h * w ({}) in operator {}' - msg = msg.format(layer.nweights, layer.n * layer.c * layer.size ** 2, opname) - raise tvm.error.OpAttributeInvalid(msg) - - shape = (layer.n, layer.c, layer.size, layer.size) - weights = self._read_memory_buffer(shape, layer.weights) - - biases = self._read_memory_buffer((layer.n, ), layer.biases) - - k = self._get_tvm_params_name(opname[0], 'weight') - self._tvmparams[k] = tvm.nd.array(weights) - - if layer.batch_normalize == 1 and layer.dontloadscales != 1: - self._get_batchnorm_weights(layer, opname[1], layer.n) - k = self._get_tvm_params_name(opname[1], 'beta') - self._tvmparams[k] = tvm.nd.array(biases) - else: - k = self._get_tvm_params_name(opname[0], 'bias') - self._tvmparams[k] = tvm.nd.array(biases) - - def _get_connected_weights(self, layer, opname): - """Parse the weights and biases for fully connected or dense layer.""" - size = layer.outputs * layer.inputs - if size == 0: - return - - weights = self._read_memory_buffer((layer.outputs, layer.inputs), layer.weights) - biases = self._read_memory_buffer((layer.outputs, ), layer.biases) - - k = self._get_tvm_params_name(opname[0], 'weight') - self._tvmparams[k] = tvm.nd.array(weights) - - if 
layer.batch_normalize == 1 and layer.dontloadscales != 1: - self._get_batchnorm_weights(layer, opname[1], layer.outputs) - k = self._get_tvm_params_name(opname[1], 'beta') - self._tvmparams[k] = tvm.nd.array(biases) - else: - k = self._get_tvm_params_name(opname[0], 'bias') - self._tvmparams[k] = tvm.nd.array(biases) - - def _get_region_weights(self, layer, opname): - """Parse the biases for region layer.""" - biases = self._read_memory_buffer((layer.n*2, ), layer.biases) - attributes = np.array([layer.n, layer.out_c, layer.out_h, layer.out_w, - layer.classes, layer.coords, layer.background], - dtype=np.int32) - k = self._get_tvm_params_name(opname, 'bias') - self._tvmparams[k] = tvm.nd.array(biases) - k = self._get_tvm_params_name(opname, 'attr') - self._tvmparams[k] = tvm.nd.array(attributes) - - def _get_yolo_weights(self, layer, opname): - """Parse the biases and mask for yolo layer.""" - biases = self._read_memory_buffer((layer.total*2, ), layer.biases) - mask = self._read_memory_buffer((layer.n, ), layer.mask, dtype='int32') - attributes = np.array([layer.n, layer.out_c, layer.out_h, layer.out_w, - layer.classes, layer.total], - dtype=np.int32) - k = self._get_tvm_params_name(opname, 'bias') - self._tvmparams[k] = tvm.nd.array(biases) - k = self._get_tvm_params_name(opname, 'mask') - self._tvmparams[k] = tvm.nd.array(mask) - k = self._get_tvm_params_name(opname, 'attr') - self._tvmparams[k] = tvm.nd.array(attributes) - - def _get_batchnorm_weights(self, layer, opname, size): - """Parse the weights for batchnorm, which includes, scales, moving mean - and moving variances.""" - scales = self._read_memory_buffer((size, ), layer.scales) - rolling_mean = self._read_memory_buffer((size, ), layer.rolling_mean) - rolling_variance = self._read_memory_buffer((size, ), layer.rolling_variance) - - k = self._get_tvm_params_name(opname, 'moving_mean') - self._tvmparams[k] = tvm.nd.array(rolling_mean) - k = self._get_tvm_params_name(opname, 'moving_var') - self._tvmparams[k] = tvm.nd.array(rolling_variance) - k = self._get_tvm_params_name(opname, 'gamma') - self._tvmparams[k] = tvm.nd.array(scales) - - def _get_darknet_attrs(self, layer, layer_num): - """Parse attributes of each layer and return.""" - attr = {} - use_flatten = True - if LAYERTYPE.CONVOLUTIONAL == layer.type: - attr.update({'layout' : 'NCHW'}) - attr.update({'pad' : str(layer.pad)}) - attr.update({'num_group' : str(layer.groups)}) - attr.update({'num_filter' : str(layer.n)}) - attr.update({'stride' : str(layer.stride)}) - attr.update({'kernel' : str(layer.size)}) - attr.update({'activation' : (layer.activation)}) - - if layer.nbiases == 0: - attr.update({'use_bias' : False}) - else: - attr.update({'use_bias' : True}) - - if layer.batch_normalize == 1 and layer.dontloadscales != 1: - attr.update({'use_batchNorm' : True}) - attr.update({'use_scales' : True}) - - elif LAYERTYPE.CONNECTED == layer.type: - attr.update({'num_hidden' : str(layer.outputs)}) - attr.update({'activation' : (layer.activation)}) - if layer_num != 0: - layer_prev = self.net.layers[layer_num - 1] - if (layer_prev.out_h == layer.h and - layer_prev.out_w == layer.w and - layer_prev.out_c == layer.c): - use_flatten = False - attr.update({'use_flatten' : use_flatten}) - attr.update({'use_bias' : True}) - if layer.batch_normalize == 1 and layer.dontloadscales != 1: - attr.update({'use_batchNorm' : True}) - attr.update({'use_scales' : True}) - attr.update({'use_bias' : False}) - - elif LAYERTYPE.MAXPOOL == layer.type: - attr.update({'pad' : str(layer.pad)}) - 
attr.update({'stride' : str(layer.stride)}) - attr.update({'kernel' : str(layer.size)}) - max_output = (layer.w - layer.size + 2 * layer.pad)/float(layer.stride) + 1 - if max_output < layer.out_w: - extra_pad = (layer.out_w - max_output)*layer.stride - attr.update({'extra_pad_size' : int(extra_pad)}) - elif LAYERTYPE.AVGPOOL == layer.type: - attr.update({'pad' : str(layer.pad)}) - if layer.stride == 0: - attr.update({'stride' : str(1)}) - else: - attr.update({'stride' : str(layer.stride)}) - if layer.size == 0 and layer.h == layer.w: - attr.update({'kernel' : str(layer.h)}) - else: - attr.update({'kernel' : str(layer.size)}) - - elif LAYERTYPE.DROPOUT == layer.type: - attr.update({'p' : str(layer.probability)}) - - elif LAYERTYPE.SOFTMAX == layer.type: - attr.update({'axis' : 1}) - attr.update({'use_flatten' : True}) - if layer.temperature: - attr.update({'temperature' : str(layer.temperature)}) - - elif LAYERTYPE.SHORTCUT == layer.type: - add_layer = self.net.layers[layer.index] - attr.update({'activation' : (layer.activation)}) - attr.update({'out_channel' : (layer.out_c)}) - attr.update({'out_size' : (layer.out_h)}) - attr.update({'add_out_channel' : (add_layer.out_c)}) - attr.update({'add_out_size' : (add_layer.out_h)}) - - elif LAYERTYPE.ROUTE == layer.type: - pass - - elif LAYERTYPE.COST == layer.type: - pass - - elif LAYERTYPE.REORG == layer.type: - attr.update({'stride' : layer.stride}) - - elif LAYERTYPE.REGION == layer.type: - attr.update({'n' : layer.n}) - attr.update({'classes' : layer.classes}) - attr.update({'coords' : layer.coords}) - attr.update({'background' : layer.background}) - attr.update({'softmax' : layer.softmax}) - attr.update({'shape' : (1, layer.c, layer.h, layer.w)}) - - elif LAYERTYPE.YOLO == layer.type: - attr.update({'n' : layer.n}) - attr.update({'classes' : layer.classes}) - attr.update({'shape' : (1, layer.c, layer.h, layer.w)}) - - elif LAYERTYPE.UPSAMPLE == layer.type: - attr.update({'scale' : layer.stride}) - - elif LAYERTYPE.L2NORM == layer.type: - pass - - else: - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend Darknet.'.format(layer.type)) - - return attr - - def _get_tvm_params_name(self, opname, arg_name): - """Makes the params name for the k,v pair.""" - return opname + '_'+ arg_name - - def _get_darknet_params(self, layer, opname): - """To parse and get the darknet params.""" - if LAYERTYPE.CONVOLUTIONAL == layer.type: - self._get_convolution_weights(layer, opname) - - elif LAYERTYPE.CONNECTED == layer.type: - self._get_connected_weights(layer, opname) - - elif LAYERTYPE.REGION == layer.type: - self._get_region_weights(layer, opname) - - elif LAYERTYPE.YOLO == layer.type: - self._get_yolo_weights(layer, opname) - def _preproc_layer(self, layer, layer_num): - """To preprocess each darknet layer, some layer doesnt need processing.""" - if layer_num == 0: - name = 'data' - attribute = {} - sym = [_sym.Variable(name, **attribute)] - else: - sym = self._sym_array[layer_num - 1] - skip_layer = False - - if LAYERTYPE.ROUTE == layer.type: - sym = [] - for j in range(layer.n): - sym.append(self._sym_array[layer.input_layers[j]]) - if layer.n == 1: - skip_layer = True - - elif LAYERTYPE.COST == layer.type: - skip_layer = True - - elif LAYERTYPE.SHORTCUT == layer.type: - sym = [sym, self._sym_array[layer.index]] - - elif LAYERTYPE.BLANK == layer.type: - skip_layer = True - - if skip_layer is True: - self._sym_array[layer_num] = sym - - return skip_layer, sym - - def _get_opname(self, layer): - """Returs the layer name.""" - 
return layer.type - - def _new_rnn_state_sym(self, state=None, name='rnn'): - """Returs a symbol for state""" - sym_name = name + "%d_state" % self._state_ctr[name] - self._state_ctr[name] += 1 - return _sym.Variable(name=sym_name, init=state) - - def _get_rnn_state_buffer(self, layer, name): - """Get the state buffer for rnn.""" - buffer = np.zeros((1, layer.outputs), self.dtype) - return self._new_rnn_state_sym(buffer, name) - - def _get_darknet_rnn_attrs(self, layer, sym): - """Get the rnn converted symbol from attributes.""" - attr = self._get_darknet_attrs(layer, 0) - op_name = self._get_opname(layer) - layer_name, sym = _darknet_convert_symbol(op_name, _as_list(sym), attr) - self._get_darknet_params(layer, layer_name) - return sym - - def _handle_darknet_rnn_layers(self, layer_num, sym): - """Parse attributes and handle the rnn layers.""" - attr = {} - layer = self.net.layers[layer_num] - processed = False - - if LAYERTYPE.RNN == layer.type: - attr.update({'n' : layer.n}) - attr.update({'batch' : layer.batch}) - attr.update({'num_hidden' : str(layer.outputs)}) - - state = self._get_rnn_state_buffer(layer, 'rnn') - - for _ in range(layer.steps): - input_layer = layer.input_layer - sym = self._get_darknet_rnn_attrs(input_layer, sym) - - self_layer = layer.self_layer - state = self._get_darknet_rnn_attrs(self_layer, state) - - op_name, new_attrs = 'elemwise_add', {} - new_inputs = _as_list([sym, state]) - state = get_nnvm_op(op_name)(*new_inputs, **new_attrs) - self._outs.append(state) - - output_layer = layer.output_layer - sym = self._get_darknet_rnn_attrs(output_layer, state) - - self._sym_array[layer_num] = sym - processed = True - - elif LAYERTYPE.CRNN == layer.type: - attr.update({'n' : layer.n}) - attr.update({'batch' : layer.batch}) - attr.update({'num_hidden' : str(layer.outputs)}) - - state = self._get_rnn_state_buffer(layer, 'crnn') - - for _ in range(layer.steps): - input_layer = layer.input_layer - sym = self._get_darknet_rnn_attrs(input_layer, sym) - - self_layer = layer.self_layer - state = self._get_darknet_rnn_attrs(self_layer, state) - - op_name, new_attrs = 'elemwise_add', {} - new_inputs = _as_list([sym, state]) - state = get_nnvm_op(op_name)(*new_inputs, **new_attrs) - self._outs.append(state) - - output_layer = layer.output_layer - sym = self._get_darknet_rnn_attrs(output_layer, state) - - self._sym_array[layer_num] = sym - processed = True - - elif LAYERTYPE.LSTM == layer.type: - if layer.steps > 1: - raise tvm.error.OpAttributeInvalid( - 'Number of steps {} of RNN is not valid.'.format(layer.steps)) - - op_name_add = 'elemwise_add' - op_name_mul = 'elemwise_mul' - attrs = {} - act_attr = {} - - h_state = self._get_rnn_state_buffer(layer, 'lstm') - c_state = self._get_rnn_state_buffer(layer, 'cell_state') - for _ in range(layer.steps): - sym_wf = self._get_darknet_rnn_attrs(layer.wf, h_state) - sym_wi = self._get_darknet_rnn_attrs(layer.wi, h_state) - sym_wg = self._get_darknet_rnn_attrs(layer.wg, h_state) - sym_wo = self._get_darknet_rnn_attrs(layer.wo, h_state) - - input_sym = sym - sym_uf = self._get_darknet_rnn_attrs(layer.uf, input_sym) - sym_ui = self._get_darknet_rnn_attrs(layer.ui, input_sym) - sym_ug = self._get_darknet_rnn_attrs(layer.ug, input_sym) - sym_uo = self._get_darknet_rnn_attrs(layer.uo, input_sym) - - new_inputs = _as_list([sym_wf, sym_uf]) - add_f = get_nnvm_op(op_name_add)(*new_inputs, **attrs) - - new_inputs = _as_list([sym_wi, sym_ui]) - add_i = get_nnvm_op(op_name_add)(*new_inputs, **attrs) - - new_inputs = _as_list([sym_wg, sym_ug]) - 
add_g = get_nnvm_op(op_name_add)(*new_inputs, **attrs) - - new_inputs = _as_list([sym_wo, sym_uo]) - add_o = get_nnvm_op(op_name_add)(*new_inputs, **attrs) - - act_attr['activation'] = ACTIVATION.LOGISTIC - act_f, _ = _darknet_activations(_as_list(add_f), act_attr) - - act_attr['activation'] = ACTIVATION.LOGISTIC - act_i, _ = _darknet_activations(_as_list(add_i), act_attr) - - act_attr['activation'] = ACTIVATION.TANH - act_g, _ = _darknet_activations(_as_list(add_g), act_attr) - - act_attr['activation'] = ACTIVATION.LOGISTIC - act_o, _ = _darknet_activations(_as_list(add_o), act_attr) - - new_inputs = _as_list([act_i, act_g]) - mul_t = get_nnvm_op(op_name_mul)(*new_inputs, **attrs) - - new_inputs = _as_list([act_f, c_state]) - c_state = get_nnvm_op(op_name_mul)(*new_inputs, **attrs) - - new_inputs = _as_list([mul_t, c_state]) - c_state = get_nnvm_op(op_name_add)(*new_inputs, **attrs) - - act_attr['activation'] = ACTIVATION.TANH - h_state, _ = _darknet_activations(_as_list(c_state), act_attr) - - new_inputs = _as_list([act_o, h_state]) - h_state = get_nnvm_op(op_name_mul)(*new_inputs, **attrs) - self._outs = self._outs + [c_state, h_state] - sym = h_state - self._sym_array[layer_num] = sym - processed = True - - elif LAYERTYPE.GRU == layer.type: - if layer.steps > 1: - raise tvm.error.OpAttributeInvalid( - 'Number of steps {} is not valid in RNN.'.format(layer.steps)) - - op_name_add = 'elemwise_add' - op_name_mul = 'elemwise_mul' - attrs = {} - act_attr = {} - - state = self._get_rnn_state_buffer(layer, "gru") - for _ in range(layer.steps): - sym_wz = self._get_darknet_rnn_attrs(layer.wz, state) - sym_wr = self._get_darknet_rnn_attrs(layer.wr, state) - - input_sym = sym - sym_uz = self._get_darknet_rnn_attrs(layer.uz, input_sym) - sym_ur = self._get_darknet_rnn_attrs(layer.ur, input_sym) - sym_uh = self._get_darknet_rnn_attrs(layer.uh, input_sym) - - new_inputs = _as_list([sym_uz, sym_wz]) - add_z = get_nnvm_op(op_name_add)(*new_inputs, **attrs) - - new_inputs = _as_list([sym_ur, sym_wr]) - add_r = get_nnvm_op(op_name_add)(*new_inputs, **attrs) - - act_attr['activation'] = ACTIVATION.LOGISTIC - act_z, _ = _darknet_activations(_as_list(add_z), act_attr) - - act_attr['activation'] = ACTIVATION.LOGISTIC - act_r, _ = _darknet_activations(_as_list(add_r), act_attr) - - new_inputs = _as_list([act_r, state]) - forgot = get_nnvm_op(op_name_mul)(*new_inputs, **attrs) - - sym_wh = self._get_darknet_rnn_attrs(layer.wh, forgot) - - new_inputs = _as_list([sym_uh, sym_wh]) - h_state = get_nnvm_op(op_name_add)(*new_inputs, **attrs) - - if layer.tanh == 1: - act_attr['activation'] = ACTIVATION.TANH - else: - act_attr['activation'] = ACTIVATION.LOGISTIC - h_state, _ = _darknet_activations(_as_list(h_state), act_attr) - - sym = act_z * state + (1 - act_z) * h_state - - self._outs = self._outs + [sym] - self._sym_array[layer_num] = sym - processed = True - - return processed, sym - - def _make_outlist(self, sym, op_name, layer, layer_num): - if layer.type == LAYERTYPE.REGION: - k = self._get_tvm_params_name(op_name, 'attr') - self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy())) - k = self._get_tvm_params_name(op_name, 'bias') - self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy())) - if layer_num != self.net.n-1: - self._outs.insert(0, sym) - - elif layer.type == LAYERTYPE.YOLO: - k = self._get_tvm_params_name(op_name, 'attr') - self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy())) - k = self._get_tvm_params_name(op_name, 'bias') - 
self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy())) - k = self._get_tvm_params_name(op_name, 'mask') - self._outs.insert(0, _sym.Variable(name=k, init=self._tvmparams[k].asnumpy())) - if layer_num != self.net.n-1: - self._outs.insert(0, sym) - - def from_darknet(self): - """To convert the darknet symbol to nnvm symbols.""" - for i in range(self.net.n): - layer = self.net.layers[i] - need_skip, sym = self._preproc_layer(layer, i) - if need_skip is True: - continue - - processed, sym = self._handle_darknet_rnn_layers(i, sym) - if processed is True: - continue - - attr = self._get_darknet_attrs(layer, i) - op_name = self._get_opname(layer) - layer_name, sym = _darknet_convert_symbol(op_name, _as_list(sym), attr) - self._get_darknet_params(self.net.layers[i], layer_name) - self._sym_array[i] = sym - self._make_outlist(sym, layer_name, layer, i) - - self._outs = _as_list(sym) + self._outs - if isinstance(self._outs, list): - sym = _sym.Group(self._outs) - return sym, self._tvmparams - -def from_darknet(net, dtype='float32'): - """Convert from darknet's model into compatible NNVM format. - Reconstruct a nnvm symbol by traversing the darknet input. - - Parameters - ---------- - net : ctype Pointer to network - Darknet parsed symbols - - dtype : str - Datatype of the input net structure, default is float32 - - Returns - ------- - sym : nnvm.Symbol - Compatible nnvm symbol - - params : dict of str to tvm.NDArray - The parameter dict to be used by nnvm - """ - - return GraphProto(net, dtype).from_darknet() diff --git a/nnvm/python/nnvm/frontend/keras.py b/nnvm/python/nnvm/frontend/keras.py deleted file mode 100644 index f647a644bd2b..000000000000 --- a/nnvm/python/nnvm/frontend/keras.py +++ /dev/null @@ -1,727 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, import-self -"""Keras frontend.""" -from __future__ import absolute_import as _abs -import sys -import numpy as np -import tvm -from .. import symbol as _sym -from .common import SymbolTable - -__all__ = ['from_keras'] - - -def _check_data_format(keras_layer): - if hasattr(keras_layer, ('data_format')): - if keras_layer.data_format != 'channels_last': - raise ValueError("Keras frontend currently supports data_format = channels_last only.") - - -def _get_pad_pair(input1d, kernel1d, stride1d): - out1d = (input1d + stride1d - 1) // stride1d - pad = np.maximum((out1d - 1) * stride1d + kernel1d - input1d, 0) - pad_before = pad // 2 - pad_after = pad - pad_before - return [pad_before, pad_after] - -def _get_elu(insym, alpha): - """ A helper method for elu. 
- """ - return -alpha * _sym.relu(1 - _sym.exp(insym)) + _sym.relu(insym) - -def _convert_recurrent_activation(insym, keras_layer): - act_type = keras_layer.recurrent_activation.__name__ - return _convert_activation(insym, act_type, None) - -def _convert_activation(insym, keras_layer, _): - if isinstance(keras_layer, str): - act_type = keras_layer - else: - if sys.version_info.major < 3: - act_type = keras_layer.activation.func_name - else: - act_type = keras_layer.activation.__name__ - if act_type == 'linear': - if isinstance(keras_layer, str): - return insym - alpha = keras_layer.alpha if hasattr(keras_layer, "alpha") else 1 - beta = keras_layer.beta if hasattr(keras_layer, "beta") else 0 - return _sym.__add_scalar__(_sym.__mul_scalar__(insym, \ - scalar=alpha), scalar=beta) - if act_type == 'softmax': - return _sym.softmax(insym, axis=1) - if act_type == 'sigmoid': - return _sym.sigmoid(insym) - if act_type == 'tanh': - return _sym.tanh(insym) - if act_type == 'relu': - return _sym.relu(insym) - if act_type == 'softplus': - return _sym.log(_sym.__add_scalar__(_sym.exp(insym), scalar=1)) - if act_type == 'elu': - alpha = keras_layer.alpha if hasattr(keras_layer, "alpha") else 1 - return _get_elu(insym, alpha) - if act_type == 'selu': - # Alpha, Gamma values, obtained from https://arxiv.org/abs/1706.02515 - alpha = keras_layer.alpha if hasattr(keras_layer, "alpha") \ - else 1.6732632423543772848170429916717 - gamma = keras_layer.gamma if hasattr(keras_layer, "gamma") \ - else 1.0507009873554804934193349852946 - return gamma * _get_elu(insym, alpha) - if act_type == 'relu6': - return _sym.clip(insym, a_min=0, a_max=6) - if act_type == 'softsign': - return insym / (1 + (_sym.relu(insym) + _sym.relu(_sym.negative(insym)))) - if act_type == 'hard_sigmoid': - transformX = (0.2 * insym) + 0.5 - return _sym.clip(transformX, a_min=0, a_max=1) - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend Keras.'.format(act_type)) - - -def _convert_advanced_activation(insym, keras_layer, symtab): - act_type = type(keras_layer).__name__ - if act_type == 'ReLU': - if keras_layer.max_value: - return _sym.clip(insym, a_min=0, a_max=keras_layer.max_value) - return _sym.relu(insym) - if act_type == 'LeakyReLU': - return _sym.leaky_relu(insym, alpha=keras_layer.alpha) - if act_type == 'ELU': - alpha = keras_layer.alpha if hasattr(keras_layer, "alpha") else 1 - return _get_elu(insym, alpha) - if act_type == 'PReLU': - assert hasattr(keras_layer, "alpha"), \ - "alpha required for PReLU." 
- _check_data_format(keras_layer) - size = len(keras_layer.alpha.shape) - return -symtab.new_const(keras_layer.get_weights()[0] \ - .transpose(np.roll(range(size), 1))) \ - * _sym.relu(-insym) + _sym.relu(insym) - if act_type == 'ThresholdedReLU': - theta = keras_layer.theta if hasattr(keras_layer, "theta") else 1.0 - theta_tensor = _sym.full_like(insym[0], fill_value=float(theta)) - return _sym.elemwise_mul(insym[0], _sym.greater(insym[0], theta_tensor, out_type="float32")) - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend Keras.'.format(act_type)) - - -def _convert_merge(insym, keras_layer, _): - merge_type = type(keras_layer).__name__ - ret = insym[0] - for i in range(1, len(insym)): - if merge_type == 'Add': - ret = _sym.elemwise_add(ret, insym[i]) - elif merge_type == 'Subtract': - ret = _sym.elemwise_sub(ret, insym[i]) - elif merge_type == 'Multiply': - ret = _sym.elemwise_mul(ret, insym[i]) - else: - raise tvm.error.OpNotImplemented( - 'Operator {} Merge is not supported in frontend Keras.'.format(merge_type)) - return ret - - -def _convert_dense(insym, keras_layer, symtab): - weightList = keras_layer.get_weights() - weight = symtab.new_const(weightList[0].transpose([1, 0])) - params = {'weight':weight, 'use_bias':False, 'units':weightList[0].shape[1]} - if keras_layer.use_bias: - params['use_bias'] = True - params['bias'] = symtab.new_const(weightList[1]) - input_shape = keras_layer.input_shape - input_dim = len(input_shape) - # In case of RNN dense, input shape will be (1, 1, n) - if input_dim > 2: - input_shape = tuple(dim if dim else 1 for dim in _as_list(input_shape)[0]) - if input_dim != 3 or input_shape[0] != 1 or input_shape[1] != 1: - msg = 'Value {} in attribute "input_shape" of operator Dense is not valid.' 
- raise tvm.error.OpAttributeInvalid(msg.format(input_shape)) - insym = _sym.squeeze(insym, axis=0) - out = _sym.dense(data=insym, **params) - # defuse activation - if sys.version_info.major < 3: - act_type = keras_layer.activation.func_name - else: - act_type = keras_layer.activation.__name__ - if act_type != 'linear': - out = _convert_activation(out, act_type, symtab) - if input_dim > 2: - out = _sym.expand_dims(out, axis=0) - return out - - -def _convert_convolution(insym, keras_layer, symtab): - _check_data_format(keras_layer) - is_deconv = type(keras_layer).__name__ == 'Conv2DTranspose' - is_depthconv = type(keras_layer).__name__ == 'DepthwiseConv2D' - weightList = keras_layer.get_weights() - if is_deconv: - kernel_h, kernel_w, n_filters, in_channels = weightList[0].shape - weight = weightList[0].transpose([3, 2, 0, 1]) - elif is_depthconv: - kernel_h, kernel_w, in_channels, depth_mult = weightList[0].shape - weight = weightList[0].transpose([2, 3, 0, 1]) - else: - kernel_h, kernel_w, in_channels, n_filters = weightList[0].shape - weight = weightList[0].transpose([3, 2, 0, 1]) - if isinstance(keras_layer.dilation_rate, (list, tuple)): - dilation = [keras_layer.dilation_rate[0], keras_layer.dilation_rate[1]] - else: - dilation = [keras_layer.dilation_rate, keras_layer.dilation_rate] - dilated_kernel_h = (kernel_h - 1) * dilation[0] + 1 - dilated_kernel_w = (kernel_w - 1) * dilation[1] + 1 - stride_h, stride_w = keras_layer.strides - params = {'weight': symtab.new_const(weight), - 'kernel_size': [kernel_h, kernel_w], - 'strides': [stride_h, stride_w], - 'dilation': dilation, - 'padding': [0, 0], - 'use_bias': False} - if is_depthconv: - params['channels'] = in_channels * depth_mult - params['groups'] = in_channels - else: - params['channels'] = n_filters - if keras_layer.use_bias: - params['use_bias'] = True - params['bias'] = symtab.new_const(weightList[1]) - if keras_layer.padding == 'valid': - pass - # we insert a separate pad operator - elif keras_layer.padding == 'same': - in_h = keras_layer.input_shape[1] - in_w = keras_layer.input_shape[2] - pad_t, pad_b = _get_pad_pair(in_h, dilated_kernel_h, stride_h) - pad_l, pad_r = _get_pad_pair(in_w, dilated_kernel_w, stride_w) - if pad_t == pad_b and pad_l == pad_r: - params['padding'] = (pad_t, pad_l) - else: - insym = _sym.pad(data=insym, pad_width=((0, 0), (0, 0), (pad_t, pad_b), (pad_l, pad_r))) - else: - msg = 'Value {} in attribute "padding" of operator Convolution is not valid.' 
- raise tvm.error.OpAttributeInvalid(msg.format(keras_layer.padding)) - if is_deconv: - out = _sym.conv2d_transpose(data=insym, **params) - else: - out = _sym.conv2d(data=insym, **params) - # defuse activation - if sys.version_info.major < 3: - act_type = keras_layer.activation.func_name - else: - act_type = keras_layer.activation.__name__ - if act_type != 'linear': - out = _convert_activation(out, act_type, symtab) - return out - - -def _convert_separable_convolution(insym, keras_layer, symtab): - _check_data_format(keras_layer) - weightList = keras_layer.get_weights() - # depthwise conv - kernel_h, kernel_w, in_channels, depth_mult = weightList[0].shape - stride_h, stride_w = keras_layer.strides - weight0 = weightList[0].transpose([2, 3, 0, 1]) - params0 = {'weight': symtab.new_const(weight0), - 'channels': in_channels * depth_mult, - 'groups': in_channels, - 'kernel_size': [kernel_h, kernel_w], - 'strides': [stride_h, stride_w], - 'dilation': [1, 1], - 'padding': [0, 0], - 'use_bias': False} - if keras_layer.padding == 'valid': - pass - # we insert a separate pad operator - elif keras_layer.padding == 'same': - in_h = keras_layer.input_shape[1] - in_w = keras_layer.input_shape[2] - pad_t, pad_b = _get_pad_pair(in_h, kernel_h, stride_h) - pad_l, pad_r = _get_pad_pair(in_w, kernel_w, stride_w) - insym = _sym.pad(data=insym, pad_width=( - (0, 0), (0, 0), (pad_t, pad_b), (pad_l, pad_r))) - else: - msg = 'Value {} in attribute "padding" of operator Separable Convolution is not valid.' - raise tvm.error.OpAttributeInvalid(msg.format(keras_layer.padding)) - depthconv = _sym.conv2d(data=insym, **params0) - # pointwise conv - weight1 = weightList[1].transpose([3, 2, 0, 1]) - params1 = {'weight': symtab.new_const(weight1), - 'channels': weight1.shape[0], - 'groups': 1, - 'kernel_size': [1, 1], - 'strides': [1, 1], - 'dilation': [1, 1], - 'use_bias': False} - if keras_layer.use_bias: - params1['use_bias'] = True - params1['bias'] = symtab.new_const(weightList[2]) - out = _sym.conv2d(data=depthconv, **params1) - # defuse activation - if sys.version_info.major < 3: - act_type = keras_layer.activation.func_name - else: - act_type = keras_layer.activation.__name__ - if act_type != 'linear': - out = _convert_activation(out, act_type, symtab) - return out - - -def _convert_flatten(insym, keras_layer, _): - _check_data_format(keras_layer) - # NCHW -> NHWC so that dense can be correctly converted - insym = _sym.transpose(insym, axes=[0, 2, 3, 1]) - return _sym.flatten(insym) - - -def _convert_pooling(insym, keras_layer, symtab): - _check_data_format(keras_layer) - pool_type = type(keras_layer).__name__ - # global pool in keras = global pool + flatten in nnvm - if pool_type == 'GlobalMaxPooling2D': - return _convert_flatten(_sym.global_max_pool2d(insym), keras_layer, symtab) - if pool_type == 'GlobalAveragePooling2D': - return _convert_flatten(_sym.global_avg_pool2d(insym), keras_layer, symtab) - pool_h, pool_w = keras_layer.pool_size - stride_h, stride_w = keras_layer.strides - params = {'pool_size': [pool_h, pool_w], - 'strides': [stride_h, stride_w], - 'padding': [0, 0]} - if keras_layer.padding == 'valid': - pass - elif keras_layer.padding == 'same': - in_h = keras_layer.input_shape[1] - in_w = keras_layer.input_shape[2] - pad_t, pad_b = _get_pad_pair(in_h, pool_h, stride_h) - pad_l, pad_r = _get_pad_pair(in_w, pool_w, stride_w) - params['padding'] = [pad_t, pad_l, pad_b, pad_r] - else: - msg = 'Value {} in attribute "padding" of operator Pooling is not valid.' 
- raise tvm.error.OpAttributeInvalid(msg.format(keras_layer.padding)) - if pool_type == 'MaxPooling2D': - return _sym.max_pool2d(insym, **params) - if pool_type == 'AveragePooling2D': - # TODO: in keras, padded zeros are not calculated - return _sym.avg_pool2d(insym, **params) - msg = 'Value {} in attribute "padding" of operator Pooling is not valid.' - raise tvm.error.OpAttributeInvalid(msg.format(keras_layer.padding)) - - -def _convert_upsample(insym, keras_layer, _): - _check_data_format(keras_layer) - upsample_type = type(keras_layer).__name__ - if upsample_type == "UpSampling1D": - h = keras_layer.size - params = {'scale': h} - elif upsample_type == "UpSampling2D": - h, w = keras_layer.size - if h != w: - raise tvm.error.OpAttributeInvalid( - 'Upsample height ({}) must equal width ({})'.format(h, w)) - params = {'scale': h} - elif upsample_type == "UpSampling3D": - h, w, d = keras_layer.size - if h != w or w != d: - raise tvm.error.OpAttributeInvalid( - 'Upsample height ({}), width ({}), and depth ({}) must be equal.'.format(h, w, d)) - params = {'scale': h} - else: - msg = 'Operator {} is not supported in frontend Keras.' - raise tvm.error.OpNotImplemented(msg.format(upsample_type)) - return _sym.upsampling(insym, **params) - - -def _convert_cropping(insym, keras_layer, _): - _check_data_format(keras_layer) - crop_type = type(keras_layer).__name__ - if crop_type == "Cropping2D": - (_, in_h, in_w, _) = keras_layer.input_shape - ((crop_t, crop_b), (crop_l, crop_r)) = keras_layer.cropping - else: - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend Keras.'.format(crop_type)) - int32_max = np.iinfo(np.int32).max - return _sym.strided_slice(insym, begin=[0, 0, crop_t, crop_l], - end=[int32_max, int32_max, in_h-crop_b, in_w-crop_r]) - - -def _convert_batchnorm(insym, keras_layer, symtab): - params = {'scale': False, - 'center': False, - 'epsilon': keras_layer.epsilon} - idx = 0 - if keras_layer.scale: - params['scale'] = True - gamma = keras_layer.get_weights()[idx] - params['gamma'] = symtab.new_const(gamma) - idx += 1 - if keras_layer.center: - params['center'] = True - beta = keras_layer.get_weights()[idx] - params['beta'] = symtab.new_const(beta) - idx += 1 - moving_mean = keras_layer.get_weights()[idx] - moving_var = keras_layer.get_weights()[idx + 1] - params['moving_mean'] = symtab.new_const(moving_mean) - params['moving_var'] = symtab.new_const(moving_var) - return _sym.batch_norm(data=insym, **params) - - -def _convert_padding(insym, keras_layer, _): - _check_data_format(keras_layer) - padding_type = type(keras_layer).__name__ - padding = keras_layer.padding - top = left = bottom = right = 0 - if padding_type == 'ZeroPadding2D': - if isinstance(padding, int): - top = left = bottom = right = padding - elif isinstance(padding, tuple): - if isinstance(padding[0], int): - top, left = padding - bottom, right = padding - elif isinstance(padding[0], tuple): - top, bottom = padding[0] - left, right = padding[1] - else: - msg = 'Value {} in attribute "padding" of operator {} is not valid.' - raise tvm.error.OpAttributeInvalid(msg.format(str(padding), padding_type)) - else: - msg = 'Value {} in attribute "padding" of operator {} is not valid.' 
- raise tvm.error.OpAttributeInvalid(msg.format(str(padding), padding_type)) - else: - raise tvm.error.OpNotImplemented('Operator {} is not supported in frontend Keras.') - return _sym.pad(data=insym, pad_width=((0, 0), (0, 0), (top, bottom), (left, right))) - - -def _convert_concat(insym, keras_layer, _): - _check_data_format(keras_layer) - if not isinstance(insym, list): - insym = [insym] - return _sym.concatenate(*insym, axis=1) - - -def _convert_reshape(insym, keras_layer, _): - _check_data_format(keras_layer) - ch = keras_layer.input_shape[-1] - assert ch == keras_layer.target_shape[-1], \ - "Only supports last dimension in target shape being equal to " \ - "the channel number of input tensor." - shape = (-1, ch) + keras_layer.target_shape[:-1] - return _sym.reshape(insym, shape=shape) - -def _convert_lstm(insym, keras_layer, symtab): - _check_data_format(keras_layer) - if not isinstance(insym, list): - buffer = np.zeros((1, keras_layer.units), 'float32') - c_sym = symtab.new_const(buffer) - h_sym = symtab.new_const(buffer) - insym = [insym, h_sym, c_sym] - - in_data = insym[0] - next_h = insym[1] - next_c = insym[2] - - weightList = keras_layer.get_weights() - inp_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.input_shape)[0]) - - kernel_wt = symtab.new_const(weightList[0].transpose([1, 0])) - recurrent_wt = symtab.new_const(weightList[1].transpose([1, 0])) - in_bias = symtab.new_const(weightList[2]) - - units = list(weightList[0].shape)[1] - - time_steps = inp_shape[1] - in_data = _sym.squeeze(in_data, axis=0) - in_data = _sym.split(in_data, indices_or_sections=time_steps, axis=0) - #loop for the number of time_steps - for data in in_data: - ixh1 = _sym.dense(data, kernel_wt, use_bias=False, units=units) - ixh2 = _sym.dense(next_h, recurrent_wt, in_bias, use_bias=True, units=units) - gate = ixh1 + ixh2 - gates = _sym.split(gate, indices_or_sections=4, axis=1) - in_gate = _convert_recurrent_activation(gates[0], keras_layer) - in_transform = _convert_recurrent_activation(gates[1], keras_layer) - next_c = in_transform * next_c + in_gate * _convert_activation(gates[2], keras_layer, None) - out_gate = _convert_recurrent_activation(gates[3], keras_layer) - next_h = out_gate * _convert_activation(next_c, keras_layer, None) - - out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0]) - out = _sym.reshape(next_h, shape=out_shape) - return [out, next_h, next_c] - -def _convert_simple_rnn(insym, keras_layer, symtab): - _check_data_format(keras_layer) - if not isinstance(insym, list): - buffer = np.zeros((1, keras_layer.units), 'float32') - prev_sym = symtab.new_const(buffer) - insym = [insym, prev_sym] - in_data = insym[0] - prev_sym = insym[1] - - weightList = keras_layer.get_weights() - kernel_wt = symtab.new_const(weightList[0].transpose([1, 0])) - recurrent_wt = symtab.new_const(weightList[1].transpose([1, 0])) - in_bias = symtab.new_const(weightList[2]) - units = list(weightList[0].shape)[1] - - in_data = _sym.flatten(in_data) - ixh = _sym.dense(in_data, kernel_wt, in_bias, use_bias=True, units=units) - prev_sym = _sym.flatten(prev_sym) - ixh2 = _sym.dense(prev_sym, recurrent_wt, use_bias=False, units=units) - output = ixh + ixh2 - output = _convert_activation(output, keras_layer, None) - - out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0]) - output = _sym.reshape(output, shape=out_shape) - - return [output, output] - -def _convert_gru(insym, keras_layer, symtab): - _check_data_format(keras_layer) - if not 
isinstance(insym, list): - buffer = np.zeros((1, keras_layer.units), 'float32') - h_tm1 = symtab.new_const(buffer) - insym = [insym, h_tm1] - in_data = insym[0] - h_tm1_sym = insym[1] - - weightList = keras_layer.get_weights() - kernel_wt = symtab.new_const(weightList[0].transpose([1, 0])) - recurrent_wt = symtab.new_const(weightList[1].transpose([1, 0])) - in_bias = symtab.new_const(weightList[2]) - - units = list(weightList[0].shape)[1] - - in_data = _sym.flatten(in_data) - matrix_x = _sym.dense(in_data, kernel_wt, in_bias, use_bias=True, units=units) - - # inputs projected by all gate matrices at once - split_indices = [keras_layer.units, 2 * keras_layer.units] - gates = _sym.split(matrix_x, indices_or_sections=split_indices, axis=1) - x_z = gates[0] - x_r = gates[1] - x_h = gates[2] - - # hidden state projected separately for update/reset and new - units = 2 * keras_layer.units - split_indices = [units] - rec_wts = _sym.split(recurrent_wt, indices_or_sections=split_indices, axis=0) - - h_tm1_sym = _sym.flatten(h_tm1_sym) - matrix_inner = _sym.dense(h_tm1_sym, rec_wts[0], use_bias=False, units=units) - - split_indices = [keras_layer.units] - recurrent = _sym.split(matrix_inner, indices_or_sections=split_indices, axis=1) - recurrent_z = recurrent[0] - recurrent_r = recurrent[1] - - rec_act_z = _convert_recurrent_activation(x_z + recurrent_z, keras_layer) - rec_act_r = _convert_recurrent_activation(x_r + recurrent_r, keras_layer) - - units = keras_layer.units - recurrent_h = _sym.dense(rec_act_r * h_tm1_sym, rec_wts[1], use_bias=False, units=units) - act_hh = _convert_activation(x_h + recurrent_h, keras_layer, None) - - # previous and candidate state mixed by update gate - output = rec_act_z * h_tm1_sym + (1 - rec_act_z) * act_hh - - out_shape = tuple(dim if dim else 1 for dim in _as_list(keras_layer.output_shape)[0]) - output = _sym.reshape(output, shape=out_shape) - return [output, output] - -def _default_skip(insym, keras_layer, _): # pylint: disable=unused-argument - """Layers that can be skipped because they are train time only.""" - return insym - - -_convert_map = { - 'Dense' : _convert_dense, - 'Activation' : _convert_activation, - 'ReLU' : _convert_advanced_activation, - 'LeakyReLU' : _convert_advanced_activation, - 'PReLU' : _convert_advanced_activation, - 'ELU' : _convert_advanced_activation, - 'ThresholdedReLU' : _convert_advanced_activation, - - 'AveragePooling2D' : _convert_pooling, - 'MaxPooling2D' : _convert_pooling, - 'GlobalAveragePooling2D' : _convert_pooling, - 'GlobalMaxPooling2D' : _convert_pooling, - 'Conv2D' : _convert_convolution, - 'Conv2DTranspose' : _convert_convolution, - 'DepthwiseConv2D' : _convert_convolution, - 'SeparableConv2D' : _convert_separable_convolution, - - 'Flatten' : _convert_flatten, - 'Reshape' : _convert_reshape, - 'Concatenate' : _convert_concat, - 'BatchNormalization' : _convert_batchnorm, - - 'Add' : _convert_merge, - 'Subtract' : _convert_merge, - 'Multiply' : _convert_merge, - 'ZeroPadding2D' : _convert_padding, - 'UpSampling2D' : _convert_upsample, - 'Cropping2D' : _convert_cropping, - - # 'ZeroPadding1D' : _convert_padding, - # 'AveragePooling1D' : _convert_pooling, - # 'MaxPooling1D' : _convert_pooling, - # 'GlobalAveragePooling1D' : _convert_pooling, - # 'GlobalMaxPooling1D' : _convert_pooling, - # 'Cropping1D' : _convert_cropping, - # 'UpSampling1D' : _convert_upsample, - # 'UpSampling3D' : _convert_upsample, - # 'Conv1D' : _convert_convolution1d, - - 'SimpleRNN' : _convert_simple_rnn, - 'LSTM' : _convert_lstm, - 'GRU' : 
_convert_gru, - # 'Bidirectional' : _convert_bidirectional, - # 'TimeDistributed' : _default_skip, - - # 'Average' : _convert_merge, - # 'Maximum' : _convert_merge, - # 'Dot' : _convert_merge, - # 'Permute' : _convert_permute, - # 'Embedding' : _convert_embedding, - # 'RepeatVector' : _convert_repeat_vector, - - 'InputLayer' : _default_skip, - 'Dropout' : _default_skip, - 'SpatialDropout2D' : _default_skip, - 'SpatialDropout1D' : _default_skip, -} - - -def _check_unsupported_layers(model): - for layer in model.layers: - op_name = type(layer).__name__ - if op_name not in _convert_map: - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend Keras.'.format(op_name)) - -def _as_list(arr): - """Force being a list, ignore if already is.""" - if isinstance(arr, list): - return arr - return [arr] - -def keras_op_to_nnvm(insym, keras_layer, outname, symtab): - """Convert keras layer to nnvm symbol, and update symtab. - - Parameters - ---------- - insym : nnvm.symbol.Symbol or a list of it - The input nnvm symbol(s) - - keras_layer : keras.layers - The keras layer to be converted - - outname : str - Name of the output nnvm symbol - - symtab : nnvm.frontend.common.SymbolTable - The global symbol table to be updated - """ - op_name = type(keras_layer).__name__ - if op_name not in _convert_map: - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend Keras.'.format(op_name)) - outs = _convert_map[op_name](insym, keras_layer, symtab) - outs = _as_list(outs) - - for t_idx, out in enumerate(outs): - name = outname + ":" + str(t_idx) - symtab.set_var(name, out) - -def from_keras(model): - """Convert keras model to NNVM format. - - Parameters - ---------- - model : keras.engine.training.Model - The keras model to be converted - - Returns - ------- - sym : nnvm.Symbol - Compatible nnvm symbol - - params : dict of str to tvm.NDArray - The parameter dict to be used by nnvm - """ - try: - import keras - except ImportError: - raise ImportError('Keras must be installed') - - assert isinstance(model, keras.engine.training.Model) - if keras.backend.backend() != 'tensorflow': - raise ValueError("Keras frontend currently supports tensorflow backend only.") - if keras.backend.image_data_format() != 'channels_last': - raise ValueError("Keras frontend currently supports data_format = channels_last only.") - _check_unsupported_layers(model) - - symtab = SymbolTable() - for keras_layer in model.layers: - if isinstance(keras_layer, keras.engine.InputLayer): - symtab.get_var(keras_layer.name, must_contain=False) - else: - inbound_nodes = keras_layer.inbound_nodes if hasattr(keras_layer, 'inbound_nodes') \ - else keras_layer._inbound_nodes if hasattr(keras_layer, '_inbound_nodes') \ - else None - if inbound_nodes is None: - raise TypeError("Unknown layer type or unsupported Keras version : {}" - .format(keras_layer)) - for node_idx, node in enumerate(inbound_nodes): - # If some nodes in imported model is not relevant to the current model, - # skip such layers. model._network_nodes contains keys of all nodes relevant - # to the current model. - if not model._node_key(keras_layer, node_idx) in model._network_nodes: - continue - - insym = [] - - # Since Keras allows creating multiple layers from the same name instance, - # we append node index to the symbol name to make it unique. - # The one exception is InputLayer. Changing input variable names after conversion - # would confuse users, so we should keep them as far as possible. 
Fortunately, - # they are named uniquely to input_1, input_2, input_3 ... by default. - zip_node = zip(node.node_indices, node.tensor_indices, node.inbound_layers) - for n_idx, t_idx, layer in zip_node: - if isinstance(layer, keras.engine.InputLayer): - sym = symtab.get_var(layer.name, must_contain=True) - else: - sym_name = layer.name + ':' + str(n_idx) + ':' + str(t_idx) - sym = symtab.get_var(sym_name, must_contain=True) - insym.append(sym) - - if len(insym) == 1: - insym = insym[0] - keras_op_to_nnvm(insym, keras_layer, keras_layer.name + ':' + str(node_idx), symtab) - - #model._output_coordinates contains out_node(oc[0]), node_index(oc[1]) and tensor index(oc[2]) - #Get all output nodes in symtab using the name made from above values. The out symbols - #were added to symtab in keras_op_to_nnvm using this name. For multiple outputs, make a list - #with these output symbols and Group them. - outsym = [symtab.get_var(oc[0].name + ":" + str(oc[1]) + ":" + str(oc[2])) - for oc in model._output_coordinates] - - tvmparams = {k:tvm.nd.array(np.array(v, dtype=np.float32)) for k, v in symtab.params.items()} - return _sym.Group(outsym), tvmparams diff --git a/nnvm/python/nnvm/frontend/mxnet.py b/nnvm/python/nnvm/frontend/mxnet.py deleted file mode 100644 index ff2b1456cbcd..000000000000 --- a/nnvm/python/nnvm/frontend/mxnet.py +++ /dev/null @@ -1,575 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, import-self -"""MXNet symbol frontend.""" -from __future__ import absolute_import as _abs -import json -import tvm -from .. 
import symbol as _sym -from .common import get_nnvm_op, required_attr, parse_tshape, parse_bool_str - -__all__ = ['from_mxnet'] - -def _rename(new_name): - def impl(inputs, attrs): - return get_nnvm_op(new_name)(*inputs, **attrs) - return impl - -def _pooling(inputs, attrs): - kernel = parse_tshape(required_attr(attrs, 'kernel', 'pooling')) - if len(kernel) != 2: - raise tvm.error.OpAttributeUnImplemented( - 'Non-2D kernels are not supported for Pool2D.') - global_pool = 'global' if parse_bool_str(attrs, 'global_pool') else '' - pool_type = required_attr(attrs, 'pool_type', 'pooling') - if pool_type not in ['avg', 'max']: - raise tvm.error.OpNotImplemented( - 'Only max and average pooling are supported in frontend MXNet.') - op_name, new_attrs = '_'.join([global_pool, pool_type, 'pool2d']).strip('_'), {} - # new_attrs['layout'] = 'NCHW' - if not global_pool: - new_attrs['pool_size'] = kernel - new_attrs['strides'] = attrs.get('stride', (1, 1)) - new_attrs['padding'] = attrs.get('pad', (0, 0)) - new_attrs['ceil_mode'] = (attrs.get('pooling_convention', 'valid') == 'full') - if pool_type == 'avg': - new_attrs['count_include_pad'] = attrs.get('count_include_pad', True) - return get_nnvm_op(op_name)(*inputs, **new_attrs) - -def _batch_norm(inputs, attrs): - if parse_bool_str(attrs, 'output_mean_var'): - raise tvm.error.OpAttributeUnImplemented( - 'Attribute "output_mean_var" is not supported in operator batch_norm.') - # if parse_bool_str(attrs, 'fix_gamma'): - # _warn_not_used('fix_gamma', 'batch_norm') - if parse_bool_str(attrs, 'use_global_stats'): - from warnings import warn - warn( - 'Attribute "use_global_stats" is ignored in operator batch_norm.') - # if parse_bool_str(attrs, 'momentum'): - # _warn_not_used('momentum', 'batch_norm') - op_name, new_attrs = 'batch_norm', {} - new_attrs['axis'] = attrs.get('axis', 1) - new_attrs['epsilon'] = attrs.get('eps', 0.001) - new_attrs['center'] = True - new_attrs['scale'] = not parse_bool_str(attrs, 'fix_gamma', default="False") - return get_nnvm_op(op_name)(*inputs, **new_attrs) - -def _concat(inputs, attrs): - op_name = 'concatenate' - new_attrs = {'axis': attrs.get('dim', 1)} - return get_nnvm_op(op_name)(*inputs, **new_attrs) - -def _conv2d(inputs, attrs): - kernel = parse_tshape(required_attr(attrs, 'kernel', 'conv2d')) - if len(kernel) != 2: - raise tvm.error.OpAttributeUnimplemented( - 'Non-2D kernels are not supported for operator Conv2D.') - layout = attrs.get('layout', 'NCHW') - if layout not in ['NCHW', 'NHWC']: - raise tvm.error.OpAttributeUnimplemented( - 'Layout {} is not supported in operator Conv2D.'.format(layout)) - if 'kernel_layout' in attrs: - kernel_layout = attrs['kernel_layout'] - else: - kernel_layout = 'HWIO' if layout == 'NHWC' else 'OIHW' - op_name, new_attrs = 'conv2d', {} - new_attrs['channels'] = required_attr(attrs, 'num_filter', 'conv2d') - new_attrs['kernel_size'] = kernel - new_attrs['strides'] = attrs.get('stride', (1, 1)) - new_attrs['padding'] = attrs.get('pad', (0, 0)) - new_attrs['dilation'] = attrs.get('dilate', (1, 1)) - new_attrs['groups'] = attrs.get('num_group', 1) - new_attrs['layout'] = layout - new_attrs['kernel_layout'] = kernel_layout - new_attrs['use_bias'] = attrs.get('no_bias', 'False').strip() == 'False' - return get_nnvm_op(op_name)(*inputs, **new_attrs) - -def _conv2d_transpose(inputs, attrs): - if 'target_shape' in attrs: - raise tvm.error.OpAttributeUnimplemented( - 'Attribute "target_shape" is not supported in operator Conv2D-transpose.') - kernel = parse_tshape(required_attr(attrs, 
'kernel', 'conv2d_transpose')) - if len(kernel) != 2: - raise tvm.error.OpAttributeInvalid( - 'Non-2D kernels are not supported in Conv2D-transpose.') - layout = attrs.get('layout', 'NCHW') - if layout not in ['NCHW', 'NHWC']: - raise tvm.error.OpAttributeUnimplemented( - 'Layout {} is not supported in operator Conv2D-transpose.') - if 'kernel_layout' in attrs: - kernel_layout = attrs['kernel_layout'] - else: - kernel_layout = 'HWIO' if layout == 'NHWC' else 'OIHW' - op_name, new_attrs = 'conv2d_transpose', {} - new_attrs['channels'] = required_attr(attrs, 'num_filter', 'conv2d_transpose') - new_attrs['kernel_size'] = kernel - new_attrs['strides'] = attrs.get('stride', (1, 1)) - new_attrs['output_padding'] = attrs.get('adj', (0, 0)) - new_attrs['padding'] = attrs.get('pad', (0, 0)) - new_attrs['dilation'] = attrs.get('dilate', (1, 1)) - new_attrs['groups'] = attrs.get('num_group', 1) - new_attrs['layout'] = layout - new_attrs['kernel_layout'] = kernel_layout - new_attrs['use_bias'] = not parse_bool_str(attrs, 'no_bias') - return get_nnvm_op(op_name)(*inputs, **new_attrs) - -def _dense(inputs, attrs): - import mxnet as mx - op_name, new_attrs = 'dense', {} - new_attrs['units'] = required_attr(attrs, 'num_hidden', 'dense') - new_attrs['use_bias'] = not parse_bool_str(attrs, 'no_bias') - try: - _ = mx.sym.FullyConnected(mx.sym.var('x'), num_hidden=1, flatten=True) - has_flatten = True - except mx.base.MXNetError: - # no flatten attribute in old mxnet - has_flatten = False - use_flatten = parse_bool_str(attrs, 'flatten', 'True') - if has_flatten and use_flatten: - inputs[0] = _sym.flatten(inputs[0]) - return get_nnvm_op(op_name)(*inputs, **new_attrs) - -def _dropout(inputs, attrs): - op_name, new_attrs = 'dropout', {} - new_attrs['rate'] = attrs.get('p', 0.5) - return get_nnvm_op(op_name)(*inputs, **new_attrs) - -def _leaky_relu(inputs, attrs): - act_type = required_attr(attrs, 'act_type', 'leaky_relu') - if act_type in ['leaky', 'prelu']: - op_name, new_attrs = act_type, {} - if act_type == 'leaky': - new_attrs['alpha'] = attrs.get('slope', 0.25) - sym = get_nnvm_op(op_name)(*inputs, **new_attrs) - elif act_type == 'elu': - slope = attrs.get('slope', 0.25) - sym = -slope * _sym.relu(1 - _sym.exp(*inputs)) + _sym.relu(*inputs) - elif act_type == 'rrelu': - lower_bound = float(required_attr(attrs, 'lower_bound', 'leaky_relu')) - upper_bound = float(required_attr(attrs, 'upper_bound', 'leaky_relu')) - slope = (lower_bound + upper_bound) / 2.0 - op_name, new_attrs = 'leaky_relu', {'alpha': str(slope)} - sym = get_nnvm_op(op_name)(*inputs, **new_attrs) - else: - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend MXNet.'.format(act_type)) - return sym - -def _activations(inputs, attrs): - act_type = required_attr(attrs, 'act_type', 'activations') - if act_type in ['relu', 'sigmoid', 'tanh']: - op_name, new_attrs = act_type, {} - sym = get_nnvm_op(op_name)(*inputs, **new_attrs) - elif act_type == 'softrelu': - sym = _sym.log((1 + _sym.exp(*inputs))) - else: - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend MXNet.'.format(act_type)) - return sym - -def _reshape(inputs, attrs): - if parse_bool_str(attrs, 'reverse'): - raise tvm.error.OpAttributeUnimplemented( - 'Attribute "reverse" is not supported in operator Reshape.') - op_name, new_attrs = 'reshape', {} - new_attrs['shape'] = required_attr(attrs, 'shape', 'reshape') - return get_nnvm_op(op_name)(*inputs, **new_attrs) - -def _slice(inputs, attrs): - begin = attrs.get('begin', None) - end = 
attrs.get('end', None) - stride = attrs.get('step', None) - if begin is None or end is None: - raise RuntimeError('begin and end are required params') - if 'None' in begin or 'None' in end: - raise RuntimeError('None in begin or end not supported yet...') - new_attrs = {'begin': begin, 'end': end} - if stride is not None: - new_attrs['stride'] = stride - return get_nnvm_op('strided_slice')(inputs[0], **new_attrs) - -def _split(inputs, attrs): - op_name, new_attrs = 'split', {} - axis = attrs.get('axis', 1) - new_attrs['indices_or_sections'] = required_attr(attrs, 'num_outputs', 'split') - new_attrs['axis'] = axis - outputs = get_nnvm_op(op_name)(*inputs, **new_attrs) - if parse_bool_str(attrs, 'squeeze_axis'): - squeeze_attrs = {'axis': axis} - outputs = _sym.Group([get_nnvm_op('squeeze')(o, **squeeze_attrs) for o in outputs]) - return outputs - -def _softmax_activation(inputs, attrs): - op_name, new_attrs = 'softmax', {} - mode = attrs.get('mode', 'instance') - new_attrs['axis'] = 0 if mode == 'instance' else 1 - return get_nnvm_op(op_name)(inputs[0], **new_attrs) - -def _softmax_output(inputs, attrs): - op_name, new_attrs = 'softmax', {} - if parse_bool_str(attrs, 'multi_output'): - new_attrs['axis'] = 1 - return get_nnvm_op(op_name)(inputs[0], **new_attrs) - -def _upsampling(inputs, attrs): - scale = attrs.get('scale') - new_attrs = {'scale':int(scale)} - return get_nnvm_op('upsampling')(inputs[0], **new_attrs) - -def _clip(inputs, attrs): - op_name, new_attrs = "clip", {} - new_attrs['a_min'] = required_attr(attrs, 'a_min', 'clip') - new_attrs['a_max'] = required_attr(attrs, 'a_max', 'clip') - return get_nnvm_op(op_name)(*inputs, **new_attrs) - -def _contrib_multibox_detection(inputs, attrs): - clip = parse_bool_str(attrs, 'clip', default='True') - threshold = attrs.get('threshold') or 0.01 - nms_threshold = attrs.get('nms_threshold') or 0.5 - force_suppress = parse_bool_str(attrs, 'force_suppress', default='False') - variances = tuple([float(x.strip()) for x in attrs.get('variances').strip('()').split(',')]) \ - if attrs.get('variances') is not None else (0.1, 0.1, 0.2, 0.2) - nms_topk = attrs.get('nms_topk') or -1 - new_attrs0 = {'clip': clip, 'threshold': float(threshold), 'variances': variances} - new_attrs1 = {'return_indices': False, 'iou_threshold': float(nms_threshold), - 'force_suppress': force_suppress, 'top_k': int(nms_topk)} - data, valid_count = get_nnvm_op('multibox_transform_loc')(inputs[0], inputs[1], - inputs[2], **new_attrs0) - return get_nnvm_op('non_max_suppression')(data, valid_count, **new_attrs1) - -def _elemwise_sum(inputs, _): - new_attrs = {'num_args':len(inputs)} - return get_nnvm_op('elemwise_sum')(*inputs, **new_attrs) - -def _crop_like(inputs, attrs): - new_attrs = {} - offsets = \ - tuple([float(x.strip()) for x in attrs.get('offsets').strip('()').split(',')]) \ - if attrs.get('offsets') is not None else (0, 0) - if offsets != (0, 0): - raise tvm.error.OpAttributeInvalid( - 'crop_like offsets must equal (0,0).') - center_crop = parse_bool_str(attrs, 'center_crop', default="False") - if center_crop: - raise tvm.error.OpAttributeUnimplemented( - 'Center crop is not supported in operator crop_like.') - if len(inputs) < 2: - raise tvm.error.OpAttributeUnimplemented("Only support crop_like pattern.") - new_attrs["axis"] = [2, 3] - return get_nnvm_op('slice_like')(inputs[0], inputs[1], **new_attrs) - - -def _expand_dims(inputs, attrs): - op_name, new_attrs = 'expand_dims', {} - new_attrs['axis'] = required_attr(attrs, 'axis', 'expand_dims') - return 
get_nnvm_op(op_name)(*inputs, **new_attrs) - -def _lrn(inputs, attrs): - op_name, new_attrs = 'lrn', {} - new_attrs['alpha'] = attrs.get('alpha', 0.0001) - new_attrs['beta'] = attrs.get('beta', 0.75) - new_attrs['bias'] = attrs.get('knorm', 2) - # NCHW format and normalization along channel axis - new_attrs['axis'] = 1 - new_attrs['size'] = required_attr(attrs, 'nsize', 'lrn') - return get_nnvm_op(op_name)(*inputs, **new_attrs) - -def _minimum(inputs, attrs): - return get_nnvm_op('broadcast_min')(*inputs, **attrs) - -def _maximum(inputs, attrs): - return get_nnvm_op('broadcast_max')(*inputs, **attrs) - -def _ones(_, attrs): - op_name = 'ones' - return get_nnvm_op(op_name)(**attrs) - -def _zeros(_, attrs): - op_name = 'zeros' - return get_nnvm_op(op_name)(**attrs) - -def _argmax(inputs, attrs): - op_name, new_attrs = 'argmax', {} - new_attrs['dtype'] = 'float32' - new_attrs['axis'] = attrs.get('axis', 0) - new_attrs['keepdims'] = parse_bool_str(attrs, 'keepdims', default="False") - return get_nnvm_op(op_name)(*inputs, **new_attrs) - -def _argmin(inputs, attrs): - op_name, new_attrs = 'argmin', {} - new_attrs['dtype'] = 'float32' - new_attrs['axis'] = attrs.get('axis', 0) - new_attrs['keepdims'] = parse_bool_str(attrs, 'keepdims', default="False") - return get_nnvm_op(op_name)(*inputs, **new_attrs) - -_identity_list = ['__add_scalar__', '__add_symbol__', '__div_scalar__', - '__div_symbol__', '__mul_scalar__', '__mul_symbol__', - '__pow_scalar__', '__rdiv_scalar__', '__rpow_scalar__', - '__rsub_scalar__', '__sub_scalar__', '__sub_symbol__', - 'broadcast_add', 'broadcast_div', 'broadcast_mul', - 'broadcast_sub', 'broadcast_to', 'cast', 'elemwise_add', - 'elemwise_div', 'elemwise_mul', 'elemwise_sub', 'exp', - 'flatten', 'log', 'log_softmax', 'max', 'min', 'negative', - 'ones_like', 'relu', 'sigmoid', 'slice_like', 'softmax', - 'sum', 'tanh', 'transpose', 'zeros_like', 'gather_nd', - 'reshape_like', 'where'] - -_convert_map = { - '_copy' : _rename('copy'), - '_div_scalar' : _rename('__div_scalar__'), - '_minus_scalar' : _rename('__sub_scalar__'), - '_mul_scalar' : _rename('__mul_scalar__'), - '_plus_scalar' : _rename('__add_scalar__'), - '_rdiv_scalar' : _rename('__rdiv_scalar__'), - '_rminus_scalar': _rename('__rsub_scalar__'), - '_contrib_MultiBoxPrior' : _rename('multibox_prior'), - '_contrib_MultiBoxDetection' : _contrib_multibox_detection, - '_minimum' : _minimum, - '_maximum' : _maximum, - '_ones' : _ones, - '_zeros' : _zeros, - 'argmax' : _argmax, - 'argmin' : _argmin, - 'Activation' : _activations, - 'BatchNorm' : _batch_norm, - 'BatchNorm_v1' : _batch_norm, - 'Cast' : _rename('cast'), - 'Concat' : _concat, - 'Convolution' : _conv2d, - 'Convolution_v1': _conv2d, - 'Crop' : _crop_like, - 'Deconvolution' : _conv2d_transpose, - 'Dropout' : _dropout, - 'Flatten' : _rename('flatten'), - 'FullyConnected': _dense, - 'LeakyReLU' : _leaky_relu, - 'Pooling' : _pooling, - 'Pooling_v1' : _pooling, - 'Reshape' : _reshape, - 'slice' : _slice, - 'SliceChannel' : _split, - 'split' : _split, - 'Softmax' : _rename('softmax'), - 'SoftmaxActivation' : _softmax_activation, - 'SoftmaxOutput' : _softmax_output, - 'add_n' : _elemwise_sum, - 'concat' : _concat, - 'max_axis' : _rename('max'), - 'min_axis' : _rename('min'), - 'reshape' : _reshape, - 'sum_axis' : _rename('sum'), - 'UpSampling' : _upsampling, - 'clip' : _clip, - 'expand_dims' : _expand_dims, - 'LRN' : _lrn -} - -def _convert_symbol(op_name, inputs, attrs, - identity_list=None, - convert_map=None): - """Convert from mxnet op to nnvm op. 
- The converter must specify some conversions explicitly to - support gluon format ops such as conv2d... - - Parameters - ---------- - op_name : str - Operator name, such as Convolution, FullyConnected - inputs : list of nnvm.Symbol - List of input symbols. - attrs : dict - Dict of operator attributes - identity_list : list - List of operators that don't require conversion - convert_map : dict - Dict of name : callable, where name is the op's name that - require conversion to nnvm, callable are functions which - take attrs and return (new_op_name, new_attrs) - - Returns - ------- - sym : nnvm.Symbol - Converted nnvm Symbol - """ - identity_list = identity_list if identity_list else _identity_list - convert_map = convert_map if convert_map else _convert_map - if op_name in identity_list: - op = get_nnvm_op(op_name) - sym = op(*inputs, **attrs) - elif op_name in convert_map: - sym = convert_map[op_name](inputs, attrs) - else: - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend MXNet.'.format(op_name)) - return sym - -def _as_list(arr): - """Force being a list, ignore if already is.""" - if isinstance(arr, list): - return arr - return [arr] - -def _topo_sort(symbol): - """Sort all symbols in the mxnet graph in topological order. - - Parameters - ---------- - symbol : mxnet.sym.Symbol - - Returns: - ------- - list - List of mxnet symbol - """ - queue = [] - symbol_map = {} - deps = {} - dep_cnts = {} - for s in symbol: - symbol_map[s.attr('name')] = s - queue.append(s) - while queue: - sym = queue.pop(0) - name = sym.attr('name') - childs = sym.get_children() - if childs is None: - dep_cnts[name] = 0 - else: - dep_cnts[name] = len({c.attr('name') for c in childs}) - for child in childs: - child_name = child.attr('name') - if child_name not in deps: - deps[child_name] = set() - deps[child_name].add(name) - if child_name not in symbol_map: - symbol_map[child_name] = child - queue.append(child) - order = [] - while dep_cnts: - remove = [] - for name in dep_cnts: - if dep_cnts[name] == 0: - order.append(symbol_map[name]) - remove.append(name) - if name in deps: - for other in deps[name]: - dep_cnts[other] -= 1 - for name in remove: - del dep_cnts[name] - return order - -def _from_mxnet_impl(symbol, graph): - """Convert mxnet symbol to nnvm implementation. - Reconstruct a nnvm symbol by traversing the mxnet symbol. - - Parameters - ---------- - symbol : mxnet.sym.Symbol - Incompatible symbol from mxnet, sharing similar graph structure. - The op_name and attrs inside are not always compatible. - graph : dict - Reusable nodes are stored in graph. 
- - Returns: - ------- - nnvm.sym.Symbol - Converted symbol - """ - def get_node(sym): - name = sym.attr('name') - if name not in graph: - return None - output_index = json.loads(sym.tojson())['heads'][0][1] - return graph[name][output_index] - - assert symbol is not None - # Traverse all symbols in topological order - for sym in _topo_sort(symbol): - name = sym.attr('name') - attr = sym.list_attr() - op_name = sym.attr('op_name') - childs = sym.get_children() - if childs is not None: - childs = [get_node(child) for child in childs] - childs = [x for y in childs for x in _as_list(y)] - node = _convert_symbol(op_name, childs, attr) - elif op_name != 'null': - node = _convert_symbol(op_name, [], attr) - else: - node = _sym.Variable(name=name, **attr) - graph[name] = node - nodes = [] - for sym in symbol: - node = get_node(sym) - assert node is not None - nodes.append(node) - if len(nodes) > 1: - return _sym.Group(nodes) - return nodes[0] - -def from_mxnet(symbol, arg_params=None, aux_params=None): - """Convert from MXNet's model into compatible NNVM format. - - Parameters - ---------- - symbol : mxnet.Symbol or mxnet.gluon.HybridBlock - MXNet symbol - - arg_params : dict of str to mx.NDArray - The argument parameters in mxnet - - aux_params : dict of str to mx.NDArray - The auxiliary parameters in mxnet - - Returns - ------- - sym : nnvm.Symbol - Compatible nnvm symbol - - params : dict of str to tvm.NDArray - The parameter dict to be used by nnvm - """ - try: - import mxnet as mx - except ImportError as e: - raise ImportError('{}. MXNet is required to parse symbols.'.format(e)) - - if isinstance(symbol, mx.sym.Symbol): - sym = _from_mxnet_impl(symbol, {}) - params = {} - arg_params = arg_params if arg_params else {} - aux_params = aux_params if aux_params else {} - for k, v in arg_params.items(): - params[k] = tvm.nd.array(v.asnumpy()) - for k, v in aux_params.items(): - params[k] = tvm.nd.array(v.asnumpy()) - elif isinstance(symbol, mx.gluon.HybridBlock): - data = mx.sym.Variable('data') - sym = symbol(data) - sym = _from_mxnet_impl(sym, {}) - params = {} - for k, v in symbol.collect_params().items(): - params[k] = tvm.nd.array(v.data().asnumpy()) - elif isinstance(symbol, mx.gluon.Block): - raise NotImplementedError("Only Hybrid Blocks are supported now.") - else: - msg = "mxnet.Symbol or gluon.HybridBlock expected, got {}".format(type(symbol)) - raise ValueError(msg) - if isinstance(sym, list): - sym = _sym.Group(sym) - return sym, params diff --git a/nnvm/python/nnvm/frontend/onnx.py b/nnvm/python/nnvm/frontend/onnx.py deleted file mode 100644 index 8a92821476a5..000000000000 --- a/nnvm/python/nnvm/frontend/onnx.py +++ /dev/null @@ -1,1038 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines -"""ONNX: Open Neural Network Exchange frontend.""" -from __future__ import absolute_import as _abs -import numpy as np -import tvm -from .. import symbol as _sym -from .common import get_nnvm_op, Renamer, SymbolTable, AttrConverter as AttrCvt -from .onnx_caffe2_utils import dimension_picker, dimension_constraint, \ - infer_channels, revert_caffe2_pad - -__all__ = ['from_onnx'] - - -def onnx_storage_order2layout(storage_order): - if storage_order not in (0, 1): - raise tvm.error.OpAttributeInvalid('Mode of storage_order must be either 0 or 1') - - return 'NCHW' if storage_order == 0 else 'NHWC' - - -class OnnxOpConverter(object): - """ A helper class for holding onnx op converters. - """ - - @classmethod - def get_converter(cls, opset): - """ Get converter matches given opset. - - :param opset: opset from model. - :return: converter, which should be `_impl_vx`. Number x is the biggest - number smaller than or equal to opset belongs to all support versions. - """ - versions = [ - int(d.replace('_impl_v', '')) for d in dir(cls) if '_impl_v' in d - ] - versions = sorted(versions + [opset]) - version = versions[ - max([i for i, v in enumerate(versions) if v == opset]) - 1] - if hasattr(cls, '_impl_v{}'.format(version)): - return getattr(cls, '_impl_v{}'.format(version)) - raise NotImplementedError( - 'opset version {} of {} not implemented'.format( - version, cls.__name__)) - - -class Elemwise(OnnxOpConverter): - """ A helper class for elemwise op converters. - """ - - name = '' - - @classmethod - def _math_name_picker(cls, suffix): - - def _impl(attr): - if attr.get('broadcast', 0): - return 'broadcast_' + suffix - return 'elemwise_' + suffix - - return _impl - - @classmethod - def _impl_v1(cls, inputs, attr, params): - assert len(inputs) == 2, "Math op take 2 inputs, {} given".format( - len(inputs)) - op_name = cls._math_name_picker(cls.name)(attr) - axis = int(attr.get('axis', 0)) - conv_ops = ["conv2d", "conv2d_transpose"] - if op_name == 'broadcast_add' and inputs[0].attr('op_name') in conv_ops: - # TODO(zhreshold): remove hard coded infershape - inputs[1] = _sym.expand_dims(inputs[1], axis=axis, num_newaxis=2) - return get_nnvm_op(op_name)(*inputs) - - -class Pool(OnnxOpConverter): - """ A helper class for pool op converters. - """ - - name = '' - - @classmethod - def _impl_v1(cls, inputs, attr, params): - return AttrCvt( - op_name=dimension_picker(cls.name), - transforms={ - 'kernel_shape': 'pool_size', - 'pads': ('padding', (0, 0), revert_caffe2_pad) - }, - # very weird attributes here in onnx, force check - ignores=['dilations'], - # TODO(zhreshold): make sure ceil_mode in onnx, and layout? - extras={'ceil_mode': False}, - custom_check=dimension_constraint())(inputs, attr, params) - - -class Absolute(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - return _sym.relu(inputs[0]) + _sym.relu(_sym.negative(inputs[0])) - - -class Add(Elemwise): - name = 'add' - - -class AveragePool(Pool): - name = 'avg_pool' - - -class BatchNorm(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - # TODO(zhreshold): 'spatial' is not properly handled here. 
- return AttrCvt( - op_name='batch_norm', - disables=['momentum'], - ignores=['spatial', 'is_test', 'consumed_inputs'])(inputs, attr, - params) - - -class Conv(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - # get number of channels - channels = infer_channels(inputs[1], params) - attr['channels'] = channels - return AttrCvt( - op_name=dimension_picker('conv'), - transforms={ - 'kernel_shape': 'kernel_size', - 'dilations': ('dilation', (0, 0)), - 'pads': ('padding', (0, 0), revert_caffe2_pad), - 'group': ('groups', 1) - }, - extras={'use_bias': len(inputs) == 3}, - custom_check=dimension_constraint())(inputs, attr, params) - - -class ConvTranspose(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - # get number of channels - channels = infer_channels(inputs[1], params, True) - attr['channels'] = channels - groups = attr.pop('group') - attr['groups'] = groups - return AttrCvt( - op_name=dimension_picker('conv', '_transpose'), - transforms={ - 'kernel_shape': 'kernel_size', - 'dilations': ('dilation', (0, 0)), - 'pads': ('padding', (0, 0), revert_caffe2_pad) - }, - disables=['output_shape'], - extras={'use_bias': len(inputs) == 3}, - custom_check=dimension_constraint())(inputs, attr, params) - - -class Div(Elemwise): - name = 'div' - - -class Elu(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - alpha = float(attr.get('alpha', 1.0)) - return -alpha * _sym.relu(1 - _sym.exp(inputs[0])) + _sym.relu( - inputs[0]) - - -class Gemm(OnnxOpConverter): - """ Operator converter for Gemm. - """ - - @classmethod - def _impl_v1(cls, inputs, attr, params): - assert len(inputs) == 3, "Gemm op take 3 inputs, {} given".format( - len(inputs)) - # Y = alpha * A * B + beta * C - alpha = float(attr.get('alpha', 1.0)) - beta = float(attr.get('beta', 1.0)) - transA = int(attr.get('transA', 0)) - transB = int(attr.get('transB', 0)) - # get number of channels - channels = infer_channels(inputs[1], params, not transB) - if transA: - inputs[0] = _sym.transpose(inputs[0], axes=(1, 0)) - if not transB: - inputs[1] = _sym.transpose(inputs[1], axes=(1, 0)) - inputs[0] = _sym.flatten(inputs[0]) - return _sym.dense( - alpha * inputs[0], inputs[1], beta * inputs[2], units=channels) - - -class MaxPool(Pool): - """ Operator converter for MaxPool - """ - name = 'max_pool' - - @classmethod - def _impl_v8(cls, inputs, attr, params): - return AttrCvt( - op_name=dimension_picker(cls.name), - transforms={ - 'kernel_shape': 'pool_size', - 'pads': ('padding', (0, 0), revert_caffe2_pad), - 'storage_order': ('layout', 'NCHW', onnx_storage_order2layout), - }, - # very weird attributes here in onnx, force check - ignores=['dilations', 'auto_pad'], - # TODO(higumachan): make sure ceil_mode in onnx, and layout? - extras={'ceil_mode': False}, - custom_check=dimension_constraint())(inputs, attr, params) - - @classmethod - def _impl_v10(cls, inputs, attr, params): - return AttrCvt( - op_name=dimension_picker(cls.name), - transforms={ - 'kernel_shape': 'pool_size', - 'pads': ('padding', (0, 0), revert_caffe2_pad), - 'storage_order': ('layout', 'NCHW', onnx_storage_order2layout), - 'ceil_mode': 'ceil_mode' - }, - # very weird attributes here in onnx, force check - ignores=['dilations', 'auto_pad'], - custom_check=dimension_constraint())(inputs, attr, params) - -class Mul(Elemwise): - name = 'mul' - - -class Pad(OnnxOpConverter): - """ Operator converter for Pad. 
- """ - - @classmethod - def _impl_v1(cls, inputs, attr, params): - pad_width = [] - pads = attr.pop('paddings') - dims = int(len(pads) / 2) - for i in range(dims): - pad_width.append((pads[i], pads[i+dims])) - attr['pad_width'] = pad_width - - return AttrCvt( - op_name='pad', - transforms={ - 'value': 'pad_value', - }, - ignores=['mode'], - custom_check=(lambda attrs: attrs.get('mode', 'constant').decode("utf-8") == 'constant', - 'split mode != constant'))(inputs, attr, params) - - @classmethod - def _impl_v2(cls, inputs, attr, params): - pad_width = [] - pads = attr.pop('pads') - dims = int(len(pads) / 2) - for i in range(dims): - pad_width.append((pads[i], pads[i+dims])) - attr['pad_width'] = pad_width - - return AttrCvt( - op_name='pad', - transforms={ - 'value': 'pad_value', - }, - ignores=['mode'], - custom_check=(lambda attrs: attrs.get('mode', 'constant').decode("utf-8") == 'constant', - 'split mode != constant'))(inputs, attr, params) - - -class ParametricSoftPlus(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - alpha = float(attr.get('alpha', 1.0)) - beta = float(attr.get('beta', 1.0)) - return _sym.log(_sym.exp(beta * inputs[0]) + 1) * alpha - - -class Prelu(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - assert len(inputs) == 2, "Prelu need 2 inputs, {} given".format( - len(inputs)) - return _sym.prelu(inputs[0], inputs[1]) - - -class Reciprocal(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - return 1.0 / inputs[0] - - -class Reshape(OnnxOpConverter): - """ Operator converter for Reshape. - """ - - @classmethod - def _impl_v1(cls, inputs, attr, params): - return _sym.reshape(inputs[0], shape=attr['shape']) - - @classmethod - def _impl_v5(cls, inputs, attr, params): - if inputs[1].list_output_names()[0] in params: - shape = tuple(params[inputs[1].list_output_names()[0]].asnumpy()) - out = _sym.reshape(inputs[0], shape=shape) - else: - out = _sym.reshape_like(inputs[0], inputs[1]) - - return out - -class Scale(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - scale = float(attr.get('scale', 1.0)) - return inputs[0] * scale - - -class Selu(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - alpha = float(attr.get('alpha', 1.6732)) - gamma = float(attr.get('gamma', 1.0507)) - return gamma * ( - -alpha * _sym.relu(1 - _sym.exp(inputs[0])) + _sym.relu(inputs[0])) - - -class ScaledTanh(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - alpha = float(attr.get('alpha', 1.0)) - beta = float(attr.get('beta', 1.0)) - return _sym.tanh(beta * inputs[0]) * alpha - - -class SoftPlus(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - return _sym.log(_sym.exp(inputs[0]) + 1) - - -class Softsign(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - return inputs[0] / (1 + Absolute.get_converter(1)(inputs, attr, params)) - - -class Sub(Elemwise): - name = 'sub' - - -class Sum(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - # Onnx Sum Operator - for in_index in range(len(inputs) - 1): - inputs[in_index + 1] = _sym.broadcast_add(inputs[in_index], - inputs[in_index + 1]) - - return inputs[len(inputs) - 1] - - -class ThresholdedRelu(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - alpha = float(attr.get('alpha', 1.0)) - alpha_tensor = _sym.full_like(inputs[0], fill_value=float(alpha)) - return 
_sym.elemwise_mul(inputs[0], _sym.greater(inputs[0], alpha_tensor)) - -class ImageScaler(OnnxOpConverter): - - @classmethod - def _impl_v1(cls, inputs, attr, params): - channelScale = attr['scale'] - bias_attr = attr['bias'] - bias = SymbolTable().new_const(np.array(bias_attr).reshape([3, 1, 1])) - scaledChannel = _sym.__mul_scalar__(inputs[0], scalar=channelScale) - ret = _sym.broadcast_add(scaledChannel, bias) - return ret - - -def _broadcast_constraint(): - - def _broadcast_check(attrs): - if attrs.get('axis', None): - return False - return True - - return _broadcast_check, "Specifying broadcast axis not allowed." - - -def _fully_connected(opset): - - def _impl(inputs, attr, params): - # get number of channels - channels = infer_channels(inputs[1], params) - attr['units'] = channels - return AttrCvt('dense', ignores=['axis', 'axis_w'])(inputs, attr) - - return _impl - - -class Upsample(OnnxOpConverter): - """ Operator converter for Upsample (nearest mode). - """ - - @classmethod - def _impl_v9(cls, inputs, attr, params): - scales = attr.get('scales') - if not scales: - #Here we are going to higher OPSET version. - assert len(inputs) == 2, "Upsample op take 2 inputs, {} given".format(len(inputs)) - input_name = inputs[1].list_input_names()[0] - scales = params[input_name].asnumpy() - inputs = inputs[:1] - assert len(scales) == 4 and scales[0] == 1.0 and scales[1] == 1.0 and scales[2] == scales[3] - mode = attr.get('mode') - if mode == b'nearest': - method = "NEAREST_NEIGHBOR" - elif mode == b'linear': - method = "BILINEAR" - else: - raise tvm.error.OpAttributeInvalid( - 'Value {} in attribute "mode" of operator Upsample is not valid.'.format(mode)) - return _sym.upsampling(inputs[0], scale=int(scales[-1]), method=method, layout='NCHW') - - -class Shape(OnnxOpConverter): - """ Operator converter for Shape. - """ - - @classmethod - def _impl_v1(cls, inputs, attr, params): - # Result of this operator is prominently used by reshape operator. - # Just pass the input as it is so that reshape_like can be used there. - print("Shape: Differently implemented in NNVM as a bypass (dummy operator)") - return inputs[0] - -class Cast(OnnxOpConverter): - """ Operator converter for Cast. - """ - - @classmethod - def _impl_v1(cls, inputs, attr, params): - return AttrCvt(op_name='cast', transforms={'to': 'dtype'})(inputs, attr) - - @classmethod - def _impl_v5(cls, inputs, attr, params): - try: - from onnx.mapping import TENSOR_TYPE_TO_NP_TYPE - attr['to'] = TENSOR_TYPE_TO_NP_TYPE[attr['to']] - except ImportError as e: - raise ImportError( - "Unable to import onnx.mapping which is required {}".format(e)) - return AttrCvt(op_name='cast', transforms={'to': 'dtype'})(inputs, attr) - - -class Unsqueeze(OnnxOpConverter): - """ Operator converter for Unsqueeze. - """ - - @classmethod - def _impl_v1(cls, inputs, attr, params): - for axes in attr['axes']: - inputs[0] = _sym.expand_dims(inputs[0], axis=axes, num_newaxis=1) - return inputs[0] - - -class Split(OnnxOpConverter): - """ Operator converter for Split. - """ - - @classmethod - def _impl_v1(cls, inputs, attr, params): - attr['indices_or_sections'] = [] - index = 0 - for i in attr['split'][:-1]: - index += i - attr['indices_or_sections'].append(index) - return AttrCvt( - op_name='split', - ignores=['split'])(inputs, attr, params) - - -class Slice(OnnxOpConverter): - """ Operator converter for Slice. 
- """ - @classmethod - def _impl_v1(cls, inputs, attr, params): - if isinstance(attr['starts'], int): - attr['starts'] = (attr['starts'],) - attr['ends'] = (attr['ends'],) - - try: - # Update the starts and ends according to axes if required. - if isinstance(attr['axes'], int): - attr['axes'] = (attr['axes'],) - - if (max(attr['axes']) + 1) != len(attr['axes']): - new_axes = [] - new_starts = [] - new_ends = [] - pop_index = 0 - for i in range(max(attr['axes']) + 1): - if i in attr['axes']: - new_axes.append(i) - new_starts.append(attr['starts'][pop_index]) - new_ends.append(attr['ends'][pop_index]) - pop_index += 1 - else: - new_axes.append(i) - new_starts.append(0) - new_ends.append(np.iinfo(np.int32).max) - attr['axes'] = new_axes - attr['starts'] = new_starts - attr['ends'] = new_ends - except KeyError: - pass - - return AttrCvt(op_name='strided_slice', - transforms={'starts': 'begin', - 'ends': 'end'}, - ignores=['axes'])(inputs, attr) - -class Gather(OnnxOpConverter): - """ Operator converter for Gather. - """ - @classmethod - def _impl_v1(cls, inputs, attr, params): - axis = attr.get('axis', 0) - return AttrCvt(op_name='take', - extras={'axis':axis})(inputs, attr) - -class LRN(OnnxOpConverter): - """ Operator converter for Local Response Normalization. - """ - @classmethod - def _impl_v1(cls, inputs, attr, params): - """LRN support only NCHW format - https://github.com/onnx/onnx/blob/master/docs/Operators.md#LRN - """ - axis = 1 - alpha = attr.get('alpha', 0.0001) - beta = attr.get('beta', 0.75) - bias = attr.get('bias', 1.0) - nsize = attr.get('size') - return _sym.lrn(inputs[0], size=nsize, axis=axis, - alpha=alpha, beta=beta, bias=bias) - -class Maximum(OnnxOpConverter): - """ Operator converter for Maximum. - """ - @classmethod - def _impl_v1(cls, inputs, attr, params): - if not isinstance(inputs, list) or len(inputs) < 2: - raise ValueError("Expect minimum 2 inputs") - _max = inputs[0] - for i in range(1, len(inputs)): - _max = AttrCvt(op_name='broadcast_max')([_max, inputs[i]], {}) - return _max - -class Minimum(OnnxOpConverter): - """ Operator converter for Minimum. - """ - @classmethod - def _impl_v1(cls, inputs, attr, params): - if not isinstance(inputs, list) or len(inputs) < 2: - raise ValueError("Expect minimum 2 inputs") - _min = inputs[0] - for i in range(1, len(inputs)): - _min = AttrCvt(op_name='broadcast_min')([_min, inputs[i]], {}) - return _min - -class Mean(OnnxOpConverter): - """ Operator converter for Mean. - """ - @classmethod - def _impl_v1(cls, inputs, attr, params): - if not isinstance(inputs, list) or len(inputs) < 2: - raise ValueError("Expect minimum 2 inputs") - count = len(inputs) - _sum = inputs[0] - for i in range(1, count): - _sum = AttrCvt(op_name='broadcast_add')([_sum, inputs[i]], {}) - return _sum / count - -class HardSigmoid(OnnxOpConverter): - """ Operator converter for HardSigmoid. - """ - @classmethod - def _impl_v1(cls, inputs, attr, params): - alpha = attr.get('alpha', 0.2) - beta = attr.get('beta', 0.5) - transformX = (inputs[0] * alpha) + beta - attr = {'a_min':0, 'a_max':1} - return AttrCvt(op_name='clip')([transformX], attr) - -class ArgMax(OnnxOpConverter): - """ Operator converter for ArgMax. - """ - @classmethod - def _impl_v1(cls, inputs, attr, params): - axis = attr.get('axis', 0) - keepdims = attr.get('keepdims', True) - attr = {'axis':axis, 'keepdims':keepdims} - return AttrCvt(op_name='argmax')(inputs, attr) - -class ArgMin(OnnxOpConverter): - """ Operator converter for ArgMin. 
- """ - @classmethod - def _impl_v1(cls, inputs, attr, params): - axis = attr.get('axis', 0) - keepdims = attr.get('keepdims', True) - attr = {'axis':axis, 'keepdims':keepdims} - return AttrCvt(op_name='argmin')(inputs, attr) - -class Softmax(OnnxOpConverter): - """ Operator converter for Softmax. - """ - @classmethod - def _impl_v1(cls, inputs, attr, params): - # set default value when axis is not set in the model - if 'axis' not in attr: - attr['axis'] = 1 - return AttrCvt( - op_name='softmax', - transforms={ - 'axis': ('axis', 1), - })(inputs, attr, params) - -class ConstantFill(OnnxOpConverter): - """ Operator converter for ConstantFill. - """ - @classmethod - def _impl_v1(cls, inputs, attr, params): - is_full = True - num_inputs = len(inputs) - if 'shape' in attr: - if num_inputs > 0: - raise ImportError( - "Can't set shape and input tensor at a time") - shape = attr.pop('shape') - else: - if num_inputs == 0: - raise ImportError( - "Either shape attribute or input should be set") - if 'input_as_shape' in attr and attr['input_as_shape']: - shape = params[inputs[0].list_output_names()[0]].asnumpy() - else: - is_full = False - - if not is_full: - if 'extra_shape' in attr: - raise ImportError( - "Extra Shape not supported with fill_like") - - out = AttrCvt( - op_name='full_like', - transforms={'value': 'fill_value'}, - ignores=['dtype'])(inputs, attr) - return _sym.cast(out, dtype=attr['dtype'].decode("utf-8")) - if 'extra_shape' in attr: - shape = shape + attr.pop('extra_shape') - - return AttrCvt( - op_name='full', - transforms={'value': 'fill_value'}, - extras={'shape':shape})(inputs, attr) - -# compatible operators that do NOT require any conversion. -_identity_list = [] - - -# _convert_map defines maps of name to converter functor(callable) -# for 1 to 1 mapping, use Renamer if nothing but name is different -# use AttrCvt if attributes need to be converted -# for 1 to N mapping(composed), use custom callable functions -# for N to 1 mapping, currently not supported(?) 
-def _get_convert_map(opset): - return { - # defs/experimental - 'Identity': Renamer('copy'), - # 'Affine' - 'ThresholdedRelu': ThresholdedRelu.get_converter(opset), - 'ScaledTanh': ScaledTanh.get_converter(opset), - 'ParametricSoftplus': ParametricSoftPlus.get_converter(opset), - 'ConstantFill': ConstantFill.get_converter(opset), - # 'GivenTensorFill' - 'FC': AttrCvt('dense', ignores=['axis', 'axis_w']), - 'Scale': Scale.get_converter(opset), - # 'GRUUnit' - # 'ATen' - 'ImageScaler': ImageScaler.get_converter(opset), - # 'MeanVarianceNormalization' - # 'Crop' - # 'Embedding' - 'Upsample' : Upsample.get_converter(opset), - 'SpatialBN': BatchNorm.get_converter(opset), - - # defs/generator - # 'Constant' # Implemented - # 'RandomUniform' - # 'RandomNormal' - # 'RandomUniformLike' - # 'RandomNormalLike' - - # defs/logical - - # defs/math - 'Add': Add.get_converter(opset), - 'Sub': Sub.get_converter(opset), - 'Mul': Mul.get_converter(opset), - 'Div': Div.get_converter(opset), - 'Neg': Renamer('negative'), - 'Abs': Absolute.get_converter(opset), - 'Reciprocal': Reciprocal.get_converter(opset), - 'Floor': Renamer('floor'), - 'Ceil': Renamer('ceil'), - 'Sqrt': Renamer('sqrt'), - 'Relu': Renamer('relu'), - 'LeakyRelu': Renamer('leaky_relu'), - 'Selu': Selu.get_converter(opset), - 'Elu': Elu.get_converter(opset), - 'Exp': Renamer('exp'), - 'Log': Renamer('log'), - 'Tanh': Renamer('tanh'), - 'Pow': Renamer('broadcast_pow'), - 'PRelu': Prelu.get_converter(opset), - 'Sigmoid': Renamer('sigmoid'), - 'HardSigmoid': HardSigmoid.get_converter(opset), - 'Max': Maximum.get_converter(opset), - 'Min': Minimum.get_converter(opset), - 'Sum': Sum.get_converter(opset), - 'Mean': Mean.get_converter(opset), - 'Clip': AttrCvt('clip', transforms={'min': 'a_min', 'max': 'a_max'}), - # softmax default axis is different in onnx - 'Softmax': Softmax.get_converter(opset), - 'LogSoftmax': AttrCvt('log_softmax', {'axis': ('axis', 1)}), - # 'Hardmax' - 'Softsign': Softsign.get_converter(opset), - 'SoftPlus': SoftPlus.get_converter(opset), - 'Gemm': Gemm.get_converter(opset), - 'MatMul': Renamer('matmul'), - - # defs/nn - 'AveragePool': AveragePool.get_converter(opset), - 'MaxPool': MaxPool.get_converter(opset), - 'Conv': Conv.get_converter(opset), - 'ConvTranspose': ConvTranspose.get_converter(opset), - 'GlobalAveragePool': Renamer('global_avg_pool2d'), - 'GlobalMaxPool': Renamer('global_max_pool2d'), - 'BatchNormalization': BatchNorm.get_converter(opset), - # 'InstanceNormalization' - # 'LpNormalization' - 'Dropout': AttrCvt('dropout', {'ratio': 'rate'}, ignores=['is_test']), - 'Flatten': Renamer('flatten'), - 'LRN': LRN.get_converter(opset), - - # defs/reduction - 'ReduceMax': AttrCvt('max', {'axes': 'axis'}), - 'ReduceMin': AttrCvt('min', {'axes': 'axis'}), - 'ReduceSum': AttrCvt('sum', {'axes': 'axis'}), - 'ReduceMean': AttrCvt('mean', {'axes': 'axis'}), - # 'ReduceProd' - # 'ReduceLogSumExp' - 'ArgMax': ArgMax.get_converter(opset), - 'ArgMin': ArgMin.get_converter(opset), - - # defs/tensor - 'Cast': Cast.get_converter(opset), - 'Reshape': Reshape.get_converter(opset), - 'Concat': Renamer('concatenate'), - 'Split': Split.get_converter(opset), - 'Slice': Slice.get_converter(opset), - 'Transpose': AttrCvt('transpose', {'perm': 'axes'}), - 'Gather': Gather.get_converter(opset), - 'Squeeze': AttrCvt('squeeze', {'axes': 'axis'}), - 'Unsqueeze': Unsqueeze.get_converter(opset), - 'Pad': Pad.get_converter(opset), - 'Shape': Shape.get_converter(opset), - } - - -class GraphProto(object): - """A helper class for handling nnvm graph 
copying from pb2.GraphProto. - Definition: https://github.com/onnx/onnx/blob/master/onnx/onnx.proto - """ - - def __init__(self): - self._nodes = {} - self._params = {} - self._renames = {} - self._num_input = 0 - self._num_param = 0 - - def from_onnx(self, graph, opset): - """Construct nnvm nodes from onnx graph. - The inputs from onnx graph is vague, only providing "1", "2"... - For convenience, we rename the `real` input names to "input_0", - "input_1"... And renaming parameters to "param_0", "param_1"... - - Parameters - ---------- - graph : onnx protobuf object - The loaded onnx graph - opset : opset version - - Returns - ------- - sym : nnvm.sym.Symbol - The returned nnvm symbol - params : dict - A dict of name: tvm.nd.array pairs, used as pretrained weights - """ - # parse network inputs to nnvm, aka parameters - for init_tensor in graph.initializer: - if not init_tensor.name.strip(): - raise ValueError("Tensor's name is required.") - self._params[init_tensor.name] = self._parse_array(init_tensor) - for i in graph.input: - # from onnx v0.2, GraphProto.input has type ValueInfoProto, - # and the name is 'i.name' - i_name = self._parse_value_proto(i) - if i_name in self._params: - # i is a param instead of input - self._num_param += 1 - self._params[i_name] = self._params.pop(i_name) - self._nodes[i_name] = _sym.Variable( - name=i_name, shape=self._params[i_name].shape) - else: - self._num_input += 1 - self._nodes[i_name] = _sym.Variable(name=i_name) - # get list of unsupported ops - convert_map = _get_convert_map(opset) - unsupported_ops = set() - for node in graph.node: - op_name = node.op_type - if op_name not in convert_map and \ - op_name != 'Constant' and \ - op_name not in _identity_list: - unsupported_ops.add(op_name) - if unsupported_ops: - msg = 'The following operators are not supported for frontend ONNX: ' - msg += ', '.join(unsupported_ops) - raise tvm.error.OpNotImplemented(msg) - # construct nodes, nodes are stored as directed acyclic graph - for node in graph.node: - op_name = node.op_type - attr = self._parse_attr(node.attribute) - inputs = [self._nodes[self._renames.get(i, i)] for i in node.input] - if op_name == "Constant": - t_proto = self._parse_attr(node.attribute)["value"] - self._num_param += 1 - self._params[node.output[0]] = self._parse_array(t_proto) - self._nodes[node.output[0]] = _sym.Variable(name=node.output[0], - shape=list(t_proto.dims)) - else: - op = self._convert_operator(op_name, inputs, attr, opset) - node_output = self._fix_outputs(op_name, node.output) - assert len(node_output) == len(op.list_output_names()), ( - "Number of output mismatch {} vs {} in {}.".format( - len(node_output), len(op.list_output_names()), op_name)) - for k, i in zip(list(node_output), range(len(node_output))): - self._nodes[k] = op[i] - # now return the outputs - out = [self._nodes[self._parse_value_proto(i)] for i in graph.output] - if len(out) > 1: - out = _sym.Group(out) - else: - out = out[0] - return out, self._params - - def _parse_value_proto(self, value_proto): - """Parse ValueProto or raw str.""" - try: - name = value_proto.name - except AttributeError: - name = value_proto - return name - - def _parse_array(self, tensor_proto): - """Grab data in TensorProto and convert to numpy array.""" - try: - from onnx.numpy_helper import to_array - except ImportError as e: - raise ImportError( - "Unable to import onnx which is required {}".format(e)) - np_array = to_array(tensor_proto).reshape(tuple(tensor_proto.dims)) - return tvm.nd.array(np_array) - - def 
_parse_attr(self, attr_proto): - """Convert a list of AttributeProto to a dict, with names as keys.""" - attrs = {} - for a in attr_proto: - for f in ['f', 'i', 's']: - if a.HasField(f): - attrs[a.name] = getattr(a, f) - for f in ['floats', 'ints', 'strings']: - if list(getattr(a, f)): - assert a.name not in attrs, "Only one type of attr is allowed" - attrs[a.name] = tuple(getattr(a, f)) - for f in ['t']: - if a.HasField(f): - attrs[a.name] = getattr(a, f) - for f in ['tensors']: - if list(getattr(a, f)): - assert a.name not in attrs, "Only one type of attr is allowed" - attrs[a.name] = tuple(getattr(a, f)) - for f in ['g']: - if a.HasField(f): - raise NotImplementedError( - "Filed {} is not supported in nnvm.".format(f)) - for f in ['graphs']: - if list(getattr(a, f)): - raise NotImplementedError( - "Filed {} is not supported in nnvm.".format(f)) - if a.name not in attrs: - raise ValueError("Cannot parse attribute: \n{}\n.".format(a)) - return attrs - - def _convert_operator(self, - op_name, - inputs, - attrs, - opset, - identity_list=None, - convert_map=None): - """Convert from onnx operator to nnvm operator. - The converter must specify conversions explicitly for incompatible name, and - apply handlers to operator attributes. - - Parameters - ---------- - op_name : str - Operator name, such as Convolution, FullyConnected - inputs : list of nnvm.Symbol - List of input symbols. - attrs : dict - Dict of operator attributes - opset : int - Opset version - identity_list : list - List of operators that don't require conversion - convert_map : dict - Dict of name : callable, where name is the op's name that - require conversion to nnvm, callable are functions which - take attrs and return (new_op_name, new_attrs) - - Returns - ------- - sym : nnvm.Symbol - Converted nnvm Symbol - """ - identity_list = identity_list if identity_list else _identity_list - convert_map = convert_map if convert_map else _get_convert_map(opset) - if op_name in identity_list: - sym = get_nnvm_op(op_name)(*inputs, **attrs) - elif op_name in convert_map: - sym = convert_map[op_name](inputs, attrs, self._params) - else: - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend ONNX.') - return sym - - def _fix_outputs(self, op_name, outputs): - """A hack to handle dropout or similar operator that have more than one out - in ONNX. - """ - if op_name == 'Dropout': - if len(outputs) == 1: - return outputs - # TODO(zhreshold): support dropout mask? - outputs = outputs[:-1] - return outputs - - -def from_onnx(model): - """Load onnx graph which is a python protobuf object into nnvm graph. - The companion parameters will be handled automatically. - The inputs from onnx graph is vague, only providing "1", "2"... - For convenience, we rename the `real` input names to "input_0", - "input_1"... And renaming parameters to "param_0", "param_1"... 
- - Parameters - ---------- - model : protobuf object - ONNX ModelProto after ONNX v1.1.0 - - Returns - ------- - sym : nnvm.Symbol - Compatible nnvm symbol - - params : dict of str to tvm.ndarray - Dict of converted parameters stored in tvm.ndarray format - """ - g = GraphProto() - graph = model.graph - try: - opset = model.opset_import[0].version if model.opset_import else 1 - except AttributeError: - opset = 1 - sym, params = g.from_onnx(graph, opset) - return sym, params diff --git a/nnvm/python/nnvm/frontend/onnx_caffe2_utils.py b/nnvm/python/nnvm/frontend/onnx_caffe2_utils.py deleted file mode 100644 index 18f9263ecc0b..000000000000 --- a/nnvm/python/nnvm/frontend/onnx_caffe2_utils.py +++ /dev/null @@ -1,61 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Util functions shared by the ONNX and Caffe2 frontends.""" -from __future__ import absolute_import as _abs -from nnvm import graph as _graph -from nnvm.compiler import graph_util - - -def dimension_picker(prefix, surfix=''): - def _impl(attr): - kernel = attr['kernel_shape'] - if len(kernel) == 2: - return prefix + '2d' + surfix - raise NotImplementedError("Only 2d kernel supported.") - - return _impl - - -def dimension_constraint(): - def _dim_check(attrs): - if len(attrs['kernel_shape']) == 2: - return True - return False - - return _dim_check, "Only 2d kernel supported." - - -def infer_channels(inputs, params, transpose=False): - """A hack for getting 'channels' or 'units' since caffe2 don't provide - these attributes. We check the shape of weights provided to get the number. - """ - g = _graph.create(inputs) - shape_dict = {k: v.shape for k, v in params.items()} - _, out_shapes = graph_util.infer_shape(g, **shape_dict) - channels = out_shapes[0][0] if not transpose else out_shapes[0][1] - return channels - - -def revert_caffe2_pad(pads): - """Caffe2 require two times the normal padding.""" - if len(pads) == 4: - pads = pads[:2] - elif len(pads) == 2: - pass - else: - raise ValueError("Invalid caffe2 type padding: {}".format(pads)) - return pads diff --git a/nnvm/python/nnvm/frontend/tensorflow.py b/nnvm/python/nnvm/frontend/tensorflow.py deleted file mode 100644 index a7674f217e90..000000000000 --- a/nnvm/python/nnvm/frontend/tensorflow.py +++ /dev/null @@ -1,1613 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=import-self, invalid-name, unused-argument, too-many-lines -"""TF: Tensorflow frontend.""" -from __future__ import absolute_import as _abs -from __future__ import print_function - -import warnings -# Numpy support -import numpy as np - -import tvm -from .. import symbol as _sym -from .. import graph as _graph -from .. compiler import graph_util, build_module -from .common import get_nnvm_op, AttrConverter as AttrConvert - -__all__ = ['from_tensorflow'] - -class AttrCvt(object): - """A Wrapper to handle some common jobs: - """ - def __init__(self, op_name, transforms=None, - excludes=None, disables=None, ignores=None, - extras=None, custom_check=None): - self._op_name = op_name - self._transforms = transforms if transforms else {} - self._excludes = excludes if excludes else [] - self._disables = disables if disables else [] - self._ignores = ignores if ignores else [] - self._extras = extras if extras else {} - self._custom_check = custom_check - - def __call__(self, inputs, attrs, *args): - self._ignores.append('_output_shapes') - self._ignores.append('_input_shapes') - self._ignores.append('T') - self._ignores.append('use_cudnn_on_gpu') - self._ignores.append('_node_name') - self._ignores.append('is_training') - self._ignores.append('_target_layout') - self._ignores.append('_input_0d_mismatch') - # Retain the names - try: - attrs['name'] = attrs['_node_name'] - except KeyError: - pass - return AttrConvert(self._op_name, self._transforms, self._excludes, - self._disables, self._ignores, self._extras, - self._custom_check)(inputs, attrs, *args) - -def _get_pad_pair(input1d, kernel1d, stride1d): - if input1d % stride1d == 0: - pad = max(kernel1d - stride1d, 0) - else: - pad = max(kernel1d - (input1d % stride1d), 0) - - pad_before = pad // 2 - pad_after = pad - pad_before - - return [pad_before, pad_after] - -def _math_name_picker(surfix): - def _impl(attr): - return 'broadcast_' + surfix - return _impl - -def _dimension_picker(prefix, surfix=''): - def _impl(attr): - kernel = attr['kernel_shape'] - if len(kernel) == 2: - return prefix + '2d' + surfix - raise tvm.error.OpAttributeUnImplemented( - 'Non-2D kernels are not supported for operator {}.'.format(prefix)) - return _impl - -def _dimension_constraint(): - def _dim_check(attrs): - if len(attrs['kernel_shape']) == 2: - return True - return False - return _dim_check, "Only 2d kernel supported." - -def _infer_channels(inputs, params, transpose=False): - """A hack for getting 'channles' or 'units' since tensorflow don't provide - these attributes. We check the shape of weights provided to get the number. 
- """ - g = _graph.create(inputs) - shape_dict = {k: v.shape for k, v in params.items()} - _, out_shapes = graph_util.infer_shape(g, **shape_dict) - channels = out_shapes[0][0] if not transpose else out_shapes[0][1] - return channels - -def _rsqrt(): - def _impl(inputs, attr, *args): - return AttrCvt(op_name="__pow_scalar__", extras={'scalar': -0.5})(inputs, attr) - return _impl - -def _argx(func, func_name): - """ A common wrapper for argmin and argmax operations """ - def _impl(inputs, attr, params): - try: - # In Tensorflow, `axis` argument is a Tensor, not attribute. We - # support the case where it inputs from a scalar constant. - axis_input_name = inputs[1].list_output_names()[0] - axis_input_vlaue = params[axis_input_name].asnumpy()[0] - except (IndexError, KeyError): - raise TypeError( \ - "Unsupported argument for `{}` : `axis` should be a constant".format(func_name)) - return func(inputs[0], axis=axis_input_vlaue, keepdims=False) - return _impl - -def _elemwise(name): - def _impl(inputs, attr, *args): - assert len(inputs) == 2, "{} take 2 inputs, {} given".format(name, len(inputs)) - op_name = _math_name_picker(name)(attr) - return get_nnvm_op(op_name)(*inputs) - return _impl - -def _pooling(name): - def _impl(inputs, attr, params): - - attr['data_format'] = attr['data_format'].decode("utf-8") - flip_layout = False - - input_shape = attr['_input_shapes'][inputs[0]] - - if attr['data_format'] == 'NHWC': - attr['kernel_shape'] = (attr['ksize'][1], attr['ksize'][2]) - attr['strides'] = (attr['strides'][1], attr['strides'][2]) - elif attr['data_format'] == 'NCHW': - attr['kernel_shape'] = (attr['ksize'][2], attr['ksize'][3]) - attr['strides'] = (attr['strides'][2], attr['strides'][3]) - else: - msg = 'Value {} in attribute "data_format" of operator Pooling is not valid.' - raise tvm.error.OpAttributeInvalid(msg.format(attr['data_format'])) - - if attr['_target_layout'] == "NCHW" and attr['data_format'] == "NHWC": - tmp_shape = attr['_input_shapes'][inputs[0]] - input_shape = [tmp_shape[ii] for ii in (0, 3, 1, 2)] - inputs[0] = _sym.transpose(inputs[0], axes=(0, 3, 1, 2)) - attr['data_format'] = "NCHW" - flip_layout = True - - # Fix padding - attr['padding'] = attr['padding'].decode("utf-8") - - if attr['padding'] == 'VALID': - attr['padding'] = [0, 0] - elif attr['padding'] == 'SAME': - stride_h, stride_w = attr['strides'] - kernel_h, kernel_w = attr['kernel_shape'] - if attr['data_format'] == 'NHWC': - in_h = input_shape[1] - in_w = input_shape[2] - else: - in_h = input_shape[2] - in_w = input_shape[3] - - pad_v = _get_pad_pair(in_h, kernel_h, stride_h) - pad_h = _get_pad_pair(in_w, kernel_w, stride_w) - - attr['padding'] = [pad_v[0], pad_h[0], pad_v[1], pad_h[1]] - else: - msg = 'Value {} in attribute "padding" of operator Pooling is not valid.' 
- raise tvm.error.OpAttributeUnImplemented(msg.format(attr['padding'])) - - if name == "avg_pool": - attr['count_include_pad'] = False - - out = AttrCvt( - op_name=_dimension_picker(name), - transforms={ - 'kernel_shape':'pool_size', - 'data_format':'layout'}, - ignores=['ksize'], - extras={'ceil_mode': False}, - custom_check=_dimension_constraint())(inputs, attr) - - if flip_layout: - out = _sym.transpose(out, axes=(0, 2, 3, 1)) - - return out - return _impl - -def _conv(opname): - def _impl(inputs, attr, params): - attr['data_format'] = attr['data_format'].decode("utf-8") - flip_layout = False - - # NCHW Layout require weights transpose - if attr['data_format'] == 'NCHW': - tmp_shape = attr['_input_shapes'][inputs[1]] - if opname == 'conv': - tmp_shape = [tmp_shape[ii] for ii in (3, 2, 0, 1)] - inputs[1] = _sym.transpose(inputs[1], axes=(3, 2, 0, 1)) - else: - tmp_shape = [tmp_shape[ii] for ii in (2, 3, 0, 1)] - inputs[1] = _sym.transpose(inputs[1], axes=(2, 3, 0, 1)) - attr['_input_shapes'][inputs[1]] = tmp_shape - - input_shape = attr['_input_shapes'][inputs[0]] - weights_shape = attr['_input_shapes'][inputs[1]] - - if attr['_target_layout'] == "NCHW" and attr['data_format'] == "NHWC": - input_shape = [input_shape[ii] for ii in (0, 3, 1, 2)] - inputs[0] = _sym.transpose(inputs[0], axes=(0, 3, 1, 2)) - if opname == 'conv': - weights_shape = [weights_shape[ii] for ii in (3, 2, 0, 1)] - inputs[1] = _sym.transpose(inputs[1], axes=(3, 2, 0, 1)) - else: - weights_shape = [weights_shape[ii] for ii in (2, 3, 0, 1)] - inputs[1] = _sym.transpose(inputs[1], axes=(2, 3, 0, 1)) - - attr['data_format'] = "NCHW" - attr['strides'] = [attr['strides'][ii] for ii in (0, 3, 1, 2)] - flip_layout = True - - if attr['data_format'] == 'NHWC': - kernel_h, kernel_w, _, depth_mult = weights_shape - attr['kernel_shape'] = (weights_shape[0], weights_shape[1]) - if opname == 'conv': - attr['channels'] = weights_shape[3] - else: - attr['channels'] = input_shape[3] * depth_mult - - if 'dilations' in attr: - attr['dilations'] = (attr['dilations'][1], attr['dilations'][2]) - attr['strides'] = (attr['strides'][1], attr['strides'][2]) - elif attr['data_format'] == 'NCHW': - _, depth_mult, kernel_h, kernel_w = weights_shape - attr['kernel_shape'] = (weights_shape[2], weights_shape[3]) - if opname == 'conv': - attr['channels'] = weights_shape[0] - else: - attr['channels'] = input_shape[1] * depth_mult - if attr['channels'] < 0: - attr['channels'] *= -1 - - if 'dilations' in attr: - attr['dilations'] = (attr['dilations'][2], attr['dilations'][3]) - attr['strides'] = (attr['strides'][2], attr['strides'][3]) - else: - msg = 'Value {} in attribute "data_format" of operator Conv is not valid.' 
- raise tvm.error.OpAttributeInvalid(msg.format(attr['data_format'])) - - - if opname == 'depthwise': - if depth_mult > 1: - raise tvm.error.OpNotImplemented('depth_mult > 1 of operator DepthwiseConv2dNative' - ' is not supported.') - attr['groups'] = attr['channels'] - - # Fix padding - attr['padding'] = attr['padding'].decode("utf-8") - - if attr['padding'] == 'VALID': - attr['padding'] = [0, 0] - elif attr['padding'] == 'SAME': - stride_h, stride_w = attr['strides'] - kernel_h, kernel_w = attr['kernel_shape'] - if attr['data_format'] == 'NHWC': - in_h = input_shape[1] - in_w = input_shape[2] - else: - in_h = input_shape[2] - in_w = input_shape[3] - - dilation_h = attr['dilations'][0] - dilation_w = attr['dilations'][1] - dilated_kernel_h = (kernel_h - 1) * dilation_h + 1 - dilated_kernel_w = (kernel_w - 1) * dilation_w + 1 - pad_v = _get_pad_pair(in_h, dilated_kernel_h, stride_h) - pad_h = _get_pad_pair(in_w, dilated_kernel_w, stride_w) - - if attr['data_format'] == 'NHWC': - inputs[0] = _sym.pad(data=inputs[0], - pad_width=((0, 0), - (pad_v[0], pad_v[1]), - (pad_h[0], pad_h[1]), - (0, 0))) - else: - inputs[0] = _sym.pad(data=inputs[0], - pad_width=((0, 0), - (0, 0), - (pad_v[0], pad_v[1]), - (pad_h[0], pad_h[1]))) - - attr['padding'] = [0, 0] - - else: - msg = 'Value {} in attribute "padding" of operator Conv is not valid.' - raise tvm.error.OpAttributeInvalid(msg.format(attr['padding'])) - - if 'kernel_layout' not in attr: - if opname == 'conv': - attr['kernel_layout'] = 'HWIO' if attr['data_format'] == 'NHWC' else 'OIHW' - else: - attr['kernel_layout'] = 'HWOI' if attr['data_format'] == 'NHWC' else 'OIHW' - - out = AttrCvt( - op_name=_dimension_picker('conv'), - transforms={ - 'kernel_shape': 'kernel_size', - 'data_format': 'layout', - 'dilations': ('dilation', (0, 0)), - 'group': ('groups', 1)}, - extras={'use_bias': len(inputs) == 3}, - custom_check=_dimension_constraint())(inputs, attr) - - if flip_layout: - out = _sym.transpose(out, axes=(0, 2, 3, 1)) - - return out - return _impl - -def _decode_image(): - def _impl(inputs, attr, params): - # Image decode wrapper: Expecting user to feed decoded input to next layer drop this layer. 
- warnings.warn("DecodeJpeg: It's a pass through, " - "please handle preprocessing before input") - return inputs[0] - return _impl - -def _cast(): - def _impl(inputs, attr, params): - # Convert from tensorflow Dtype to str - attr['DstT'] = attr['DstT'].name - return AttrCvt(op_name='cast', transforms={'DstT': 'dtype'}, - ignores=['SrcT', 'Truncate'])(inputs, attr) - return _impl - -def _expand_dims(): - def _impl(inputs, attr, params): - dim_input = inputs.pop(1) - axis = params[dim_input.list_output_names()[0]] - params.pop(dim_input.list_output_names()[0]) - return _expand_dims_0d_aware(inputs[0], attr, axis=axis.asnumpy()[0]) - return _impl - -def _resize_bilinear(): - def _impl(inputs, attr, params): - attr['size'] = attr['_output_shapes'][0][1:3] - inputs.pop(1) - # NHWC - attr['layout'] = 'NHWC' - - return AttrCvt(op_name="resize", - ignores=['Tdim'], - extras={'method': "BILINEAR"})(inputs, attr) - return _impl - -def _check_numerics(): - def _impl(inputs, attr, params): - # Making a copy node assuming no need to verify - return AttrCvt(op_name="copy", ignores=['message'])(inputs, attr) - return _impl - - -def _matmul(): - def _impl(inputs, attr, params): - channels = _infer_channels(inputs[1], params, not attr['transpose_b']) - if attr['transpose_a']: - inputs[0] = _sym.transpose(inputs[0], axes=(1, 0)) - if not attr['transpose_b']: - inputs[1] = _sym.transpose(inputs[1], axes=(1, 0)) - return AttrCvt(op_name="dense", - extras={'use_bias': False, 'units': channels}, - ignores=['transpose_a', 'transpose_b', 'T'])(inputs, attr) - - return _impl - -def _undef(): - def _impl(inputs, attr, params): - return _sym.__undef__() - return _impl - -def _identity(): - def _impl(inputs, attr, params): - return inputs[0] - return _impl - -def _concatV2(): - def _impl(inputs, attr, params): - pop_node = inputs.pop(len(inputs)-1) - axis = params[pop_node.list_output_names()[0]] - params.pop(pop_node.list_output_names()[0]) - return AttrCvt( - op_name="concatenate", ignores=['T', 'N', 'Tidx'], - extras={'axis': axis.asnumpy()[0]})(inputs, attr) - return _impl - -def _concat(): - def _impl(inputs, attr, params): - pop_node = inputs.pop(0) - axis = params[pop_node.list_output_names()[0]] - params.pop(pop_node.list_output_names()[0]) - return AttrCvt( - op_name="concatenate", ignores=['N'], - extras={'axis': axis.asnumpy()[0]})(inputs, attr) - return _impl - -def _pack(): - def _impl(inputs, attr, params): - axis = int(attr["axis"]) - inputs_reshaped = [_expand_dims_0d_aware(i, attr, axis=axis, num_newaxis=1) for i in inputs] - return _sym.concatenate(*inputs_reshaped, axis=axis, name=attr["_node_name"]) - - return _impl - -def _slice(): - def _impl(inputs, attr, params): - begin = params.pop(inputs[1].list_output_names()[0]).asnumpy().tolist() - size = params.pop(inputs[2].list_output_names()[0]).asnumpy().tolist() - data_shape = attr['_input_shapes'][inputs[0]] - data_dim = len(data_shape) - end = size - for i in range(data_dim): - if size[i] == -1: - end[i] = data_shape[i] - begin[i] - else: - end[i] += begin[i] - return _sym.strided_slice(inputs[0], begin=begin, end=size) - return _impl - -def _reshape(): - def _impl(inputs, attr, params): - try: - pop_node = inputs[1] - shape_arg = params.pop(pop_node.list_output_names()[0]) - inputs.pop(1) - - return AttrCvt( - op_name="reshape", - extras={'shape':tuple(shape_arg.asnumpy())}, - ignores=['Tshape'])(inputs, attr) - except KeyError: - # Shape operator is already pruned, hence - # try to infer shape by precompute prune if possible. 
- if all(in_node in params for in_node in inputs[1].list_input_names()): - graph = _graph.create(_sym.Group(inputs[1])) - params_pre = {k: params[k] for k in inputs[1].list_input_names()} - params_new = build_module._run_graph(graph, params_pre) - inputs.pop(1) - return AttrCvt( - op_name="reshape", - extras={'shape':tuple(params_new[0].asnumpy().flatten())}, - ignores=['Tshape'])(inputs, attr) - raise tvm.error.OpAttributeUnimplemented( - 'Attribute "dynamic shape" of operator Reshape is not supported.') - return _impl - -def _bias_add(): - def _impl(inputs, attr, params): - if attr['data_format'].decode("utf-8") == 'NCHW': - bias = _sym.reshape(inputs[1], newshape=(1, -1, 1, 1)) - else: - bias = inputs[1] - return _sym.broadcast_add(inputs[0], bias) - return _impl - -def _squeeze(): - def _impl(inputs, attr, params): - return AttrCvt( - op_name="squeeze", - transforms={'squeeze_dims':'axis'}, - ignores=['T'])(inputs, attr) - return _impl - -def _fused_batch_norm(): - def _impl(inputs, attr, params): - # Tensorflow: (data, gamma, beta, moving_mean, moving_variance) - # NNVM: (data, gamma, beta, moving_mean, moving_varience) - axis = 3 - need_cast = False - - if 'data_format' in attr: - attr['data_format'] = attr['data_format'].decode("utf-8") - if attr['data_format'] == 'NCHW': - axis = 1 - if 'U' in attr: - need_cast = True - inputs[0] = _sym.cast(inputs[0], dtype=attr['U'].name) - - out = AttrCvt(op_name='batch_norm', - transforms={'scale_after_normalization':'scale', - 'variance_epsilon':'epsilon'}, - extras={'axis': axis}, - ignores=['data_format', 'U'], - disables=['momentum'])(inputs, attr) - - if need_cast: - out = _sym.cast(out, dtype=attr['T'].name) - return out - return _impl - -def _batch_norm(): - def _impl(inputs, attr, params): - # Rearrange inputs from - # (data, moving_mean, moving_variance, beta, gamma) - # to - # (data, gamma, beta, moving_mean, moving_var) - new_inputs = [inputs[0], inputs[4], inputs[3], inputs[1], inputs[2]] - - axis = 3 - if 'data_format' in attr: - attr['data_format'] = attr['data_format'].decode("utf-8") - if attr['data_format'] == 'NCHW': - axis = 1 - - return AttrCvt( - op_name='batch_norm', - transforms={'scale_after_normalization':'scale', 'variance_epsilon':'epsilon'}, - extras={'axis': axis}, - ignores=['data_format'], - disables=['momentum'])(new_inputs, attr) - return _impl - -def _relu6(): - def _impl(inputs, attr, params): - return _sym.clip(inputs[0], a_min=0, a_max=6, name=attr['_node_name']) - return _impl - -def _shape(): - def _impl(inputs, attr, params): - return np.array(attr['_input_shapes'][inputs[0]], dtype='int32') - return _impl - -def _fill(): - def _impl(inputs, attr, params): - fill_arg = params.pop(inputs.pop(1).list_output_names()[0]) - new_inputs = [] - return AttrCvt( - op_name='full', - extras={'shape':inputs[0], - 'fill_value':fill_arg.asnumpy()[0], 'dtype':attr['T'].name}, - ignores=['index_type', 'T'])(new_inputs, attr) - return _impl - -def _lrn(): - def _impl(inputs, attr, params): - attr_new = {} - depth_radius = attr.get('depth_radius', 5) - size = (depth_radius * 2) + 1 - attr_new['axis'] = 3 # Fix axis, NHWC format - attr_new['size'] = size - attr_new['bias'] = attr.get('bias', 1) - attr_new['alpha'] = attr.get('alpha', 1) * size - attr_new['beta'] = attr.get('beta', 0.5) - return AttrCvt(op_name='lrn')(inputs, attr_new) - return _impl - -def _sum(): - def _impl(inputs, attr, params): - axis = params.pop(inputs[1].list_output_names()[0]).asnumpy() - # convert to tuple for preventing invalid parameter format 
error - axis = tuple(axis) - return AttrCvt( - op_name='sum', - extras={'axis': axis}, - transforms={'keep_dims':'keepdims'}, - ignores=['name', 'Tidx'])(inputs[0], attr) - return _impl - -def _square(): - def _impl(inputs, attr, params): - return _sym.elemwise_mul(inputs[0], inputs[0]) - return _impl - -def _gather_v2(): - "Tensorflow now support only gatherv2" - def _impl(inputs, attr, params): - axis = params[inputs.pop(2).list_output_names()[0]].asnumpy()[0] - new_input = [] - new_input.append(inputs.pop(0)) - new_input.append(inputs.pop(0)) - return AttrCvt( - op_name="take", - extras={'axis':axis}, - ignores=['Tindices', 'Tparams', 'validate_indices', \ - 'Taxis', '_class'])(new_input, attr) - return _impl - -def _infer_out_shapes(inputs, params): - """A method to get the output shape of an intermediate node in the NNVM graph.""" - g = _graph.create(inputs) - shape_dict = {k: v.shape for k, v in params.items()} - _, out_shapes = graph_util.infer_shape(g, **shape_dict) - return out_shapes - -def _stridedSlice(): - def _impl(inputs, attr, params): - """Strided Slice. - Operator description: https://www.tensorflow.org/api_docs/python/tf/strided_slice - Tensorflow mask validation: https://github.com/tensorflow/tensorflow/blob/master/ - tensorflow/core/util/strided_slice_op.cc#L147-L368 - """ - begin = params.pop(inputs[1].list_output_names()[0]).asnumpy().tolist() - end = params.pop(inputs[2].list_output_names()[0]).asnumpy().tolist() - stride = params.pop(inputs[3].list_output_names()[0]).asnumpy().tolist() - begin_mask = int(attr.get('begin_mask', 0)) - end_mask = int(attr.get('end_mask', 0)) - ellipsis_mask = int(attr.get('ellipsis_mask', 0)) - new_axis_mask = int(attr.get('new_axis_mask', 0)) - shrink_axis_mask = int(attr.get('shrink_axis_mask', 0)) - data_shape = attr['_input_shapes'][inputs[0]] - data_dim = len(data_shape) - stride_dim = len(stride) - - def _transform_mask(stride_dim, ellipsis_mask): - """Handle mask inputs to create new begin, end, stride and output shape""" - m_begin = [0] * data_dim - m_end = [0] * data_dim - m_stride = [0] * data_dim - fshape_indices = [] - #Count new axis after ellipsis_mask, consider while applying ellipsis_mask. - ellipsis_seen = False - new_axes_after_ellipsis = 0 - for i in range(stride_dim): - mask = 1 << i - if ellipsis_seen and (mask & new_axis_mask) != 0: - new_axes_after_ellipsis += 1 - if (mask & ellipsis_mask) != 0: - ellipsis_seen = True - if not ellipsis_seen: - #Used later for extending the stride attributes in the below loop. 
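# Rough numpy illustration (an assumption for clarity; nnvm is not involved) of
# what the five masks parsed above encode.  Bit i of each mask refers to slice
# position i of the original strided_slice spec:
#   begin_mask / end_mask : ignore begin[i] / end[i] and slice from the edge
#   ellipsis_mask         : position i is a `...` that expands to full slices
#   new_axis_mask         : position i inserts a new length-1 axis
#   shrink_axis_mask      : position i picks a single index and drops the axis
# The spec x[1, ..., np.newaxis, :2] sets, in order, shrink_axis_mask bit 0,
# ellipsis_mask bit 1, new_axis_mask bit 2 and begin_mask bit 3:
import numpy as np

x = np.arange(24).reshape(2, 3, 4)
assert x[1, ..., np.newaxis, :2].shape == (3, 1, 2)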
- ellipsis_mask |= (1 << stride_dim) - stride_dim += 1 - final_index = 0 - for index in range(stride_dim): - mask = 1 << index - if mask & ellipsis_mask: - #Identify the end index for applying ellipsis_mask - to_index = min(((data_dim - (stride_dim-index)) + 1 \ - + new_axes_after_ellipsis), data_dim) - for i in range(final_index, to_index): - m_begin[final_index] = 0 - m_end[final_index] = data_shape[final_index] - m_stride[final_index] = 1 - fshape_indices.append(final_index) - final_index += 1 - elif mask &new_axis_mask: - fshape_indices.append(-1) - elif not mask & new_axis_mask: - if final_index == len(m_begin): - break - if mask & begin_mask: - m_begin[final_index] = data_shape[final_index] \ - if stride[index] < 0 else 0 - elif begin[index]: - m_begin[final_index] = begin[index] - if mask & end_mask: - m_end[final_index] = 0 if stride[index] < 0 \ - else data_shape[final_index] - elif end[index]: - m_end[final_index] = end[index] - m_stride[final_index] = stride[index] - if mask & shrink_axis_mask: - #Tensorflow make axis with shrink_axis_mask as dimension 1 - m_begin[final_index] = data_shape[final_index] + begin[index] \ - if begin[index] < 0 else begin[index] - m_end[final_index] = begin[index] + 1 - m_stride[final_index] = 1 - fshape_indices.append(-2) - else: - fshape_indices.append(final_index) - - final_index += 1 - return m_begin, m_end, m_stride, fshape_indices - - fshape_indices = None - if begin_mask or end_mask or ellipsis_mask or new_axis_mask or shrink_axis_mask: - begin, end, stride, fshape_indices = _transform_mask(stride_dim, ellipsis_mask) - out = _sym.strided_slice(inputs[0], begin=begin, end=end, stride=stride) - out_shape = _infer_out_shapes(out, params)[0] - if not fshape_indices: - fshape_indices = range(len(out_shape)) - - #Create final output shape. - final_output = [] - for gather_index in fshape_indices: - if gather_index == -1: - final_output.append(1) - elif gather_index == -2: - pass - else: - final_output.append(out_shape[gather_index]) - # Prevent 0-dim tensors which are not accepted by nnvm - if not final_output: - final_output.append(1) - return _sym.reshape(out, shape=tuple(final_output)) - return _impl - -def _LSTMBlockCell(): - def _impl(inputs, in_state_c, in_state_h, attr, params): - """LSTM Block cell. - Calculations are described in: https://github.com/tensorflow/tensorflow/blob/ - r1.8/tensorflow/contrib/rnn/python/ops/lstm_ops.py#L41-L114 - - Parameters - ---------- - inputs : nnvm.Symbol - Input data - in_state_c: list of nnvm.Symbol - Cell state input values for all the layers - in_state_h: list of nnvm.Symbol - Hidden state input values for all the layers - attrs : dict - Dict of operator attributes - params : dict - List of pretrained weights and bias - - Returns - ------- - sym : nnvm.Symbol - Converted nnvm Symbol - output: nnvm.Symbol - Output state value. 
- """ - in_data = inputs[0] - in_weight = inputs[3] - in_bias = inputs[7] - forget_bias = attr.pop('forget_bias') - input_shape = attr['_input_shapes'][inputs[0]] - weight_shape = attr['_input_shapes'][inputs[3]] - batch_size, input_size = input_shape[0], input_shape[1] - num_hidden_layers = weight_shape[1] - num_hidden = num_hidden_layers // 4 - - in_data = _sym.reshape(in_data, - shape=(batch_size, input_size)) - ixh = _sym.concatenate(*[in_data, in_state_h], axis=1) - in_weight = _sym.transpose(in_weight) - gates = _sym.dense(ixh, in_weight, in_bias, use_bias=True, - units=num_hidden_layers) - gate_list = _sym.split(gates, indices_or_sections=4, axis=1) - in_gate = _sym.sigmoid(gate_list[0]) - in_transform = _sym.tanh(gate_list[1]) - forget_gate = _sym.sigmoid(gate_list[2]) - forget_gate = forget_gate + forget_bias - out_gate = _sym.sigmoid(gate_list[3]) - next_c = _sym.broadcast_add(_sym.broadcast_mul(forget_gate, in_state_c), - _sym.broadcast_mul(in_gate, in_transform)) - next_h = out_gate * _sym.tanh(next_c) - out_state = _sym.concatenate(*[next_c, next_h]) - out_state = _sym.reshape(out_state, - shape=(2, batch_size, num_hidden)) - return next_h, out_state - return _impl - - -def _pad(name): - def _impl(inputs, attr, params): - padlist_key = inputs[1].list_output_names()[0] - if padlist_key in params: - padlist = params.pop(padlist_key).asnumpy() - else: - raise tvm.error.OpAttributeRequired( - 'Required attribute "{}" not found in operator Pad.'.format(padlist_key)) - paddings = tuple([tuple(l) for l in padlist]) - attr['pad_width'] = paddings - attr['pad_value'] = 0 - new_inputs = [inputs[0]] - if name == 'PadV2': - constant_values = params.pop(inputs[2].list_output_names()[0]).asnumpy() - attr['pad_value'] = constant_values[0] - return AttrCvt( - op_name='pad', - ignores=['Tpaddings'],)(new_inputs, attr) - return _impl - - -def _transpose(): - def _impl(inputs, attr, params): - # If perm is not specified, axes is left empty, - # otherwise its value is get from params - param_name = inputs[1].list_output_names()[0] - axes = params.get(param_name, tvm.nd.array([])).asnumpy() - return _sym.transpose(inputs[0], axes=tuple(axes)) - return _impl - -def _rank(): - def _impl(inputs, attr, params): - input_shape = attr['_input_shapes'][inputs[0]] - - name = attr["_node_name"] - params[name] = tvm.nd.array([len(input_shape)]) - return _sym.Variable(name=name, shape=params[name].shape) - return _impl - -def _range(): - def _impl(inputs, attr, params): - start = params.pop(inputs[0].list_output_names()[0]).asnumpy()[0] - limit = params.pop(inputs[1].list_output_names()[0]).asnumpy()[0] - delta = params.pop(inputs[2].list_output_names()[0]).asnumpy()[0] - - name = attr["_node_name"] - params[name] = tvm.nd.array([start, limit, delta]) - return _sym.Variable(name=name, shape=params[name].shape) - return _impl - -def _elu(): - def _impl(inputs, attr, params): - alpha = 1.0 - return -alpha * _sym.relu(1 - _sym.exp(inputs[0])) + _sym.relu(inputs[0]) - return _impl - -def _selu(): - def _impl(inputs, attr, params): - alpha = 1.6732632423543772848170429916717 - gamma = 1.0507009873554804934193349852946 - return gamma * (-alpha * _sym.relu(1 - _sym.exp(inputs[0])) + _sym.relu(inputs[0])) - return _impl - -def _mean(): - def _impl(inputs, attr, params): - axis = params.pop(inputs[1].list_output_names()[0]) - return AttrCvt(op_name="mean", ignores=['Tdim', 'Tidx'], - transforms={'keep_dims': 'keepdims'}, - extras={'axis': tuple(axis.asnumpy())})(inputs[0], attr) - return _impl - -def _broadcast(name): 
- def _impl(inputs, attr, params): - op_name = _math_name_picker(name)(attr) - return AttrCvt( - op_name=op_name, - ignores=['name', 'Tidx'] - )(inputs, attr) - return _impl - -def _split(has_size_vector): - # TF documentation https://www.tensorflow.org/api_docs/python/tf/split - def _impl(inputs, attr, params): - try: - # order and number of inputs are different: - # if has_size_vector: - # https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/split-v - # else: - # https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/split - - # in addition, `axis` and `num_or_size_splits` can be tensors in TensorFlow, - # we can only support constants - if has_size_vector: - input_node_index = 0 - input_axis_index = 2 - size_splits_input_name = inputs[1].list_output_names()[0] - size_splits = params[size_splits_input_name].asnumpy() - section_beginnings = np.cumsum(size_splits)[:-1] - indices_or_sections = tuple(section_beginnings) - else: - input_node_index = 1 - input_axis_index = 0 - indices_or_sections = attr['num_split'] - input_node = inputs[input_node_index] - axis_input_name = inputs[input_axis_index].list_output_names()[0] - axis_input_value = params[axis_input_name].asnumpy()[0] - except (IndexError, KeyError): - raise TypeError( \ - "Unsupported argument for split: `axis` and `num_or_size_splits` " \ - "should be constants") - return _sym.split(input_node, - indices_or_sections=indices_or_sections, - axis=axis_input_value) - return _impl - -def _unpack(): - def _impl(inputs, attr, params): - input_node = inputs[0] - axis = attr['axis'] - input_shape = attr['_input_shapes'][input_node] - axis_length = input_shape[axis] - if axis_length < 0: - raise TypeError("Unstack with unknown axis length") - splitted = _sym.split(input_node, - indices_or_sections=axis_length, - axis=axis, - name=attr.get('_node_name', 'unstack')) - - return _sym.Group([_sym.squeeze(split_item, axis=axis) for split_item in splitted]) - return _impl - -def _expand_dims_0d_aware(data, attr, axis, num_newaxis=1): - if data in attr['_input_0d_mismatch']: - return data if num_newaxis == 1 else \ - _sym.expand_dims(data, axis=axis, num_newaxis=num_newaxis-1) - - return _sym.expand_dims(data, axis=axis, num_newaxis=num_newaxis) - -def _logical(name): - def _impl(inputs, attr, params): - return AttrCvt(op_name=name)(inputs, attr) - return _impl - -# compatible operators that do NOT require any conversion. -_identity_list = [] - -# _convert_map defines maps of name to converter functor(callable) -# for 1 to 1 mapping, use Renamer if nothing but name is different -# use AttrCvt if attributes need to be converted -# for 1 to N mapping(composed), use custom callable functions -# for N to 1 mapping, currently not supported(?) 
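# A stripped-down sketch of the AttrCvt convention the table below relies on
# (an assumption for illustration, not the real AttrCvt class): the TF op name
# is mapped to an nnvm op name, selected attributes are renamed or dropped,
# and fixed extras are injected.
def simple_attr_cvt(op_name, transforms=None, ignores=None, extras=None):
    def _convert(attr):
        new_attr = dict(extras or {})
        for key, value in attr.items():
            if key in (ignores or ()):
                continue
            new_attr[(transforms or {}).get(key, key)] = value
        return op_name, new_attr          # the real class then builds the nnvm op
    return _convert

# e.g. for TF 'Sum': 'keep_dims' becomes 'keepdims' and 'Tidx' is dropped
convert_sum = simple_attr_cvt('sum', transforms={'keep_dims': 'keepdims'}, ignores=('Tidx',))
assert convert_sum({'keep_dims': True, 'Tidx': 'int32'}) == ('sum', {'keepdims': True})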
-_convert_map = { - 'ArgMax' : _argx(_sym.argmax, 'argmax'), - 'ArgMin' : _argx(_sym.argmin, 'argmin'), - 'AvgPool' : _pooling('avg_pool'), - 'BatchNormWithGlobalNormalization' : _batch_norm(), - 'BiasAdd' : _bias_add(), - 'Cast' : _cast(), - 'Ceil' : AttrCvt('ceil'), - 'CheckNumerics' : _check_numerics(), - 'Concat' : _concat(), - 'ConcatV2' : _concatV2(), - 'Conv2D' : _conv('conv'), - 'DecodeJpeg' : _decode_image(), - 'Elu' : _elu(), - 'ExpandDims' : _expand_dims(), - 'Floor' : AttrCvt('floor'), - 'Identity' : _identity(), - 'MatMul' : _matmul(), - 'MaxPool' : _pooling('max_pool'), - 'Add' : _elemwise('add'), - 'Sub' : _elemwise('sub'), - 'Mul' : _elemwise('mul'), - 'RealDiv' : _elemwise('div'), - 'Maximum' : _elemwise('max'), - 'Minimum' : _elemwise('min'), - 'Sum' : _sum(), - 'Square' : _square(), - 'Pack' : _pack(), - 'Slice' : _slice(), - 'LeakyRelu' : AttrCvt('leaky_relu'), - 'Relu' : AttrCvt('relu'), - 'Reshape' : _reshape(), - 'ResizeBilinear' : _resize_bilinear(), - 'Selu' : _selu(), - 'Softmax' : AttrCvt('softmax', {'axis': ('axis', 1)}), - 'Rsqrt' : _rsqrt(), - 'Squeeze' : _squeeze(), - 'FusedBatchNorm' : _fused_batch_norm(), - 'FusedBatchNormV2' : _fused_batch_norm(), - 'Relu6' : _relu6(), - 'DepthwiseConv2dNative' : _conv('depthwise'), - 'Shape' : _shape(), - 'Sigmoid' : AttrCvt('sigmoid'), - 'Fill' : _fill(), - 'GatherV2' : _gather_v2(), - 'StridedSlice' : _stridedSlice(), - 'LRN' : _lrn(), - 'Pad' : _pad('Pad'), - 'PadV2' : _pad('PadV2'), - 'Range' : _range(), - 'Rank' : _rank(), - 'Transpose' : _transpose(), - 'Tanh' : AttrCvt('tanh'), - 'Mean' : _mean(), - 'LogicalAnd' : _logical('logical_and'), - 'LogicalOr' : _logical('logical_or'), - 'LogicalNot' : _logical('logical_not'), - 'Less' : _broadcast('less'), - 'Greater' : _broadcast('greater'), - 'LessEqual' : _broadcast('less_equal'), - 'GreaterEqual' : _broadcast('greater_equal'), - 'Equal' : _broadcast('equal'), - 'NotEqual' : _broadcast('not_equal'), - 'Split' : _split(False), - 'SplitV' : _split(True), - 'Unpack' : _unpack(), -} - -# _convert_map_rnn defines maps of rnn operator name to -# converter functor(callable) for 1 to 1 mapping. -_convert_map_rnn = { - 'LSTMBlockCell' : _LSTMBlockCell(), -} - -class RecurrentNetworks(object): - """Recurrent network layer handlers. - - Handle Layer operations. - ToDo: Operators like RNN/GRU layer concepts also can be handled here - - Parameters - ---------- - nodes : list - list of graph nodes used for tensorflow parsing. - - out_rnn : list - List of RecurrentNetwork outputs. This output will be appended to the - 'head' nodes of the graph. - - graph : tensorflow graph definition object - The loaded tensorflow GraphDef - - convert_map : dict - Dict of name : callable, where name is the op's name that - require conversion to nnvm, callable are functions which - take attrs and return (new_op_name, new_attrs) - """ - def __init__(self, nodes, out_rnn, graph, convert_map): - self._graph = graph - self._convert_map = convert_map - self._nodes = nodes - self._out_rnn = out_rnn - self._cur_lstm_layer = 0 - self._layer_name_list = [] - self._recurrent_ops_layer_map = { - 'LSTMBlockCell' : self._LSTMBlockCellLayer(), - } - - def _LSTMBlockCellLayer(self): - """LSTMBlockCell layer handler. - - Parameters - ---------- - op_name : str - Operator name, eg:LSTMBlockCell - - layer_name : str list - Layer name is used for creating the state input placeholder. 
- - inputs : nnvm.Symbol - Input data - - attrs : dict - Dict of operator attributes - - params : dict - List of pretrained weights and bias - - num_layers : int - Total number of LSTM layer presented in the graph - - Returns - ------- - sym : nnvm.sym.Symbol - The returned nnvm symbol - """ - def _impl(op_name, layer_name, inputs, attrs, params, num_layers): - in_state_c_name = layer_name+'_c' - in_state_h_name = layer_name+'_h' - - def _init_state(num_layers, batch_size, num_hidden): - """Create the initial states for the first layer in the graph.""" - in_state_c = _sym.Variable(in_state_c_name, - shape=(num_layers, batch_size, num_hidden)) - in_state_h = _sym.Variable(in_state_h_name, - shape=(num_layers, batch_size, num_hidden)) - return in_state_c, in_state_h - - def _get_cur_input_state(in_state_c, in_state_h, num_layers, - layer, batch_size, num_hidden): - """Select the appropriate states for the current layer""" - in_state_c_tup = _sym.split(in_state_c, - indices_or_sections=num_layers, axis=0) - in_state_h_tup = _sym.split(in_state_h, - indices_or_sections=num_layers, axis=0) - cur_in_state_c = _sym.reshape(in_state_c_tup[layer], - shape=(batch_size, num_hidden)) - cur_in_state_h = _sym.reshape(in_state_h_tup[layer], - shape=(batch_size, num_hidden)) - return cur_in_state_c, cur_in_state_h - - def _LSTMBlockCellWrapper(inputs, attr, params, - num_layers, layer): - """LSTM cell warapper to prepare the inputs""" - input_shape = attr['_input_shapes'][inputs[0]] - weight_shape = attr['_input_shapes'][inputs[3]] - batch_size = input_shape[0] - num_hidden = weight_shape[1] // 4 - - if layer == 0: - #Create initial states placeholder in case of first layer - in_state_c, in_state_h = _init_state(num_layers, - batch_size, num_hidden) - else: - in_state_c = self._nodes[in_state_c_name] - in_state_h = self._nodes[in_state_h_name] - - cur_in_state_c, cur_in_state_h = _get_cur_input_state( \ - in_state_c, in_state_h, - num_layers, layer, - batch_size, num_hidden) - output, out_state = self._convert_map[op_name](inputs, cur_in_state_c, - cur_in_state_h, - attr, params) - return output, out_state, in_state_c, in_state_h - - sym, cur_out_state, in_state_c, in_state_h = \ - _LSTMBlockCellWrapper(inputs, attrs, params, - num_layers, self._cur_lstm_layer) - self._nodes[in_state_c_name] = in_state_c - self._nodes[in_state_h_name] = in_state_h - cur_out_state = _sym.expand_dims(cur_out_state, axis=0, num_newaxis=1) - self._out_rnn.append(cur_out_state) - self._cur_lstm_layer += 1 - return sym - return _impl - - def process_op(self, op_name, inputs, attrs, params): - """Process recurrent layer operators. - - List '_recurrent_ops_layer_map' map each Layer based operators with its - layer handlers. Total number of layers are calculated to form the input - data shapes. - - Parameters - ---------- - op_name : str - Operator name, such as LSTMBlockCell - - inputs : nnvm.Symbol - Input data - - attrs : dict - Dict of operator attributes - - params : dict - List of pretrained weights and bias - - Returns - ------- - sym : nnvm.sym.Symbol - The returned nnvm symbol - """ - def _get_abs_layer_name(node): - """Identify the layer name is already handled. 
Return the absolute name - """ - if not self._layer_name_list: - self._layer_name_list.append(node.name) - return node.name - - for _name in self._layer_name_list: - if _name in node.name: - abs_name = _name - else: - self._layer_name_list.append(node.name) - abs_name = node.name - return abs_name - - #Find number of layers of this same operator node in the graph - #and also read the inputs name for the current op. - num_layers = 0 - for _, node in enumerate(self._graph.node): - if node.op == op_name: - layer_name = _get_abs_layer_name(node) - num_layers += 1 - - sym = self._recurrent_ops_layer_map[op_name](op_name, layer_name, inputs, attrs, - params, num_layers) - return sym - -class GraphProto(object): - """ A helper class for handling nnvm graph copying from Tensorflow GraphDef. - Definition: - https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto - """ - def __init__(self): - self._nodes = {} - self._params = {} - self._output_shapes = {} - self._num_param = 0 - self._num_rnn_layer = False - self._outputs_are_0d = {} - self._input_shapes = {} - - def from_tensorflow(self, graph, layout="NHWC", shape=None, outputs=None): - """Construct nnvm nodes from tensorflow graph definition - GraphDef. - - Follow the tensorflow graph definition to parse and convert it to NNVM. - Some of the assumptions listed below. - - -> All Placeholders are considered as graph input. - -> All Const nodes are params. - -> Last node is assumed as graph output. - -> _output_shapes : Graph should be frozen with add_shapes=True. - Or user can pass input shape dictionary optionally. - -> DecodeJpeg, ResizeBilinear: These are dummy operators. - Hence user should handle preprocessing outside. - -> CheckNumerics: No implementation as of now for this. - Just copies input to output. - - Parameters - ---------- - graph : tensorflow graph definition object - The loaded tensorflow GraphDef - - layout : target layout to be used (Optional) - NCHW only supported now to enable NHWC models on GPU. - - shape : Dictionary of input dimensions (Optional) - Graph level input shape dictionary. - - outputs : List of output tensor names (Optional) - if not specified then the last node is assumed as graph output. - - Returns - ------- - sym : nnvm.sym.Symbol - The returned nnvm symbol - params : dict - A dict of name: tvm.nd.array pairs, used as pretrained weights - """ - - try: - from tensorflow.python.framework import tensor_util - except ImportError as e: - raise ImportError( - "Unable to import tensorflow which is required {}".format(e)) - - missing_operators = self._parse_import_prerequisites(graph) - - if missing_operators: - msg = 'The following operators are not supported in frontend TensorFlow: {}' - ops = str(list(missing_operators)).strip('[,]') - raise tvm.error.OpNotImplemented(msg.format(ops)) - - for node in graph.node: - if node.op == 'Placeholder': - # Give priority to user argument. - if shape and node.name in shape: - self._input_shapes[node.name] = list(shape[node.name]) - else: - self._input_shapes[node.name] = \ - tensor_util.TensorShapeProtoToList(node.attr['shape'].shape) - for idx, dim in enumerate(self._input_shapes[node.name]): - if dim < 0: - self._input_shapes[node.name][idx] = 1 - warnings.warn("Use 1 instead of -1 in shape of operator %s." 
- % node.name) - - self._nodes[node.name] = _sym.Variable(name=node.name, - shape=self._input_shapes[node.name]) - self._output_shapes[node.name] = [self._input_shapes[node.name]] - self._outputs_are_0d[node.name] = [ \ - not tshape if isinstance(tshape, list) else False \ - for tshape in self._output_shapes[node.name]] - - # Ignore user's input shape for Non placeholder - elif node.op == 'Const': - tensor_value = node.attr['value'].tensor - self._input_shapes[node.name] = \ - tensor_util.TensorShapeProtoToList(tensor_value.tensor_shape) - if shape and node.name in shape: - warnings.warn("Ignore the passed shape. " - "Shape in graphdef will be used for operator %s." % node.name) - - final_op = None - # Parse the nodes to re-create TF graph using Symbol API of NNVM - for node in graph.node: - # Tensorflow doesn't have separate list for params extraction. - # Operator name 'Const' is treated as a parameter to build NNVM params dict. - - input_shapes = {} - input_0d_mismatch = set() - attr = self._parse_attr(node.attr) - - # Variable converted to Const will not have only value attr - if 'value' in attr and node.op == 'Const': - self._output_shapes[node.name] = [self._input_shapes[node.name]] - elif '_output_shapes' in attr: - self._output_shapes[node.name] = \ - [tensor_util.TensorShapeProtoToList(tshape) \ - for tshape in attr['_output_shapes']] - else: - # Keep the list indexable to avoid key error. - # Actual value will be filled after node creation. - # Will infer shapes if the graph is not frozen with add_shapes=True - self._output_shapes[node.name] = [None] - - self._outputs_are_0d[node.name] = [ \ - not tshape if isinstance(tshape, list) else False \ - for tshape in self._output_shapes[node.name]] - - if node.op == "Const": - # All Const nodes are Param nodes, lets parse - self._num_param += 1 - for key, value in node.attr.items(): - self._parse_param(key, value, node.name) - if node.name not in self._nodes: - raise NotImplementedError( \ - "Const {} couldn't be converted to Param.".format(node.name)) - - attr = self._parse_attr(node.attr) - - elif node.op != "Placeholder": - # Pass the parsed shapes instead - attr["_output_shapes"] = output_shapes = self._output_shapes[node.name] - - # Pass the node name too in attr - attr["_node_name"] = node.name - - # Pass the target layout - attr["_target_layout"] = layout - - # Fill shapes for all inputs in a list - inputs = [] - for i in node.input: - # Some TensorFlow operators internally maintain execution layers - # and their output name includes the layer number along with - # graph node name. E.g. the node name is 'Model/RNN/cell_0/RnnCell', but the - # output tensor name is 'Model/RNN/cell_0/RnnCell:0'. In this case, - # the number has to be ignored for single-output nodes. - # On the other hand, for multi-output nodes the number is the output index, - # and the lack of the number implies 0. - tensor_name = i.split(':') - node_name = tensor_name[0] - if node_name in self._nodes: - in_sym = self._nodes[node_name] - if len(in_sym.list_output_names()) > 1: - tensor_slot = int(tensor_name[1]) if len(tensor_name) > 1 else 0 - in_sym = in_sym[tensor_slot] - input_shape = self._output_shapes[node_name][tensor_slot] - else: - tensor_slot = 0 - input_shape = self._output_shapes[node_name][0] - inputs.append(in_sym) - input_shapes[in_sym] = input_shape - # This means the node is 1d in NNVM and 0d in TF. - # See `_expand_dims_0d_aware`. 
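# Tiny standalone helper (an illustrative assumption) mirroring the tensor-name
# parsing just above: a TF input name may carry an output slot after ':', and a
# missing slot means output 0.
def split_tensor_name(name):
    node, _, slot = name.partition(':')
    return node, int(slot) if slot else 0

assert split_tensor_name('Model/RNN/cell_0/RnnCell:1') == ('Model/RNN/cell_0/RnnCell', 1)
assert split_tensor_name('Model/RNN/cell_0/RnnCell') == ('Model/RNN/cell_0/RnnCell', 0)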
- if self._outputs_are_0d[node_name][tensor_slot] and input_shape: - input_0d_mismatch.add(in_sym) - attr['_input_shapes'] = input_shapes - attr['_input_0d_mismatch'] = input_0d_mismatch - - inputs = self._fix_extranodes(node.op, attr, inputs) - op = self._convert_operator(node.op, inputs, attr, graph) - - # Check if op is converted to param - if isinstance(op, np.ndarray): - self._params[node.name] = tvm.nd.array(op) - op = _sym.Variable(name=node.name, - shape=self._params[node.name].shape) - - # Assuming only one output. - self._nodes[node.name] = op - final_op = op - - # Infer shapes even without specifying "add_shapes=True" - if output_shapes == [None]: - g = _graph.create(final_op) - self._output_shapes[node.name] = \ - list(graph_util.infer_shape(g, **self._input_shapes))[-1] - - if self._output_shapes[node.name] and shape and node.name in shape: - assert self._output_shapes[node.name] == list(shape[node.name]) - - # Infer shapes if passed explicitely - node_output = self._nodes[node.name] - if shape and (not self._output_shapes[node.name][0] - or -1 in self._output_shapes[node.name][0]): - g = _graph.create(node_output) - shape_dict = {k: v.shape for k, v in self._params.items()} - shape_dict.update(shape) - _, out_shapes = graph_util.infer_shape(g, **shape_dict) - self._output_shapes[node.name] = out_shapes - - out = [] - if outputs is None: - out.append(final_op) - else: - for out_name in outputs: - if ":" in out_name: - out_name, out_num = out_name.split(":") - out_num = int(out_num) - out.append(self._nodes[out_name][out_num]) - else: - out.append(self._nodes[out_name]) - - #Add the RNN outputs also with 'head' nodes of the nnvm graph - if self._num_rnn_layer: - out_rnn = _sym.concatenate(*self._out_rnn, axis=0) - out.append(out_rnn) - - if isinstance(out, list): - out = _sym.Group(out) if len(out) > 1 else out[0] - - return out, self._params - - def _parse_import_prerequisites(self, graph): - """ Calculate the named preconditions from TensorFlow `graph`. - Return prerequisites for parsing: - a. Set of operator names which don't have their mapping in TVM, i.e. - which are not supported - """ - missing_operators = set() - for node in graph.node: - if node.op == "Placeholder": - pass - elif node.op == "Const": - pass - else: - if any([node.op in t for t in [_identity_list, _convert_map, _convert_map_rnn]]): - pass - else: - missing_operators.add(node.op) - - return missing_operators - - def _parse_param(self, key, value, name): - try: - from tensorflow.python.framework import tensor_util - except ImportError as e: - raise ImportError( - "Unable to import tensorflow which is required {}".format(e)) - - if key == 'value': - np_array = tensor_util.MakeNdarray(value.tensor) - - if np_array.dtype == np.dtype(object): - # Object types are generally tensorflow DT_STRING (DecodeJpeg op). - # Just leave it as placeholder. - self._nodes[name] = _sym.Variable(name=name) - return - - array_ndim = len(np_array.shape) - if array_ndim == 0: - new_array = np.empty([1], dtype=np_array.dtype) - new_array[0] = np_array - self._params[name] = tvm.nd.array(new_array) - else: - self._params[name] = tvm.nd.array(np_array) - self._nodes[name] = _sym.Variable(name=name, - shape=self._params[name].shape) - else: - if key not in ('dtype', '_output_shapes', '_class'): - raise NotImplementedError \ - ("Other attributes for a Const(param) Node {} ? .".format(key)) - - def _get_attr(self, buf): - """Returns the value of the attr of this buf with the given `name`. - - Args: - buf: attrvalue protobuf. 
- - Returns: - The value of the attr, as a Python object. - - Raises: - ValueError: If this op does not have an attr with the given `name`. - """ - fields = ["s", "i", "f", "b", "type", "shape", "tensor", "func"] - - x = buf - - ret = [] - - try: - from tensorflow.python.framework import dtypes - except ImportError as e: - raise ImportError( - "Unable to import tensorflow which is required {}".format(e)) - - # Treat an empty oneof value as an empty list. - if not x.WhichOneof("value"): - return ret - if x.HasField("list"): - for f in fields: - if getattr(x.list, f): - if f == "type": - ret += [dtypes.as_dtype(x) for x in list(getattr(x.list, f))] - else: - ret += list(getattr(x.list, f)) - else: - for f in fields: - if x.HasField(f): - if f == "type": - ret = dtypes.as_dtype(getattr(x, f)) - else: - ret = getattr(x, f) - return ret - - def _parse_attr(self, attr_proto): - """Convert a list of AttributeProto to a dict, with names as keys.""" - attrs = {} - for key, value in attr_proto.items(): - attrs[key] = self._get_attr(value) - - return attrs - - def _convert_rnn_operator(self, op_name, inputs, - attrs, params, graph, convert_map): - """Convert RNN and its variant operators to NNVM operators. - This converter read the input states of each layers and - also maintain the output states of each layer in a list. - - Parameters - ---------- - op_name : str - Operator name, such as LSTMBlockCell - inputs : list of nnvm.Symbol - List of input symbols. - attrs : dict - Dict of operator attributes - params : dict - List of pretrained weights and bias - graph : Tensorflow graph object - Graph is to find the number of upcoming same operator to - calculate the number of layers. - convert_map : dict - Dict of name : callable, where name is the op's name that - require conversion to nnvm, callable are functions which - take attrs and return (new_op_name, new_attrs) - - Returns - ------- - sym : nnvm.Symbol - Converted nnvm Symbol - """ - if not self._num_rnn_layer: - self._out_rnn = [] - self.rnn = RecurrentNetworks(self._nodes, self._out_rnn, graph, convert_map) - self._num_rnn_layer = True - sym = self.rnn.process_op(op_name, inputs, attrs, params) - return sym - - def _convert_operator(self, op_name, inputs, attrs, - graph, identity_list=None, convert_map=None): - """Convert from Tensorflow operator to nnvm operator. - The converter must specify conversions explicitly for incompatible name, and - apply handlers to operator attributes. - - Parameters - ---------- - op_name : str - Operator name, such as Conv2D, AvgPool - inputs : list of nnvm.Symbol - List of input symbols. 
- attrs : dict - Dict of operator attributes - identity_list : list - List of operators that don't require conversion - convert_map : dict - Dict of name : callable, where name is the op's name that - require conversion to nnvm, callable are functions which - take attrs and return (new_op_name, new_attrs) - - Returns - ------- - sym : nnvm.Symbol - Converted nnvm Symbol - """ - identity_list = identity_list if identity_list else _identity_list - convert_map = convert_map if convert_map else _convert_map - convert_map_rnn = _convert_map_rnn - if op_name in identity_list: - sym = get_nnvm_op(op_name)(*inputs, **attrs) - elif op_name in convert_map: - sym = convert_map[op_name](inputs, attrs, self._params) - elif op_name in convert_map_rnn: - sym = self._convert_rnn_operator(op_name, inputs, attrs, - self._params, graph, - convert_map_rnn) - else: - raise tvm.error.OpNotImplemented( - 'Operator {} is not supported in frontend TensorFlow.'.format(op_name)) - return sym - - def _fix_extranodes(self, op_name, attr, inputs): - if op_name == "Softmax": - # Require some times flatten of data before it goes to softmax - # Need to relook into this with latest softmax axis support. - op = AttrCvt(op_name='flatten')(inputs, {}) - node_output = op.list_output_names() - for k, i in zip(list(node_output), range(len(node_output))): - self._nodes[k] = op[i] - inputs = [op] - - return inputs - -def from_tensorflow(graph, layout="NHWC", shape=None, outputs=None): - """Load tensorflow graph which is a python tensorflow graph object into nnvm graph. - The companion parameters will be handled automatically. - - Parameters - ---------- - graph : GraphDef object - Tensorflow GraphDef - - layout : target layout to be used (Optional) - NCHW only supported now to enable NHWC models on GPU. - - shape : Dictionary of input dimensions (Optional) - Graph level input shape dictionary. - - outputs : List of output tensor names (Optional) - if not specified then the last node is assumed as graph output. - - Returns - ------- - sym : nnvm.Symbol - Compatible nnvm symbol - - params : dict of str to tvm.ndarray - Dict of converted parameters stored in tvm.ndarray format - """ - g = GraphProto() - sym, params = g.from_tensorflow(graph, layout, shape, outputs) - return sym, params diff --git a/nnvm/python/nnvm/graph.py b/nnvm/python/nnvm/graph.py deleted file mode 100644 index 0d1e70f4e0f6..000000000000 --- a/nnvm/python/nnvm/graph.py +++ /dev/null @@ -1,288 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# coding: utf-8 -# pylint: disable=invalid-name, protected-access, too-many-arguments, too-many-lines -"""NNVM Graph IR API. - -This is a developer API that is used to manipulate and transform graphs. 
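# A short usage sketch of the Graph API this (now removed) module defined,
# assuming an environment that still ships the nnvm package deleted by this
# diff; create(), apply() and json_attr() are the functions/methods below, and
# the symbol construction is only an example.
from nnvm import graph, symbol

x = symbol.Variable("x")
g = graph.create(symbol.exp(x))      # wrap a Symbol into a Graph
saved = g.apply("SaveJSON")          # run a pass, returning a transformed Graph
print(saved.json_attr("json"))       # read the pass output back as Python data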
-""" -from __future__ import absolute_import as _abs - -import ctypes -import json -from ._base import _LIB -from ._base import c_array, c_str, nn_uint, py_str, string_types -from ._base import GraphHandle, SymbolHandle -from ._base import check_call -from .symbol import Variable, Symbol, Group as _Group - -class GraphIndex(object): - """Index for quickly accessing graph attributes. - - Parameters - ---------- - graph : Graph - The graph to create index. - """ - def __init__(self, graph): - jgraph = json.loads(create(graph).apply("SaveJSON").json_attr("json")) - self.nodes = jgraph["nodes"] - self.entry_ptr = jgraph["node_row_ptr"] - self._name2nodeid = {n["name"]: i for i, n in enumerate(self.nodes)} - self.input_names = graph.symbol.list_input_names() - self.output_entries = jgraph["heads"] - - @property - def num_nodes(self): - """Number of nodes in graph.""" - return len(self.entry_ptr) - 1 - - @property - def num_node_entries(self): - """Number of nodes in graph.""" - return self.entry_ptr[-1] - - def node_id(self, key): - """Get the node index for a given key. - - Parameters - ---------- - key : str or int - The node key or index - - Returns - ------- - index : int - The entry index - """ - return self._name2nodeid[key] - - def entry_id(self, key, value_index=0): - """Get the entry id of a node entry. - - Parameters - ---------- - key : str or int - The node key or index - - value_index : int - The value index of output - - Returns - ------- - index : int - The entry index - """ - if isinstance(key, (list, tuple)): - if len(key) != 3: - raise ValueError("Expect entry index to be tuple of 3 elems") - key, value_index, _ = key - idx = self.node_id(key) if isinstance(key, str) else key - assert value_index < self.entry_ptr[idx + 1] - return self.entry_ptr[idx] + value_index - - - -class Graph(object): - """Graph is the graph object that can be used to apply optimization pass. - - It contains additional graphwise attribute besides the internal symbol. - """ - _tvm_tcode = 17 - - # pylint: disable=no-member - def __init__(self, handle): - """Initialize the function with handle - - Parameters - ---------- - handle : GraphHandle - the handle to the underlying C++ Graph - """ - self.handle = handle - self._index = None - - def __del__(self): - check_call(_LIB.NNGraphFree(self.handle)) - - def json_attr(self, key): - """Get attribute string from the graph. - - Parameters - ---------- - key : str - The key to get attribute from. - - Returns - ------- - value : str - The attribute value of the key, returns None if attribute do not exist. - """ - ret = ctypes.c_char_p() - success = ctypes.c_int() - check_call(_LIB.NNGraphGetJSONAttr( - self.handle, c_str(key), ctypes.byref(ret), ctypes.byref(success))) - if success.value != 0: - json_str = py_str(ret.value) - return json.loads(json_str)[1] - return None - - def _set_symbol_list_attr(self, key, value): - """Set the attribute of the graph. - - Parameters - ---------- - key : string - The key of the attribute - value : value - The any type that can be dumped to json - type_name : string - The typename registered on c++ side. - """ - if isinstance(value, list): - value = _Group(value) - if not isinstance(value, Symbol): - raise ValueError("value need to be grouped symbol") - check_call(_LIB.NNGraphSetNodeEntryListAttr_( - self.handle, c_str(key), value.handle)) - - def _set_json_attr(self, key, value, type_name=None): - """Set the attribute of the graph. 
- - Parameters - ---------- - key : string - The key of the attribute - value : value - The any type that can be dumped to json - type_name : string - The typename registered on c++ side. - """ - if isinstance(value, string_types): - type_name = 'str' - elif type_name is None: - raise ValueError("Need to specify type_name") - json_value = json.dumps([type_name, value]) - check_call(_LIB.NNGraphSetJSONAttr( - self.handle, c_str(key), c_str(json_value))) - - @property - def _tvm_handle(self): - return self.handle.value - - @property - def symbol(self): - shandle = SymbolHandle() - check_call(_LIB.NNGraphGetSymbol(self.handle, ctypes.byref(shandle))) - return Symbol(shandle) - - def json(self): - """Get JSON representation of the graph - - Returns - ------- - json : str - JSON representation of the graph - """ - return self.apply("SaveJSON").json_attr("json") - - def _tvm_graph_json(self): - """Get TVM graph json""" - return self.json() - - @property - def index(self): - if not self._index: - self._index = GraphIndex(self) - return self._index - - def ir(self, join_entry_attrs=None, join_node_attrs=None): - """Get text form of graph ir. - - Parameters - ---------- - join_entry_attrs : list of str - List of graph NodeEntry attribute to be - printed along each operator. - - join_node_attrs : list of str - List of graph node attribute to be - printed along each operator. - """ - if join_entry_attrs: - self._set_json_attr("join_entry_attrs", join_entry_attrs, "list_str") - if join_node_attrs: - self._set_json_attr("join_node_attrs", join_node_attrs, "list_str") - return self.apply("PrintGraphIR").json_attr("graphir") - - def apply(self, passes): - """Apply passes to the graph - - Parameters - ---------- - passes : str or list of str - The passes to be applied - - Returns - ------- - g : Graph - The transformed graph. - """ - if isinstance(passes, string_types): - passes = [passes] - cpass = c_array(ctypes.c_char_p, [c_str(key) for key in passes]) - ghandle = GraphHandle() - npass = nn_uint(len(passes)) - check_call(_LIB.NNGraphApplyPasses(self.handle, npass, cpass, ctypes.byref(ghandle))) - return Graph(ghandle) - - -def load_json(json_str): - """Create a new graph by loading from json - - Parameters - ---------- - json_str : str - The json string - - Returns - ------- - graph : Graph - The loaded graph - """ - ret = create(Variable("x")) - ret._set_json_attr("json", json_str) - return ret.apply("LoadJSON") - - -def create(symbol): - """Create a new graph from symbol. - - Parameters - ---------- - symbol : Symbol - The symbolic graph used to create Graph object. - - Returns - ------- - graph : Graph - A generated new graph object. - """ - ghandle = GraphHandle() - check_call(_LIB.NNGraphCreate( - symbol.handle, ctypes.byref(ghandle))) - return Graph(ghandle) diff --git a/nnvm/python/nnvm/libinfo.py b/nnvm/python/nnvm/libinfo.py deleted file mode 100644 index b3bfc753b9c2..000000000000 --- a/nnvm/python/nnvm/libinfo.py +++ /dev/null @@ -1,84 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# coding: utf-8 -"""Information about nnvm.""" -from __future__ import absolute_import -import sys -import os -import platform - -if sys.version_info[0] == 3: - import builtins as __builtin__ -else: - import __builtin__ - -def find_lib_path(): - """Find NNNet dynamic library files. - - Returns - ------- - lib_path : list(string) - List of all found path to the libraries - """ - if hasattr(__builtin__, "NNVM_BASE_PATH"): - base_path = __builtin__.NNVM_BASE_PATH - else: - base_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) - - if hasattr(__builtin__, "NNVM_LIBRARY_NAME"): - lib_name = __builtin__.NNVM_LIBRARY_NAME - else: - lib_name = "nnvm_compiler" if sys.platform.startswith('win32') else "libnnvm_compiler" - - api_path = os.path.join(base_path, '..', '..', 'lib') - cmake_build_path_win = os.path.join(base_path, '..', '..', '..', 'build', 'Release') - cmake_build_path = os.path.join(base_path, '..', '..', '..', 'build') - install_path = os.path.join(base_path, '..', '..', '..') - dll_path = [base_path, api_path, cmake_build_path_win, cmake_build_path, - install_path] - - if sys.platform.startswith('linux') and os.environ.get('LD_LIBRARY_PATH', None): - dll_path.extend([p.strip() for p in os.environ['LD_LIBRARY_PATH'].split(":")]) - elif sys.platform.startswith('darwin') and os.environ.get('DYLD_LIBRARY_PATH', None): - dll_path.extend([p.strip() for p in os.environ['DYLD_LIBRARY_PATH'].split(":")]) - elif sys.platform.startswith('win32') and os.environ.get('PATH', None): - dll_path.extend([p.strip() for p in os.environ['PATH'].split(";")]) - - if sys.platform.startswith('win32'): - vs_configuration = 'Release' - if platform.architecture()[0] == '64bit': - dll_path.append(os.path.join(base_path, '..', '..', '..', 'build', vs_configuration)) - dll_path.append(os.path.join(base_path, '..', '..', '..', 'windows', 'x64', - vs_configuration)) - else: - dll_path.append(os.path.join(base_path, '..', '..', '..', 'build', vs_configuration)) - dll_path.append(os.path.join(base_path, '..', '..', '..', 'windows', vs_configuration)) - dll_path = [os.path.join(p, '%s.dll' % lib_name) for p in dll_path] - elif sys.platform.startswith('darwin'): - dll_path = [os.path.join(p, '%s.dylib' % lib_name) for p in dll_path] - else: - dll_path = [os.path.join(p, '%s.so' % lib_name) for p in dll_path] - - lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] - if not lib_path: - raise RuntimeError('Cannot find the files.\n' + - 'List of candidates:\n' + str('\n'.join(dll_path))) - return lib_path - - -# current version -__version__ = "0.8.0" diff --git a/nnvm/python/nnvm/name.py b/nnvm/python/nnvm/name.py deleted file mode 100644 index fe3d8311f1a6..000000000000 --- a/nnvm/python/nnvm/name.py +++ /dev/null @@ -1,94 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# coding: utf-8 -"""Automatic naming support for symbolic API.""" -from __future__ import absolute_import as _abs - -class NameManager(object): - """NameManager to do automatic naming. - - User can also inherit this object to change naming behavior. - """ - current = None - - def __init__(self): - self._counter = {} - self._old_manager = None - - def get(self, name, hint): - """Get the canonical name for a symbol. - - This is default implementation. - When user specified a name, - the user specified name will be used. - - When user did not, we will automatically generate a - name based on hint string. - - Parameters - ---------- - name : str or None - The name user specified. - - hint : str - A hint string, which can be used to generate name. - - Returns - ------- - full_name : str - A canonical name for the user. - """ - if name: - return name - if hint not in self._counter: - self._counter[hint] = 0 - name = '%s%d' % (hint, self._counter[hint]) - self._counter[hint] += 1 - return name - - def __enter__(self): - self._old_manager = NameManager.current - NameManager.current = self - return self - - def __exit__(self, ptype, value, trace): - assert self._old_manager - NameManager.current = self._old_manager - - -class Prefix(NameManager): - """A name manager that always attach a prefix to all names. - - Examples - -------- - >>> import nnvm as nn - >>> data = nn.symbol.Variable('data') - >>> with nn.name.Prefix('mynet_'): - net = nn.symbol.FullyConnected(data, num_hidden=10, name='fc1') - >>> net.list_arguments() - ['data', 'mynet_fc1_weight', 'mynet_fc1_bias'] - """ - def __init__(self, prefix): - super(Prefix, self).__init__() - self._prefix = prefix - - def get(self, name, hint): - name = super(Prefix, self).get(name, hint) - return self._prefix + name - -# initialize the default name manager -NameManager.current = NameManager() diff --git a/nnvm/python/nnvm/symbol.py b/nnvm/python/nnvm/symbol.py deleted file mode 100644 index 297d2ba7405a..000000000000 --- a/nnvm/python/nnvm/symbol.py +++ /dev/null @@ -1,405 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-import, protected-access -"""Symbolic graph construction API. 
- -This namespace contains most of the registered operators. -For detailed list of operators, checkout ``Core Tensor Operators`` -""" -from __future__ import absolute_import as _abs -import sys as _sys -import os as _os -import ctypes as _ctypes -from numbers import Number as _Number - -import numpy as np - -from . import _base -from ._base import _LIB, check_call as _check_call, _FFI_MODE, _all_var_init -from .attribute import AttrScope -from . import _symbol_internal as _internal -from . import contrib - -# Use different verison of SymbolBase -# When possible, use cython to speedup part of computation. - -IMPORT_EXCEPT = RuntimeError if _FFI_MODE == "cython" else ImportError - -try: - if _FFI_MODE == "ctypes": - raise ImportError() - if _sys.version_info >= (3, 0): - from ._cy3.symbol import SymbolBase, _init_symbol_module - else: - from ._cy2.symbol import SymbolBase, _init_symbol_module -except IMPORT_EXCEPT: - # pylint: disable=wrong-import-position - from ._ctypes.symbol import SymbolBase, _init_symbol_module - - -class Symbol(SymbolBase): - """Symbol is basic operation unit for symbolic graph composition.""" - # disable dictionary storage, also do not have parent type. - __slots__ = [] - - _tvm_tcode = 16 - - @property - def _tvm_handle(self): - return self.handle.value - - def __add__(self, other): - """x.__add__(y) <=> x+y""" - if isinstance(other, Symbol): - return __add_symbol__(self, other) - if isinstance(other, _Number): - return __add_scalar__(self, scalar=other) - raise TypeError("type %s not supported" % str(type(other))) - - def __radd__(self, other): - return self.__add__(other) - - def __sub__(self, other): - """x.__sub__(y) <=> x-y""" - if isinstance(other, Symbol): - return __sub_symbol__(self, other) - if isinstance(other, _Number): - return __sub_scalar__(self, scalar=other) - raise TypeError('type %s not supported' % str(type(other))) - - def __rsub__(self, other): - if isinstance(other, _Number): - return __rsub_scalar__(self, scalar=other) - raise TypeError('type %s not supported' % str(type(other))) - - def __mul__(self, other): - """x.__mul__(y) <=> x*y""" - if isinstance(other, Symbol): - return __mul_symbol__(self, other) - if isinstance(other, _Number): - return __mul_scalar__(self, scalar=other) - raise TypeError('type %s not supported' % str(type(other))) - - def __rmul__(self, other): - return self.__mul__(other) - - def __div__(self, other): - """x.__div__(y) <=> x/y""" - if isinstance(other, Symbol): - return __div_symbol__(self, other) - if isinstance(other, _Number): - return __div_scalar__(self, scalar=other) - raise TypeError('type %s not supported' % str(type(other))) - - def __rdiv__(self, other): - if isinstance(other, _Number): - return __rdiv_scalar__(self, scalar=other) - raise TypeError('type %s not supported' % str(type(other))) - - def __lshift__(self, other): - """x.__lshift__(y) <=> x << y""" - if isinstance(other, _Number): - return __lshift_scalar__(self, scalar=other) - raise TypeError('type %s not supported' % str(type(other))) - - def __rshift__(self, other): - """x.__rshift__(y) <=> x >> y""" - if isinstance(other, _Number): - return __rshift_scalar__(self, scalar=other) - raise TypeError('type %s not supported' % str(type(other))) - - def __truediv__(self, other): - return self.__div__(other) - - def __rtruediv__(self, other): - return self.__rdiv__(other) - - def __pow__(self, other): - """x.__pow__(y) <=> x**y""" - if isinstance(other, Symbol): - return __pow_symbol__(self, other) - if isinstance(other, _Number): - return 
__pow_scalar__(self, scalar=other) - raise TypeError('type %s not supported' % str(type(other))) - - def __rpow__(self, other): - if isinstance(other, _Number): - return __rpow_scalar__(self, scalar=other) - raise TypeError('type %s not supported' % str(type(other))) - - def __neg__(self): - """x.__neg__() <=> -x""" - return self.__mul__(-1.0) - - def __copy__(self): - return self.__deepcopy__() - - def __deepcopy__(self, _=None): - """Returns a deep copy of the input object.""" - handle = _base.SymbolHandle() - _base.check_call(_LIB.NNSymbolCopy(self.handle, - _ctypes.byref(handle))) - return Symbol(handle) - - def __getitem__(self, index): - if isinstance(index, _base.string_types): - idx = None - for i, name in enumerate(self.list_output_names()): - if name == index: - if idx is not None: - raise ValueError('There are multiple outputs with name \"%s\"' % index) - idx = i - if idx is None: - raise ValueError('Cannot find output that matches name \"%s\"' % index) - index = idx - if not isinstance(index, int): - raise TypeError('Symbol only support integer index to fetch i-th output') - handle = _base.SymbolHandle() - _check_call(_LIB.NNSymbolGetOutput( - self.handle, _base.nn_uint(index), _ctypes.byref(handle))) - return Symbol(handle=handle) - - def __iter__(self): - return (self[i] for i in self.list_output_names()) - - def attr(self, key): - """Get attribute string from the symbol, this function only works for non-grouped symbol. - - Parameters - ---------- - key : str - The key to get attribute from. - - Returns - ------- - value : str - The attribute value of the key, returns None if attribute do not exist. - """ - ret = _ctypes.c_char_p() - success = _ctypes.c_int() - _check_call(_LIB.NNSymbolGetAttr( - self.handle, _base.c_str(key), _ctypes.byref(ret), _ctypes.byref(success))) - if success.value != 0: - return _base.py_str(ret.value) - return None - - def list_attr(self, recursive=False): - """Get all attributes from the symbol. - - Parameters - ---------- - recursive : bool - Default `False`. When `recursive` is `True`, list recursively all the - attributes in the descendents. The attribute names are pre-pended with - the symbol names to avoid conflicts. If `False`, then only attributes - that belongs to this symbol is returned, and the attribute names will - **not** be pre-pended with the symbol name. - """ - size = _base.nn_uint() - pairs = _ctypes.POINTER(_ctypes.c_char_p)() - option = _ctypes.c_int(0) if recursive else _ctypes.c_int(1) - _check_call(_LIB.NNSymbolListAttrs( - self.handle, option, _ctypes.byref(size), _ctypes.byref(pairs))) - return {_base.py_str(pairs[i*2]): _base.py_str(pairs[i*2+1]) for i in range(size.value)} - - def get_internals(self): - """Get a new grouped symbol whose output contains all the internal outputs of this symbol. - - Returns - ------- - sgroup : Symbol - The internal of the symbol. 
- """ - handle = _base.SymbolHandle() - _check_call(_LIB.NNSymbolGetInternals( - self.handle, _ctypes.byref(handle))) - return Symbol(handle=handle) - - def get_children(self): - """Gets a new grouped symbol whose output contains - inputs to output nodes of the original symbol.""" - handle = _base.SymbolHandle() - _check_call(_LIB.NNSymbolGetChildren( - self.handle, _ctypes.byref(handle))) - ret = Symbol(handle=handle) - if not ret.list_output_names(): - return None - return ret - - def _get_list_copt(self, option): - """internal function to get list option""" - if option == 'all': - return _ctypes.c_int(0) - if option == 'read_only': - return _ctypes.c_int(1) - if option == 'aux_state': - return _ctypes.c_int(2) - raise ValueError("option need to be in {'all', 'read_only, 'aux_state'}") - - def list_input_variables(self, option='all'): - """List all the input variables in the symbol. - - Parameters - ---------- - option : {'all', 'read_only', 'aux_state'}, optional - The listing option - - 'all' will list all the arguments. - - 'read_only' lists arguments that are readed by the graph. - - 'aux_state' lists arguments that are mutated by the graph as state. - Returns - ------- - vars : list of symbol - List of all the variables - """ - size = _ctypes.c_uint() - sarr = _ctypes.POINTER(_base.SymbolHandle)() - _check_call(_LIB.NNSymbolListInputVariables( - self.handle, self._get_list_copt(option), - _ctypes.byref(size), _ctypes.byref(sarr))) - return [Symbol(_base.SymbolHandle(sarr[i])) for i in range(size.value)] - - def list_input_names(self, option='all'): - """List all the inputs in the symbol. - - Parameters - ---------- - option : {'all', 'read_only', 'aux_state'}, optional - The listing option - - 'all' will list all the arguments. - - 'read_only' lists arguments that are readed by the graph. - - 'aux_state' lists arguments that are mutated by the graph as state. - Returns - ------- - args : list of string - List of all the arguments. - """ - size = _ctypes.c_uint() - sarr = _ctypes.POINTER(_ctypes.c_char_p)() - _check_call(_LIB.NNSymbolListInputNames( - self.handle, self._get_list_copt(option), - _ctypes.byref(size), _ctypes.byref(sarr))) - return [_base.py_str(sarr[i]) for i in range(size.value)] - - def list_output_names(self): - """List all outputs in the symbol. - - Returns - ------- - returns : list of string - List of all the outputs. - """ - size = _ctypes.c_uint() - sarr = _ctypes.POINTER(_ctypes.c_char_p)() - _check_call(_LIB.NNSymbolListOutputNames( - self.handle, _ctypes.byref(size), _ctypes.byref(sarr))) - return [_base.py_str(sarr[i]) for i in range(size.value)] - - def debug_str(self): - """Get a debug string. - - Returns - ------- - debug_str : string - Debug string of the symbol. - """ - debug_str = _ctypes.c_char_p() - _check_call(_LIB.NNSymbolPrint( - self.handle, _ctypes.byref(debug_str))) - return _base.py_str(debug_str.value) - - def _add_control_deps(self, deps): - """Add control flow dependencies. - This makes current op depend on the deps. - Only use when necessary, - this function mutate the current symbol node. - - Returns - ------- - deps : Symbol for list of symbol - The dependencies - """ - if isinstance(deps, list): - deps = Group(deps) - _check_call(_LIB.NNAddControlDeps( - self.handle, deps.handle)) - - -def Variable(name, init=None, **kwargs): - """Create a symbolic variable with specified name. - - Parameters - ---------- - name : str - Name of the variable. - init : Symbol or numpy.ndarray - Symbol or numpy ndarray of initial value for the variable. 
- Note that for symbolic initialization value, it must be able - to be defined through InferShape, such as sym.zeros_like(v), - in which v is an input or parameter. Otherwise, pass a numpy - ndarray instead. - kwargs : dict of string -> string - Additional attributes to set on the variable. - - Returns - ------- - variable : Symbol - The created variable symbol. - """ - if not isinstance(name, _base.string_types): - raise TypeError('Expect a string for variable `name`') - handle = _base.SymbolHandle() - _base.check_call(_LIB.NNSymbolCreateVariable( - _base.c_str(name), _ctypes.byref(handle))) - ret = Symbol(handle) - attr = AttrScope.current.get(kwargs) - if attr: - ret._set_attr(**attr) - if init is not None: - if not isinstance(init, (Symbol, np.ndarray)): - raise TypeError('Expect a Symbol or numpy ndarray' - 'for variable `init`') - _all_var_init[name] = init - return ret - - -def Group(symbols): - """Create a symbol that groups symbols together. - - Parameters - ---------- - symbols : list - List of symbols to be grouped. - - Returns - ------- - sym : Symbol - The created group symbol. - """ - ihandles = [] - for sym in symbols: - if not isinstance(sym, Symbol): - raise TypeError('Expect Symbols in the list input') - ihandles.append(sym.handle) - handle = _base.SymbolHandle() - _check_call(_LIB.NNSymbolCreateGroup( - _base.nn_uint(len(ihandles)), - _base.c_array(_base.SymbolHandle, ihandles), - _ctypes.byref(handle))) - return Symbol(handle) - -# Set the real symbol class to Symbol -_init_symbol_module(Symbol, "nnvm") diff --git a/nnvm/python/nnvm/testing/__init__.py b/nnvm/python/nnvm/testing/__init__.py deleted file mode 100644 index 506a9e9aa68b..000000000000 --- a/nnvm/python/nnvm/testing/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Utilities for testing and benchmarks""" -from __future__ import absolute_import as _abs - -from .config import ctx_list -from .utils import create_workload -from . import mobilenet -from . import mobilenet_v2 -from . import mlp -from . import resnet -from . import vgg -from . import densenet -from . import squeezenet -from . import inception_v3 -from . import dcgan -from . import dqn -from . import check_computation diff --git a/nnvm/python/nnvm/testing/check_computation.py b/nnvm/python/nnvm/testing/check_computation.py deleted file mode 100644 index 63b3a17880a2..000000000000 --- a/nnvm/python/nnvm/testing/check_computation.py +++ /dev/null @@ -1,573 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=cell-var-from-loop,no-else-return -"""Helper utilities to check functions and their gradients.""" -from __future__ import absolute_import as _abs - -import logging -import numpy as np - -import tvm -from tvm.contrib import graph_runtime -from tvm.testing import check_numerical_grads -from tvm import relay - -import nnvm -from nnvm.compiler import graph_util -from nnvm.compiler.graph_attr import TCODE_TO_DTYPE, DTYPE_TO_TCODE -from nnvm.to_relay import to_relay -from .config import ctx_list - -def infer_shapes_dtypes(graph, shape=None, dtype=None, fallback_dtype=None): - """Runs dtype and shape inference passes on a graph and returns the resulting graph - along with the inferred information. - - Parameters - ---------- - graph : nnvm.graph.Graph - A graph we want to run inference on. - - shape : Dict[str, Tuple[int]] or Tuple[int], optional - A dict mapping input variable names to shapes. - By default shapes will be inferred from variables' attributes. - Note that this parameter takes precedence over variables' attributes. - - dtype : Dict[str, str] or str, optional - A dict mapping input variable names to dtypes, or just a single dtype. - By default dtypes will be inferred from variables' attributes. - Note that this parameter takes precedence over variables' attributes. - - fallback_dtype : str, optional - A dtype that will be used for variables whose dtype can't be inferred from other - variables' dtypes. - - Returns - ------- - graph : nnvm.graph.Graph - The resulting graph with dtype and shape information on its nodes. - - input_shapes : Dict[str, Tuple[int]] - The inferred shapes of input variables merged with the `shape` dictionary. - - input_dtypes : Dict[str, str] - The inferred dtypes of input variables merged with the `dtype` dictionary. - - output_shapes : List[Tuple[int]] - The inferred shapes of outputs. - - output_dtypes : List[str] - The inferred dtypes of outputs. 
- """ - # Preprocess input parameters - if shape is None: - provided_shapes = {} - elif isinstance(shape, dict): - provided_shapes = shape - else: - provided_shapes = {x: shape for x in graph.symbol.list_input_variables()} - - if dtype is None: - provided_dtypes = {} - elif isinstance(dtype, dict): - provided_dtypes = dtype - else: - provided_dtypes = {x: dtype for x in graph.symbol.list_input_variables()} - - provided_shapes = _dict_var_to_dict_str(provided_shapes) - provided_dtypes = _dict_var_to_dict_str(provided_dtypes) - - # The graph may already contain shape and dtype info, so extract it and merge with - # the user-specified shapes and dtypes (use the user-specified one on contradiction) - preexisting_shapes = graph.json_attr('shape') - preexisting_dtypes = graph.json_attr('dtype') - - if preexisting_shapes: - for x in graph.index.input_names: - if x not in provided_shapes: - x_shape = tuple(preexisting_shapes[graph.index.entry_id(x)]) - provided_shapes[x] = x_shape - - if preexisting_dtypes: - for x in graph.index.input_names: - if x not in provided_dtypes: - x_dtype = TCODE_TO_DTYPE[preexisting_dtypes[graph.index.entry_id(x)]] - provided_dtypes[x] = x_dtype - - # Perform inference - nnvm.compiler.graph_attr.set_shape_inputs(graph, provided_shapes) - nnvm.compiler.graph_attr.set_dtype_inputs(graph, provided_dtypes) - - graph = graph.apply('InferShape').apply('InferType') - - inferred_shapes = graph.json_attr('shape') - inferred_dtypes = graph.json_attr('dtype') - - index = graph.index - - output_shapes = [tuple(inferred_shapes[index.entry_id(entry)]) - for entry in index.output_entries] - output_dtypes = [TCODE_TO_DTYPE[inferred_dtypes[index.entry_id(entry)]] - for entry in index.output_entries] - - # Postprocess the results - input_shapes = provided_shapes.copy() - input_dtypes = provided_dtypes.copy() - - for x in graph.symbol.list_input_variables(): - x_name = x.attr('name') - x_entry_id = graph.index.entry_id(x_name) - input_shapes[x_name] = tuple(inferred_shapes[x_entry_id]) - input_dtypes[x_name] = TCODE_TO_DTYPE[inferred_dtypes[x_entry_id]] - - # Merge the original user-specified shapes in case some of them are specified for non-existing - # variables - for x_name, x_shape in provided_shapes.items(): - x_shape = tuple(x_shape) - if input_shapes.get(x_name, x_shape) != x_shape: - raise RuntimeError("Inferred shape differs from the provided shape.\n" - "Provided shapes: {}\nInferred shapes: {}" - .format(provided_shapes, input_shapes)) - else: - input_shapes[x_name] = x_shape - - # Merge the original user-specified dtypes - for x_name, x_dtype in provided_dtypes.items(): - if not isinstance(x_dtype, str): - x_dtype = TCODE_TO_DTYPE[x_dtype] - if input_dtypes.get(x_name, x_dtype) != x_dtype: - raise RuntimeError("Inferred dtype differs from the provided dtype.\n" - "Provided dtypes: {}\nInferred dtypes: {}" - .format(provided_dtypes, input_dtypes)) - else: - input_dtypes[x_name] = x_dtype - - # If some dtypes weren't inferred and there is a fallback dtype, assign it to those varibles - # and repeat the inference - if fallback_dtype is not None and not all(input_dtypes.values()): - input_dtypes = {x: input_dtypes[x] if input_dtypes[x] else fallback_dtype - for x in input_dtypes} - return infer_shapes_dtypes(graph, input_shapes, input_dtypes, fallback_dtype=None) - - return graph, input_shapes, input_dtypes, output_shapes, output_dtypes - -def graph_to_function(graph, target, ctx, shape=None, dtype=None): - """Convert a graph to a function taking a keyword args and returning a 
list of results - (both args and results are numpy arrays). - - Example:: - - fun = graph_to_function(graph, llvm, cpu(0)) - [res1, res2] = fun(x=np.zeros((1,2)), y=np.zeros((1,))) - - Parameters - ---------- - graph : nnvm.graph.Graph - A graph we want to convert to a function. - - target : str or :any:`tvm.target.Target` - The build target - - ctx : TVMContext - The context to deploy the module. - - shape : Dict[str, Tuple[int]], optional - A dict mapping input variable names to shapes. - By default shapes will be inferred from variables' attributes. - Note that this parameter takes precedence over variables' attributes. - - dtype : Dict[str, str] or str, optional - A dict mapping input variable names to dtypes, or just a single dtype. - By default dtypes will be inferred from variables' attributes. - Note that this parameter takes precedence over variables' attributes. - - Returns - ------- - function : Callable[..., List[numpy.ndarray]] - """ - # Infer missing shapes and dtypes - graph, shape, dtype, output_shapes, output_dtypes = \ - infer_shapes_dtypes(graph, shape=shape, dtype=dtype) - - if None in dtype.values(): - raise ValueError("Input variables with no type: {}".format(dtype)) - - if not all(shape.values()): - raise ValueError("Input variables with no shape: {}".format(shape)) - - compute_graph, lib, params = nnvm.compiler.build(graph, target, shape=shape, dtype=dtype) - module = graph_runtime.create(compute_graph, lib, ctx) - - if params: - module.set_inputs(**params) - - def run(**kwargs): - module.run(**kwargs) - res = [] - for i, (o_shape, o_dtype) in enumerate(zip(output_shapes, output_dtypes)): - res.append(module.get_output(i, tvm.nd.empty(o_shape, o_dtype)).asnumpy()) - return res - - return run - -def _dict_var_to_dict_str(dictionary): - """Convert a Dict[nnvm.Symbol, T] to Dict[str, T]""" - if isinstance(dictionary, dict): - return {s.attr('name') if isinstance(s, nnvm.symbol.Symbol) else s: - dictionary[s] for s in dictionary} - else: - return dictionary - -def check_function(symbol, forward=None, backward=None, grad_input_vars=None, - shape=None, dtype=None, in_range=None, values=None, - exclude_targets=None, only_targets=None, - additional_params=None, - numerical_grads=None, numerical_grads_params=None, - atol=1e-5, rtol=1e-5, quiet=False): - """Compute the function and/or its gradients on a random input and raise - an exception if the result doesn't match the reference implementation. - - Parameters - ---------- - symbol : nnvm.Symbol - A symbol representing the output. - - forward : Callable[..., List[numpy.ndarray]], optional - A reference implementation to compare with. - - backward : Callable[..., List[numpy.ndarray] or Dict[str, numpy.ndarray]], optional - A reference implementation of gradients. Should also accept head_grads besides - normal inputs which is a list of gradients of some scalar wrt the outputs or just a - single gradient if there are multiple outputs. - Should return either a dict mapping input variable names to the respective - gradients or a list of gradients wrt variables from grad_input_vars in - exactly the same order (in alphabetical order by default). - - grad_input_vars : List[nnvm.Symbol or str], optional - A list of variables with respect to which the gradients will be computed. - None (default) means that all input variables will be used in an alphabetical order. - - shape : Dict[nnvm.Symbol or str, Tuple[int]] or Tuple[int], optional - A dict mapping input variable names to shapes, or just a single shape. 
- By default shapes will be inferred from variables' attributes (see the Examples). - Note that this parameter takes precedence over variables' attributes. - - dtype : Dict[nnvm.Symbol or str, str] or str, optional - A dict mapping input variable names to dtypes, or just a single dtype. - By default dtypes will be inferred from variables' attributes (see the Examples). - If dtypes cannot be inferred for some variables then float32 will be used as a fallback. - Note that this parameter takes precedence over variables' attributes. - - in_range : Dict[nnvm.Symbol or str, (float, float)] or (float, float), optional - A dict mapping input variable names to ranges or just a single range - (the same for all variables). Input values will be generated from - uniform distributions on these ranges. `head_grads` can also be - assigned a range this way. - - values : Dict[nnvm.Symbol or str, numpy.ndarray], optional - A dict explicitly providing values for some variables instead of random generation. - - exclude_targets : Set[str], optional - Skip compiling and running anything for these targets. - - only_targets : Set[str], optional - Test only for those targets from `ctx_list()` that are also in this set. - - additional_params : dict, optional - A dict of additional parameters which will be passed to forward and backward. - - numerical_grads : bool or 'if_possible', optional - Whether to additionally check against numerically computed gradients. If 'if_possible' or - None is passed (which is the default) then it will try to create a gradient computation - graph and then check gradients numerically only if this graph can be created (i.e. if there - are some operations with unimplemented gradients, it will just issue a warning). - Checking against numerical gradients is done via the `check_numerical_grads` function. - - numerical_grads_params : dict, optional - Additional parameters for `check_numerical_grads`. - - atol : float, optional - Absolute tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients. - - rtol : float, optional - Relative tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients. - - quiet : bool, optional - Don't dump additional information to stdout on failure. - - Examples - -------- - .. code-block:: python - - x = sym.Variable("x", shape=(1, 2)) - y = sym.Variable("y", shape=(1, 2)) - - # check the function and its gradients both numerically and using a reference function - check_function(x + 2*y, - lambda x, y: x + 2*y, - lambda x, y, head_grads: {'x': head_grads, 'y': 2*head_grads}) - - # just check gradients numerically - check_function(x + 2*y, numerical_grads=True) - - # just check the forward computation - check_function(x + 2*y, lambda x, y: x + 2*y, numerical_grads=False) - - # specifying dtype - check_function(x + 2*y, lambda x, y: x + 2*y, dtype='float64') - - # dtypes can also be specified during variable creation with dtype codes - x = sym.Variable("x", dtype=0) - check_function(x + 1, shape=(2, 2), numerical_grads=True) - """ - # validate and preprocess the input params - if numerical_grads is None and forward is None and backward is None: - raise ValueError("No reference function was passed to check_function. 
If you only want to " - "check gradients numerically, pass numerical_grads=True explicitly.") - - if numerical_grads is None: - numerical_grads = 'if_possible' - - if numerical_grads not in [False, True, 'if_possible']: - raise ValueError("numerical_grads must be a bool or 'if_possible', not {}" - .format(numerical_grads)) - - if additional_params is None: - additional_params = {} - - input_vars = symbol.list_input_variables() - input_dict = {x.attr('name'): x for x in input_vars} - - if grad_input_vars is None: - grad_input_vars = sorted(input_vars, key=lambda x: x.attr('name')) - else: - grad_input_vars = [input_dict[x] if isinstance(x, str) else x for x in grad_input_vars] - - in_range = _dict_var_to_dict_str(in_range) - values = _dict_var_to_dict_str(values) - - out_len = len(symbol.list_output_names()) - - # Infer the output shapes and dtypes, and preprocess the shape and dtype params - forward_graph, shape, dtype, out_shapes, out_dtypes = \ - infer_shapes_dtypes(nnvm.graph.create(symbol), shape=shape, dtype=dtype, - fallback_dtype='float32') - - if not all(out_shapes) or not all(out_dtypes): - if not quiet: - print(forward_graph.ir(join_node_attrs=['shape', 'dtype'])) - raise ValueError("Could not infer shapes or dtypes for outputs.\n" - "out_shapes = {}\nout_dtypes = {}".format(out_shapes, out_dtypes)) - - backward_graph = None - - # If we want gradients, we have to recreate the graph, but now with gradient computations - # Note that here we need out_shapes for defining the shape of head grads, so we have to - # create the graph twice - if backward is not None or numerical_grads: - try: - head_grads_symbols = [nnvm.symbol.Variable("head_grads_" + str(i), - shape=out_shapes[i], - dtype=DTYPE_TO_TCODE[out_dtypes[i]]) - for i in range(out_len)] - grad_symbols = graph_util.gradients([symbol], grad_input_vars, - grad_ys=head_grads_symbols) - # Sometimes grads do not depend on head_grads, so head_grads does not appear - # in the variable list; adding it manually prevents this, making things a bit easier - backward_graph = \ - nnvm.graph.create(nnvm.symbol.Group([symbol] + grad_symbols + head_grads_symbols)) - - backward_graph, shape, dtype, out_shapes, out_dtypes = \ - infer_shapes_dtypes(backward_graph, shape=shape, dtype=dtype, - fallback_dtype='float32') - except nnvm._base.NNVMError as err: - if backward is None and numerical_grads == "if_possible": - logging.warning("Won't check gradients because: %s", str(err).split('\n', 1)[0]) - numerical_grads = False - backward_graph = None - else: - raise - - main_graph = backward_graph if backward_graph is not None else forward_graph - - # Generate random data for inputs (including head_grads) - - np_inputs = {} - - for x in main_graph.symbol.list_input_variables(): - x_name = x.attr('name') - x_shape = shape[x_name] - x_dtype = dtype[x_name] - - if values is not None and x_name in values: - np_inputs[x_name] = values[x_name].astype(x_dtype) - continue - - low = -1.0 - high = 1.0 - if in_range is not None: - if isinstance(in_range, dict): - if x_name in in_range: - low = in_range[x_name][0] - high = in_range[x_name][1] - else: - low = in_range[0] - high = in_range[1] - - np_inputs[x_name] = np.random.uniform(size=x_shape, low=low, high=high).astype(x_dtype) - - np_inputs_without_head_grads = {k: np_inputs[k] for k in np_inputs - if not k.startswith('head_grads_')} - - nothing_was_done = True - - # Compute and compare the results - for target, ctx in ctx_list(): - if exclude_targets is not None: - if target in exclude_targets or str(target) in 
exclude_targets: - logging.info("Skipping target = %s, ctx = %s", target, ctx) - continue - if only_targets is not None: - if target not in only_targets and str(target) not in only_targets: - logging.info("Skipping target = %s, ctx = %s", target, ctx) - continue - - logging.info("Checking computation on target = %s, ctx = %s", target, ctx) - - debug_stage = None - - try: - nnvm_res = None - - debug_stage = "compiling" - main_function = graph_to_function(main_graph, target, ctx) - - # nnvm_res contains the output and gradients (if they are needed) - debug_stage = "running" - nnvm_res = main_function(**np_inputs) - - try: - logging.debug("checking to_relay conversion") - inputs = np_inputs_without_head_grads.copy() - func, inputs = to_relay(main_graph, shape, dtype, params=inputs) - with relay.build_config(opt_level=3): - graph, lib, params = relay.build(func, target=target) - m = graph_runtime.create(graph, lib, ctx) - m.set_input(**inputs) - m.set_input(**params) - m.run() - for i in range(out_len): - relay_out = m.get_output(i).asnumpy() - tvm.testing.assert_allclose(nnvm_res[i], relay_out, atol=atol, rtol=rtol) - except NotImplementedError as err: - # the NNVM operator is not supported yet - logging.warning(err) - - if backward_graph is not None: - grad_var_names = [x.attr('name') for x in grad_input_vars] - nnvm_grads = {x: v for x, v in zip(grad_var_names, nnvm_res[out_len:])} - - if forward is not None: - nothing_was_done = False - debug_stage = "checking forward computation" - logging.debug(debug_stage) - - params = {} - params.update(np_inputs_without_head_grads) - params.update(additional_params) - numpy_res = forward(**params) - - if isinstance(numpy_res, tuple): - numpy_res = list(numpy_res) - - if not isinstance(numpy_res, list): - numpy_res = [numpy_res] - - if len(numpy_res) != out_len: - raise ValueError("Forward function returned {} values, but " - "the nnvm graph returns {} values" - .format(len(numpy_res), out_len)) - - for i in range(out_len): - tvm.testing.assert_allclose(nnvm_res[i], numpy_res[i], atol=atol, rtol=rtol) - - if backward is not None: - nothing_was_done = False - debug_stage = "checking gradients" - logging.debug(debug_stage) - - np_head_grads = [np_inputs["head_grads_" + str(i)] for i in range(out_len)] - - if out_len == 1: - np_head_grads = np_head_grads[0] - - params = {'head_grads': np_head_grads} - params.update(np_inputs_without_head_grads) - params.update(additional_params) - numpy_grads = backward(**params) - - if not isinstance(numpy_grads, dict): - if isinstance(numpy_grads, tuple): - numpy_grads = list(numpy_grads) - if not isinstance(numpy_grads, list): - numpy_grads = [numpy_grads] - numpy_grads = {x: v for x, v in zip(grad_var_names, numpy_grads)} - if len(numpy_grads) != len(grad_var_names): - raise ValueError("The backward function returns a list of gradients which " - "does not contain gradients for these variables: {}" - .format(set(grad_var_names) - set(numpy_grads))) - - for x_name in numpy_grads: - tvm.testing.assert_allclose(nnvm_grads[x_name], numpy_grads[x_name], - atol=atol, rtol=rtol) - - if numerical_grads: - nothing_was_done = False - debug_stage = "checking gradients numerically" - logging.debug(debug_stage) - - forward_function = graph_to_function(forward_graph, target, ctx) - - # Since the result may be non-scalar, we have to put another operation on the top, - # so we just multiple by the randomly generated head_grads and then sum everything. - # This way we can reuse the gradient values which has been already computed. 
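# --- Editor's illustrative sketch (not part of the deleted file) -------------------
# A minimal numpy-only illustration of the scalarization trick described in the
# comment above: reduce a possibly multi-output, non-scalar function to a single
# scalar by dotting each output with its randomly generated head gradient and
# summing, so that numerical gradients of that scalar can be compared against the
# symbolic gradients already computed with the same head_grads. All names below
# are hypothetical and exist only for this example.
import numpy as np

def scalarize(outputs, head_grads):
    """Return sum_i <head_grads[i], outputs[i]> as a single float."""
    return float(sum(np.dot(h.ravel(), o.ravel())
                     for h, o in zip(head_grads, outputs)))

# Toy forward function f(x) = [x**2, 3*x] with random head gradients per output.
x = np.array([1.0, 2.0])
head_grads = [np.random.uniform(size=2), np.random.uniform(size=2)]
forward = lambda v: [v ** 2, 3.0 * v]

# Analytic gradient of the scalarized function w.r.t. x:
#   d/dx (h0 . x**2 + h1 . 3x) = 2*h0*x + 3*h1
analytic = 2.0 * head_grads[0] * x + 3.0 * head_grads[1]

# Central-difference numerical gradient of the same scalar.
eps = 1e-6
numeric = np.zeros_like(x)
for i in range(x.size):
    dx = np.zeros_like(x)
    dx[i] = eps
    numeric[i] = (scalarize(forward(x + dx), head_grads)
                  - scalarize(forward(x - dx), head_grads)) / (2 * eps)

np.testing.assert_allclose(numeric, analytic, rtol=1e-4)
# ------------------------------------------------------------------------------------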
- def scalar_function(**kwargs): - res = forward_function(**kwargs) - return np.sum([np.dot(np_inputs['head_grads_' + str(i)].ravel(), res[i].ravel()) - for i in range(out_len)]) - - if numerical_grads_params is None: - numerical_grads_params = {} - - check_numerical_grads( - scalar_function, - input_values=np_inputs_without_head_grads, - grad_values=nnvm_grads, - **numerical_grads_params) - - except: - if not quiet: - print("\ncheck_function failed while {}, here is the main graph" - .format(debug_stage)) - print(main_graph.ir(join_node_attrs=['shape', 'dtype'])) - if nnvm_res is not None: - print("Generated inputs:") - print(np_inputs) - print() - raise - - if nothing_was_done: - logging.warning("Nothing was done in check_function. Check ctx_list().") diff --git a/nnvm/python/nnvm/testing/config.py b/nnvm/python/nnvm/testing/config.py deleted file mode 100644 index 175478b6e14a..000000000000 --- a/nnvm/python/nnvm/testing/config.py +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Configuration about tests""" -from __future__ import absolute_import as _abs - -import os -import tvm - -def ctx_list(): - """Get context list for testcases""" - device_list = os.environ.get("NNVM_TEST_TARGETS", "") - device_list = (device_list.split(",") if device_list - else ["llvm", "cuda"]) - device_list = set(device_list) - res = [(device, tvm.context(device, 0)) for device in device_list] - return [x for x in res if x[1].exist] diff --git a/nnvm/python/nnvm/testing/dcgan.py b/nnvm/python/nnvm/testing/dcgan.py deleted file mode 100644 index 714b3fbb1301..000000000000 --- a/nnvm/python/nnvm/testing/dcgan.py +++ /dev/null @@ -1,109 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=unused-argument -""" -Symbol of the generator of DCGAN - -Adopted from: -https://github.com/tqchen/mxnet-gan/blob/master/mxgan/generator.py - -Reference: -Radford, Alec, Luke Metz, and Soumith Chintala. -"Unsupervised representation learning with deep convolutional generative adversarial networks." 
-arXiv preprint arXiv:1511.06434 (2015). -""" -from .. import symbol as sym -from . utils import create_workload - -def deconv2d(data, ishape, oshape, kshape, name, stride=(2, 2)): - """a deconv layer that enlarges the feature map""" - target_shape = (oshape[-2], oshape[-1]) - - pad_y = (kshape[0] - 1) // 2 - pad_x = (kshape[1] - 1) // 2 - adj_y = (target_shape[0] + 2 * pad_y - kshape[0]) % stride[0] - adj_x = (target_shape[1] + 2 * pad_x - kshape[1]) % stride[1] - - net = sym.conv2d_transpose(data, - kernel_size=kshape, - strides=stride, - channels=oshape[0], - padding=(pad_y, pad_x), - output_padding=(adj_y, adj_x), - use_bias=False, - name=name) - return net - -def deconv2d_bn_relu(data, prefix, **kwargs): - """a block of deconv + batch norm + relu""" - eps = 1e-5 + 1e-12 - net = deconv2d(data, name="%s_deconv" % prefix, **kwargs) - net = sym.batch_norm(net, epsilon=eps, name="%s_bn" % prefix) - net = sym.relu(net, name="%s_act" % prefix) - return net - -def get_symbol(oshape, ngf=128, code=None): - """get symbol of dcgan generator""" - assert oshape[-1] == 64, "Only support 64x64 image" - assert oshape[-2] == 64, "Only support 64x64 image" - - code = sym.Variable("data") if code is None else code - net = sym.dense(code, name="g1", units=4*4*ngf*8, use_bias=False) - net = sym.relu(net) - # 4 x 4 - net = sym.reshape(net, shape=(-1, ngf * 8, 4, 4)) - # 8 x 8 - net = deconv2d_bn_relu( - net, ishape=(ngf * 8, 4, 4), oshape=(ngf * 4, 8, 8), kshape=(4, 4), prefix="g2") - # 16x16 - net = deconv2d_bn_relu( - net, ishape=(ngf * 4, 8, 8), oshape=(ngf * 2, 16, 16), kshape=(4, 4), prefix="g3") - # 32x32 - net = deconv2d_bn_relu( - net, ishape=(ngf * 2, 16, 16), oshape=(ngf, 32, 32), kshape=(4, 4), prefix="g4") - # 64x64 - net = deconv2d( - net, ishape=(ngf, 32, 32), oshape=oshape[-3:], kshape=(4, 4), name="g5_deconv") - net = sym.tanh(net) - return net - - -def get_workload(batch_size, oshape=(3, 64, 64), ngf=128, random_len=100, dtype="float32"): - """Get benchmark workload for a DCGAN generator - - Parameters - ---------- - batch_size : int - The batch size used in the model - oshape : tuple, optional - The shape of output image, layout="CHW" - ngf: int, optional - The number of final feature maps in the generator - random_len : int, optional - The length of random input - dtype : str, optional - The data type - - Returns - ------- - net : nnvm.symbol - The computational graph - params : dict of str to NDArray - The parameters. - """ - net = get_symbol(oshape=oshape, ngf=ngf) - return create_workload(net, batch_size, (random_len, ), dtype) diff --git a/nnvm/python/nnvm/testing/densenet.py b/nnvm/python/nnvm/testing/densenet.py deleted file mode 100644 index 92ba2bf46a8f..000000000000 --- a/nnvm/python/nnvm/testing/densenet.py +++ /dev/null @@ -1,65 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. -""" -DenseNet, load model from gluon model zoo - -Reference: -Huang, Gao, et al. "Densely Connected Convolutional Networks." CVPR 2017 -""" - -from .utils import create_workload -from ..frontend.mxnet import _from_mxnet_impl - -def get_workload(batch_size, num_classes=1000, num_layers=121, dtype="float32"): - """Get benchmark workload for mobilenet - - Parameters - ---------- - batch_size : int - The batch size used in the model - - num_classes : int, optional - Number of classes - - num_layers : int, optional - The number of layers - - dtype : str, optional - The data type - - Returns - ------- - net : nnvm.Symbol - The computational graph - - params : dict of str to NDArray - The parameters. - """ - import mxnet as mx - from mxnet.gluon.model_zoo.vision import get_model - - image_shape = (1, 3, 224, 224) - - block = get_model('densenet%d' % num_layers, classes=num_classes, pretrained=False) - - data = mx.sym.Variable('data') - sym = block(data) - sym = mx.sym.SoftmaxOutput(sym) - - net = _from_mxnet_impl(sym, {}) - - return create_workload(net, batch_size, image_shape[1:], dtype) diff --git a/nnvm/python/nnvm/testing/dqn.py b/nnvm/python/nnvm/testing/dqn.py deleted file mode 100644 index b04475efa32a..000000000000 --- a/nnvm/python/nnvm/testing/dqn.py +++ /dev/null @@ -1,71 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Symbol of Nature DQN - -Reference: -Mnih, Volodymyr, et al. "Human-level control through deep reinforcement learning." -Nature 518.7540 (2015): 529. -""" - -from .. import symbol as sym -from . 
utils import create_workload - -def get_symbol(num_actions=18): - """get symbol of nature dqn""" - data = sym.Variable(name='data') - net = sym.conv2d(data, kernel_size=(8, 8), strides=(4, 4), padding=(0, 0), - channels=32, name='conv1') - net = sym.relu(net, name='relu1') - net = sym.conv2d(net, kernel_size=(4, 4), strides=(2, 2), padding=(0, 0), - channels=64, name='conv2') - net = sym.relu(net, name='relu2') - net = sym.conv2d(net, kernel_size=(3, 3), strides=(1, 1), padding=(0, 0), - channels=64, name='conv3') - net = sym.relu(net, name='relu3') - net = sym.flatten(net, name='flatten') - net = sym.dense(net, units=512, name='fc4') - net = sym.relu(net, name='relu4') - net = sym.dense(net, units=num_actions, name='fc5') - - return net - - -def get_workload(batch_size, num_actions=18, image_shape=(4, 84, 84), dtype="float32"): - """Get benchmark workload for a Deep Q Network - - Parameters - ---------- - batch_size : int - The batch size used in the model - num_actions : int, optional - Number of actions - image_shape : tuple, optional - The input image shape - dtype : str, optional - The data type - - Returns - ------- - net : nnvm.symbol - The computational graph - params : dict of str to NDArray - The parameters. - """ - net = get_symbol(num_actions=num_actions) - return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/testing/inception_v3.py b/nnvm/python/nnvm/testing/inception_v3.py deleted file mode 100644 index e1614d7a9fed..000000000000 --- a/nnvm/python/nnvm/testing/inception_v3.py +++ /dev/null @@ -1,270 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Inception V3, suitable for images with around 299 x 299 - -Reference: -Szegedy, Christian, et al. "Rethinking the Inception Architecture for Computer Vision." -arXiv preprint arXiv:1512.00567 (2015). - -Adopted from https://github.com/apache/incubator-mxnet/blob/ - master/example/image-classification/symbols/inception-v3.py -""" -# pylint: disable=invalid-name,missing-docstring,unused-argument -from .. 
import symbol as sym -from .utils import create_workload - -def Conv(data, num_filter, kernel=(1, 1), stride=(1, 1), pad=(0, 0), name=None, suffix=''): - conv = sym.conv2d(data=data, channels=num_filter, kernel_size=kernel, - strides=stride, padding=pad, use_bias=False, - name='%s%s_conv2d' % (name, suffix)) - bn = sym.batch_norm(data=conv, name='%s%s_batchnorm' % (name, suffix), epsilon=2e-5) - act = sym.relu(data=bn, name='%s%s_relu' % (name, suffix)) - return act - -def Pooling(data, kernel, stride, pad, pool_type, name): - if pool_type == 'max': - return sym.max_pool2d(data=data, pool_size=kernel, strides=stride, padding=pad, name=name) - if pool_type == 'avg': - return sym.avg_pool2d(data=data, pool_size=kernel, strides=stride, padding=pad, name=name, - count_include_pad=True) - raise ValueError("Invalid pooling type: " + pool_type) - -def Inception7A(data, - num_1x1, - num_3x3_red, num_3x3_1, num_3x3_2, - num_5x5_red, num_5x5, - pool, proj, - name): - tower_1x1 = Conv(data, num_1x1, name=('%s_conv' % name)) - tower_5x5 = Conv(data, num_5x5_red, name=('%s_tower' % name), suffix='_conv') - tower_5x5 = Conv(tower_5x5, num_5x5, kernel=(5, 5), pad=(2, 2), name=('%s_tower' % name), - suffix='_conv_1') - tower_3x3 = Conv(data, num_3x3_red, name=('%s_tower_1' % name), suffix='_conv') - tower_3x3 = Conv(tower_3x3, num_3x3_1, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), - suffix='_conv_1') - tower_3x3 = Conv(tower_3x3, num_3x3_2, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), - suffix='_conv_2') - pooling = Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, - name=('%s_pool_%s_pool' % (pool, name))) - - cproj = Conv(pooling, proj, name=('%s_tower_2' % name), suffix='_conv') - concat = sym.concatenate(*[tower_1x1, tower_5x5, tower_3x3, cproj], - name='ch_concat_%s_chconcat' % name) - return concat - -# First Downsample -def Inception7B(data, - num_3x3, - num_d3x3_red, num_d3x3_1, num_d3x3_2, - pool, - name): - tower_3x3 = Conv(data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2), - name=('%s_conv' % name)) - tower_d3x3 = Conv(data, num_d3x3_red, name=('%s_tower' % name), suffix='_conv') - tower_d3x3 = Conv(tower_d3x3, num_d3x3_1, kernel=(3, 3), pad=(1, 1), stride=(1, 1), - name=('%s_tower' % name), suffix='_conv_1') - tower_d3x3 = Conv(tower_d3x3, num_d3x3_2, kernel=(3, 3), pad=(0, 0), stride=(2, 2), - name=('%s_tower' % name), suffix='_conv_2') - pooling = Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(0, 0), pool_type="max", - name=('max_pool_%s_pool' % name)) - concat = sym.concatenate(*[tower_3x3, tower_d3x3, pooling], name='ch_concat_%s_chconcat' % name) - return concat - -def Inception7C(data, - num_1x1, - num_d7_red, num_d7_1, num_d7_2, - num_q7_red, num_q7_1, num_q7_2, num_q7_3, num_q7_4, - pool, proj, - name): - tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name)) - tower_d7 = Conv(data=data, num_filter=num_d7_red, name=('%s_tower' % name), suffix='_conv') - tower_d7 = Conv(data=tower_d7, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), - name=('%s_tower' % name), suffix='_conv_1') - tower_d7 = Conv(data=tower_d7, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), - name=('%s_tower' % name), suffix='_conv_2') - tower_q7 = Conv(data=data, num_filter=num_q7_red, name=('%s_tower_1' % name), suffix='_conv') - tower_q7 = Conv(data=tower_q7, num_filter=num_q7_1, kernel=(7, 1), pad=(3, 0), - name=('%s_tower_1' % name), suffix='_conv_1') - tower_q7 = Conv(data=tower_q7, num_filter=num_q7_2, kernel=(1, 7), 
pad=(0, 3), - name=('%s_tower_1' % name), suffix='_conv_2') - tower_q7 = Conv(data=tower_q7, num_filter=num_q7_3, kernel=(7, 1), pad=(3, 0), - name=('%s_tower_1' % name), suffix='_conv_3') - tower_q7 = Conv(data=tower_q7, num_filter=num_q7_4, kernel=(1, 7), pad=(0, 3), - name=('%s_tower_1' % name), suffix='_conv_4') - pooling = Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, - name=('%s_pool_%s_pool' % (pool, name))) - cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), - name=('%s_tower_2' % name), suffix='_conv') - # concat - concat = sym.concatenate(*[tower_1x1, tower_d7, tower_q7, cproj], - name='ch_concat_%s_chconcat' % name) - return concat - -def Inception7D(data, - num_3x3_red, num_3x3, - num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3, - pool, - name): - tower_3x3 = Conv(data=data, num_filter=num_3x3_red, name=('%s_tower' % name), - suffix='_conv') - tower_3x3 = Conv(data=tower_3x3, num_filter=num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2), - name=('%s_tower' % name), suffix='_conv_1') - tower_d7_3x3 = Conv(data=data, num_filter=num_d7_3x3_red, name=('%s_tower_1' % name), - suffix='_conv') - tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), - name=('%s_tower_1' % name), suffix='_conv_1') - tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), - name=('%s_tower_1' % name), suffix='_conv_2') - tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_3x3, kernel=(3, 3), stride=(2, 2), - name=('%s_tower_1' % name), suffix='_conv_3') - pooling = Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type=pool, pad=(0, 0), - name=('%s_pool_%s_pool' % (pool, name))) - # concat - concat = sym.concatenate(*[tower_3x3, tower_d7_3x3, pooling], - name='ch_concat_%s_chconcat' % name) - return concat - -def Inception7E(data, - num_1x1, - num_d3_red, num_d3_1, num_d3_2, - num_3x3_d3_red, num_3x3, num_3x3_d3_1, num_3x3_d3_2, - pool, proj, - name): - tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name)) - tower_d3 = Conv(data=data, num_filter=num_d3_red, name=('%s_tower' % name), suffix='_conv') - tower_d3_a = Conv(data=tower_d3, num_filter=num_d3_1, kernel=(1, 3), pad=(0, 1), - name=('%s_tower' % name), suffix='_mixed_conv') - tower_d3_b = Conv(data=tower_d3, num_filter=num_d3_2, kernel=(3, 1), pad=(1, 0), - name=('%s_tower' % name), suffix='_mixed_conv_1') - tower_3x3_d3 = Conv(data=data, num_filter=num_3x3_d3_red, name=('%s_tower_1' % name), - suffix='_conv') - tower_3x3_d3 = Conv(data=tower_3x3_d3, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), - name=('%s_tower_1' % name), suffix='_conv_1') - tower_3x3_d3_a = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_1, kernel=(1, 3), pad=(0, 1), - name=('%s_tower_1' % name), suffix='_mixed_conv') - tower_3x3_d3_b = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_2, kernel=(3, 1), pad=(1, 0), - name=('%s_tower_1' % name), suffix='_mixed_conv_1') - pooling = Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, - name=('%s_pool_%s_pool' % (pool, name))) - cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name), - suffix='_conv') - # concat - concat = sym.concatenate( - *[tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj], - name='ch_concat_%s_chconcat' % name) - return concat - - -def get_symbol(num_classes=1000, **kwargs): - data = sym.Variable(name="data") - # stage 1 - conv = Conv(data, 32, kernel=(3, 3), stride=(2, 2), name="conv") - conv_1 
= Conv(conv, 32, kernel=(3, 3), name="conv_1") - conv_2 = Conv(conv_1, 64, kernel=(3, 3), pad=(1, 1), name="conv_2") - pool = Pooling(data=conv_2, kernel=(3, 3), stride=(2, 2), pool_type="max", pad=(0, 0), - name="pool") - # stage 2 - conv_3 = Conv(pool, 80, kernel=(1, 1), name="conv_3") - conv_4 = Conv(conv_3, 192, kernel=(3, 3), name="conv_4") - pool1 = Pooling(data=conv_4, kernel=(3, 3), stride=(2, 2), pool_type="max", pad=(0, 0), - name="pool1") - - # stage 3 - in3a = Inception7A(pool1, 64, - 64, 96, 96, - 48, 64, - "avg", 32, "mixed") - in3b = Inception7A(in3a, 64, - 64, 96, 96, - 48, 64, - "avg", 64, "mixed_1") - in3c = Inception7A(in3b, 64, - 64, 96, 96, - 48, 64, - "avg", 64, "mixed_2") - in3d = Inception7B(in3c, 384, - 64, 96, 96, - "max", "mixed_3") - # stage 4 - in4a = Inception7C(in3d, 192, - 128, 128, 192, - 128, 128, 128, 128, 192, - "avg", 192, "mixed_4") - in4b = Inception7C(in4a, 192, - 160, 160, 192, - 160, 160, 160, 160, 192, - "avg", 192, "mixed_5") - in4c = Inception7C(in4b, 192, - 160, 160, 192, - 160, 160, 160, 160, 192, - "avg", 192, "mixed_6") - in4d = Inception7C(in4c, 192, - 192, 192, 192, - 192, 192, 192, 192, 192, - "avg", 192, "mixed_7") - in4e = Inception7D(in4d, 192, 320, - 192, 192, 192, 192, - "max", "mixed_8") - # stage 5 - in5a = Inception7E(in4e, 320, - 384, 384, 384, - 448, 384, 384, 384, - "avg", 192, "mixed_9") - in5b = Inception7E(in5a, 320, - 384, 384, 384, - 448, 384, 384, 384, - "max", 192, "mixed_10") - # pool - pool = Pooling(data=in5b, kernel=(8, 8), stride=(1, 1), pool_type="avg", pad=(0, 0), - name="global_pool") - flatten = sym.flatten(data=pool, name="flatten") - fc1 = sym.dense(data=flatten, units=num_classes, name='fc1') - softmax = sym.softmax(data=fc1, name='softmax') - return softmax - -def get_workload(batch_size=1, num_classes=1000, - image_shape=(3, 299, 299), dtype="float32", **kwargs): - """Get benchmark workload for InceptionV3 - - Parameters - ---------- - batch_size : int - The batch size used in the model - - num_classes : int, optional - Number of classes - - image_shape : tuple, optional - The input image shape - - dtype : str, optional - The data type - - kwargs : dict - Extra arguments - - Returns - ------- - net : nnvm.Symbol - The computational graph - - params : dict of str to NDArray - The parameters. - """ - net = get_symbol(num_classes=num_classes, **kwargs) - return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/testing/init.py b/nnvm/python/nnvm/testing/init.py deleted file mode 100644 index 611c81e69483..000000000000 --- a/nnvm/python/nnvm/testing/init.py +++ /dev/null @@ -1,125 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""Initializer of parameters.""" -import numpy as np - -class Initializer(object): - """The base class of an initializer.""" - def __init__(self, **kwargs): - self._kwargs = kwargs - - def __call__(self, desc, arr): - """Initialize an array - - Parameters - ---------- - desc : str - Initialization pattern descriptor. - - arr : NDArray - The array to be initialized. - """ - if desc.endswith('weight'): - self._init_weight(desc, arr) - elif desc.endswith('bias'): - self._init_bias(desc, arr) - elif desc.endswith('gamma'): - self._init_gamma(desc, arr) - elif desc.endswith('beta'): - self._init_beta(desc, arr) - elif desc.endswith('mean'): - self._init_mean(desc, arr) - elif desc.endswith('var'): - self._init_var(desc, arr) - else: - self._init_default(desc, arr) - - def _init_bias(self, _, arr): - arr[:] = 0.0 - - def _init_gamma(self, _, arr): - arr[:] = 1.0 - - def _init_beta(self, _, arr): - arr[:] = 0.0 - - def _init_mean(self, _, arr): - arr[:] = 0.0 - - def _init_var(self, _, arr): - arr[:] = 1.0 - - def _init_weight(self, name, arr): - """Abstract method to Initialize weight.""" - raise NotImplementedError("Must override it") - - def _init_default(self, name, _): - raise ValueError( - 'Unknown initialization pattern for %s. ' \ - 'Default initialization is now limited to '\ - '"weight", "bias", "gamma" (1.0), and "beta" (0.0).' \ - 'Please use mx.sym.Variable(init=mx.init.*) to set initialization pattern' % name) - - -class Xavier(Initializer): - """ "Xavier" initialization for weights - - Parameters - ---------- - rnd_type: str, optional - Random generator type, can be ``'gaussian'`` or ``'uniform'``. - - factor_type: str, optional - Can be ``'avg'``, ``'in'``, or ``'out'``. - - magnitude: float, optional - Scale of random number. - """ - def __init__(self, rnd_type="uniform", factor_type="avg", magnitude=3): - super(Xavier, self).__init__(rnd_type=rnd_type, - factor_type=factor_type, - magnitude=magnitude) - self.rnd_type = rnd_type - self.factor_type = factor_type - self.magnitude = float(magnitude) - - def _init_weight(self, name, arr): - shape = arr.shape - hw_scale = 1. - if len(shape) < 2: - raise ValueError('Xavier initializer cannot be applied to vector {0}. It requires at' - ' least 2D.'.format(name)) - if len(shape) > 2: - hw_scale = np.prod(shape[2:]) - fan_in, fan_out = shape[1] * hw_scale, shape[0] * hw_scale - factor = 1. - if self.factor_type == "avg": - factor = (fan_in + fan_out) / 2.0 - elif self.factor_type == "in": - factor = fan_in - elif self.factor_type == "out": - factor = fan_out - else: - raise ValueError("Incorrect factor type") - # Hack for mobilenet, because there is less connectivity - if "depthwise" in name: - factor = 3 * 3 - scale = np.sqrt(self.magnitude / factor) - if self.rnd_type == "uniform": - arr[:] = np.random.uniform(-scale, scale, size=arr.shape) - else: - raise ValueError("Unknown random type") diff --git a/nnvm/python/nnvm/testing/mlp.py b/nnvm/python/nnvm/testing/mlp.py deleted file mode 100644 index 1b6975661fe4..000000000000 --- a/nnvm/python/nnvm/testing/mlp.py +++ /dev/null @@ -1,60 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -a simple multilayer perceptron -""" -from .. import symbol as sym -from . utils import create_workload - -def get_symbol(num_classes=1000): - data = sym.Variable('data') - data = sym.flatten(data=data) - fc1 = sym.dense(data=data, name='fc1', units=128) - act1 = sym.relu(data=fc1, name='relu1') - fc2 = sym.dense(data=act1, name='fc2', units=64) - act2 = sym.relu(data=fc2, name='relu2') - fc3 = sym.dense(data=act2, name='fc3', units=num_classes) - mlp = sym.softmax(data=fc3, name='softmax') - return mlp - -def get_workload(batch_size, num_classes=1000, image_shape=(3, 224, 224), dtype="float32"): - """Get benchmark workload for a simple multilayer perceptron - - Parameters - ---------- - batch_size : int - The batch size used in the model - - num_classes : int, optional - Number of claseses - - image_shape : tuple, optional - The input image shape - - dtype : str, optional - The data type - - Returns - ------- - net : nnvm.symbol - The computational graph - - params : dict of str to NDArray - The parameters. - """ - net = get_symbol(num_classes=num_classes) - return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/testing/mobilenet.py b/nnvm/python/nnvm/testing/mobilenet.py deleted file mode 100644 index e505ff499a54..000000000000 --- a/nnvm/python/nnvm/testing/mobilenet.py +++ /dev/null @@ -1,122 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Helper utility to get mobilenet workload for testing.""" -# pylint: disable=invalid-name -from __future__ import absolute_import as _abs - -from .. import symbol as sym -from . 
utils import create_workload - -def conv_block(data, name, channels, - kernel_size=(3, 3), strides=(1, 1), padding=(1, 1), - epsilon=1e-5): - """Helper function to construct conv-bn-relu""" - # convolution + bn + relu - conv = sym.conv2d(data=data, channels=channels, - kernel_size=kernel_size, strides=strides, - padding=padding, use_bias=False, - layout="NCHW", name=name + "_conv") - bn = sym.batch_norm(data=conv, epsilon=epsilon, name=name + "_bn") - act = sym.relu(data=bn, name=name + "_relu") - return act - -def separable_conv_block(data, name, depthwise_channels, - pointwise_channels, kernel_size=(3, 3), - downsample=False, padding=(1, 1), - epsilon=1e-5): - """Helper function to get a separable conv block""" - if downsample: - strides = (2, 2) - else: - strides = (1, 1) - # depthwise convolution + bn + relu - conv1 = sym.conv2d(data=data, channels=depthwise_channels, - groups=depthwise_channels, kernel_size=kernel_size, strides=strides, - padding=padding, use_bias=False, layout="NCHW", - name=name + "_depthwise_conv1") - bn1 = sym.batch_norm(data=conv1, epsilon=epsilon, name=name + "_bn1") - act1 = sym.relu(data=bn1, name=name + "_relu1") - # pointwise convolution + bn + relu - conv2 = sym.conv2d(data=act1, channels=pointwise_channels, kernel_size=(1, 1), strides=(1, 1), - padding=(0, 0), use_bias=False, layout="NCHW", name=name + "_conv2") - bn2 = sym.batch_norm(data=conv2, epsilon=epsilon, name=name + "_bn2") - act2 = sym.relu(data=bn2, name=name + "_relu2") - return act2 - -def mobile_net(num_classes=1000, alpha=1.0, is_shallow=False): - """Function to construct a MobileNet""" - data = sym.Variable("data") - body = conv_block(data, "conv_block_1", int(32*alpha), strides=(2, 2)) - body = separable_conv_block(body, "separable_conv_block_1", - int(32*alpha), int(64*alpha)) - body = separable_conv_block(body, "separable_conv_block_2", - int(64*alpha), int(128*alpha), downsample=True) - body = separable_conv_block(body, "separable_conv_block_3", - int(128*alpha), int(128*alpha)) - body = separable_conv_block(body, "separable_conv_block_4", - int(128*alpha), int(256*alpha), downsample=True) - body = separable_conv_block(body, "separable_conv_block_5", - int(256*alpha), int(256*alpha)) - body = separable_conv_block(body, "separable_conv_block_6", - int(256*alpha), int(512*alpha), downsample=True) - if is_shallow: - body = separable_conv_block(body, "separable_conv_block_7", - int(512*alpha), int(1024*alpha), downsample=True) - body = separable_conv_block(body, "separable_conv_block_8", - int(1024*alpha), int(1024*alpha)) - else: - for i in range(7, 12): - body = separable_conv_block(body, "separable_conv_block_%d" % i, - int(512*alpha), int(512*alpha)) - body = separable_conv_block(body, "separable_conv_block_12", - int(512*alpha), int(1024*alpha), downsample=True) - body = separable_conv_block(body, "separable_conv_block_13", - int(1024*alpha), int(1024*alpha)) - pool = sym.global_avg_pool2d(data=body, name="pool") - flatten = sym.flatten(data=pool, name="flatten") - fc = sym.dense(data=flatten, units=num_classes, use_bias=False, name="fc") - softmax = sym.softmax(data=fc, name="softmax") - return softmax - - -def get_workload(batch_size, num_classes=1000, image_shape=(3, 224, 224), dtype="float32"): - """Get benchmark workload for mobilenet - - Parameters - ---------- - batch_size : int - The batch size used in the model - - num_classes : int, optional - Number of classes - - image_shape : tuple, optional - The input image shape - - dtype : str, optional - The data type - - Returns - 
------- - net : nnvm.Symbol - The computational graph - - params : dict of str to NDArray - The parameters. - """ - net = mobile_net(num_classes=num_classes, alpha=1.0, is_shallow=False) - return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/testing/mobilenet_v2.py b/nnvm/python/nnvm/testing/mobilenet_v2.py deleted file mode 100644 index 87c4a2c7e9f5..000000000000 --- a/nnvm/python/nnvm/testing/mobilenet_v2.py +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -MobileNetV2, load model from gluon model zoo - -Reference: -Inverted Residuals and Linear Bottlenecks: -Mobile Networks for Classification, Detection and Segmentation -https://arxiv.org/abs/1801.04381 -""" - -from .utils import create_workload -from ..frontend.mxnet import _from_mxnet_impl - -def get_workload(batch_size, num_classes=1000, multiplier=1.0, dtype="float32"): - """Get benchmark workload for mobilenet - - Parameters - ---------- - batch_size : int - The batch size used in the model - - num_classes : int, optional - Number of classes - - multiplier : tuple, optional - The input image shape - - dtype : str, optional - The data type - - Returns - ------- - net : nnvm.Symbol - The computational graph - - params : dict of str to NDArray - The parameters. - """ - import mxnet as mx - from mxnet.gluon.model_zoo.vision.mobilenet import MobileNetV2 - - image_shape = (1, 3, 224, 224) - - block = MobileNetV2(multiplier=multiplier, classes=num_classes) - - data = mx.sym.Variable('data') - sym = block(data) - sym = mx.sym.SoftmaxOutput(sym) - - net = _from_mxnet_impl(sym, {}) - - return create_workload(net, batch_size, image_shape[1:], dtype) diff --git a/nnvm/python/nnvm/testing/resnet.py b/nnvm/python/nnvm/testing/resnet.py deleted file mode 100644 index e63ceff7c3f0..000000000000 --- a/nnvm/python/nnvm/testing/resnet.py +++ /dev/null @@ -1,224 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -''' -Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py -Original author Wei Wu - -Implemented the following paper: - -Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. "Identity Mappings in Deep Residual Networks" -''' -# pylint: disable=unused-argument -from .. import symbol as sym -from . utils import create_workload - -def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True): - """Return ResNet Unit symbol for building ResNet - Parameters - ---------- - data : str - Input data - num_filter : int - Number of output channels - bnf : int - Bottle neck channels factor with regard to num_filter - stride : tuple - Stride used in convolution - dim_match : Boolean - True means channel number between input and output is the same, - otherwise means differ - name : str - Base name of the operators - """ - if bottle_neck: - bn1 = sym.batch_norm(data=data, epsilon=2e-5, name=name + '_bn1') - act1 = sym.relu(data=bn1, name=name + '_relu1') - conv1 = sym.conv2d( - data=act1, channels=int(num_filter*0.25), kernel_size=(1, 1), - strides=stride, padding=(0, 0), use_bias=False, name=name + '_conv1') - bn2 = sym.batch_norm(data=conv1, epsilon=2e-5, name=name + '_bn2') - act2 = sym.relu(data=bn2, name=name + '_relu2') - conv2 = sym.conv2d( - data=act2, channels=int(num_filter*0.25), kernel_size=(3, 3), - strides=(1, 1), padding=(1, 1), use_bias=False, name=name + '_conv2') - bn3 = sym.batch_norm(data=conv2, epsilon=2e-5, name=name + '_bn3') - act3 = sym.relu(data=bn3, name=name + '_relu3') - conv3 = sym.conv2d( - data=act3, channels=num_filter, kernel_size=(1, 1), - strides=(1, 1), padding=(0, 0), use_bias=False, name=name + '_conv3') - if dim_match: - shortcut = data - else: - shortcut = sym.conv2d( - data=act1, channels=num_filter, kernel_size=(1, 1), - strides=stride, use_bias=False, name=name+'_sc') - return sym.elemwise_add(conv3, shortcut) - else: - bn1 = sym.batch_norm(data=data, epsilon=2e-5, name=name + '_bn1') - act1 = sym.relu(data=bn1, name=name + '_relu1') - conv1 = sym.conv2d( - data=act1, channels=num_filter, kernel_size=(3, 3), - strides=stride, padding=(1, 1), use_bias=False, name=name + '_conv1') - bn2 = sym.batch_norm(data=conv1, epsilon=2e-5, name=name + '_bn2') - act2 = sym.relu(data=bn2, name=name + '_relu2') - conv2 = sym.conv2d( - data=act2, channels=num_filter, kernel_size=(3, 3), - strides=(1, 1), padding=(1, 1), use_bias=False, name=name + '_conv2') - if dim_match: - shortcut = data - else: - shortcut = sym.conv2d( - data=act1, channels=num_filter, kernel_size=(1, 1), - strides=stride, use_bias=False, name=name+'_sc') - return sym.elemwise_add(conv2, shortcut) - -def resnet(units, num_stages, filter_list, num_classes, image_shape, - bottle_neck=True): - """Return ResNet symbol of - Parameters - ---------- - units : list - Number of units in each stage - num_stages : int - Number of stage - filter_list : list - Channel size of each stage - num_classes : int - Ouput size of symbol - dataset : str - Dataset type, only cifar10 and imagenet supports - """ - num_unit = len(units) - assert num_unit == num_stages - data = sym.Variable(name='data') - data = sym.batch_norm(data=data, epsilon=2e-5, scale=False, name='bn_data') - (_, height, _) = image_shape - if height <= 32: # such as cifar10 - body = sym.conv2d( - data=data, channels=filter_list[0], kernel_size=(3, 3), - strides=(1, 1), padding=(1, 1), use_bias=False, name="conv0") - else: # often expected to be 224 such as imagenet - body = sym.conv2d( - data=data, 
channels=filter_list[0], kernel_size=(7, 7), - strides=(2, 2), padding=(3, 3), use_bias=False, name="conv0") - body = sym.batch_norm(data=body, epsilon=2e-5, name='bn0') - body = sym.relu(data=body, name='relu0') - body = sym.max_pool2d(data=body, pool_size=(3, 3), strides=(2, 2), padding=(1, 1)) - - for i in range(num_stages): - body = residual_unit( - body, filter_list[i+1], (1 if i == 0 else 2, 1 if i == 0 else 2), - False, name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck) - for j in range(units[i]-1): - body = residual_unit( - body, filter_list[i+1], (1, 1), True, - name='stage%d_unit%d' % (i + 1, j + 2), bottle_neck=bottle_neck) - bn1 = sym.batch_norm(data=body, epsilon=2e-5, name='bn1') - relu1 = sym.relu(data=bn1, name='relu1') - # Although kernel is not used here when global_pool=True, we should put one - pool1 = sym.global_avg_pool2d(data=relu1, name='pool1') - flat = sym.flatten(data=pool1) - fc1 = sym.dense(data=flat, units=num_classes, name='fc1') - return sym.softmax(data=fc1, name='softmax') - -def get_symbol(num_classes, num_layers=50, image_shape=(3, 224, 224), **kwargs): - """ - Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py - Original author Wei Wu - """ - (_, height, _) = image_shape - if height <= 28: - num_stages = 3 - if (num_layers-2) % 9 == 0 and num_layers >= 164: - per_unit = [(num_layers-2)//9] - filter_list = [16, 64, 128, 256] - bottle_neck = True - elif (num_layers-2) % 6 == 0 and num_layers < 164: - per_unit = [(num_layers-2)//6] - filter_list = [16, 16, 32, 64] - bottle_neck = False - else: - raise ValueError("no experiments done on num_layers {}".format(num_layers)) - units = per_unit * num_stages - else: - if num_layers >= 50: - filter_list = [64, 256, 512, 1024, 2048] - bottle_neck = True - else: - filter_list = [64, 64, 128, 256, 512] - bottle_neck = False - num_stages = 4 - if num_layers == 18: - units = [2, 2, 2, 2] - elif num_layers == 34: - units = [3, 4, 6, 3] - elif num_layers == 50: - units = [3, 4, 6, 3] - elif num_layers == 101: - units = [3, 4, 23, 3] - elif num_layers == 152: - units = [3, 8, 36, 3] - elif num_layers == 200: - units = [3, 24, 36, 3] - elif num_layers == 269: - units = [3, 30, 48, 8] - else: - raise ValueError("no experiments done on num_layers {}".format(num_layers)) - - return resnet(units=units, - num_stages=num_stages, - filter_list=filter_list, - num_classes=num_classes, - image_shape=image_shape, - bottle_neck=bottle_neck) - -def get_workload(batch_size=1, num_classes=1000, num_layers=18, - image_shape=(3, 224, 224), dtype="float32", **kwargs): - """Get benchmark workload for resnet - - Parameters - ---------- - batch_size : int - The batch size used in the model - - num_classes : int, optional - Number of classes - - num_layers : int, optional - Number of layers - - image_shape : tuple, optional - The input image shape - - dtype : str, optional - The data type - - kwargs : dict - Extra arguments - - Returns - ------- - net : nnvm.Symbol - The computational graph - - params : dict of str to NDArray - The parameters. 
- """ - net = get_symbol(num_classes=num_classes, num_layers=num_layers, - image_shape=image_shape, **kwargs) - return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/testing/squeezenet.py b/nnvm/python/nnvm/testing/squeezenet.py deleted file mode 100644 index eab2cf06fee6..000000000000 --- a/nnvm/python/nnvm/testing/squeezenet.py +++ /dev/null @@ -1,132 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# coding: utf-8 -# pylint: disable=unused-argument - -""" -Symbol of SqueezeNet - -Reference: -Iandola, Forrest N., et al. -"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016). -""" - -from .. import symbol as sym -from . utils import create_workload - -# Helpers -def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels): - net = _make_fire_conv(net, squeeze_channels, 1, 0) - - left = _make_fire_conv(net, expand1x1_channels, 1, 0) - right = _make_fire_conv(net, expand3x3_channels, 3, 1) - # NOTE : Assume NCHW layout here - net = sym.concatenate(left, right, axis=1) - - return net - -def _make_fire_conv(net, channels, kernel_size, padding=0): - net = sym.conv2d(net, channels=channels, kernel_size=(kernel_size, kernel_size), - padding=(padding, padding)) - net = sym.relu(net) - return net - -# Net -def get_symbol(num_classes, version, **kwargs): - """Get symbol of SqueezeNet - - Parameters - ---------- - num_classes: int - The number of classification results - - version : str, optional - "1.0" or "1.1" of SqueezeNet - """ - assert version in ['1.0', '1.1'], ("Unsupported SqueezeNet version {version}:" - "1.0 or 1.1 expected".format(version=version)) - net = sym.Variable("data") - if version == '1.0': - net = sym.conv2d(net, channels=96, kernel_size=(7, 7), strides=(2, 2), padding=(3, 3)) - net = sym.relu(net) - net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) - net = _make_fire(net, 16, 64, 64) - net = _make_fire(net, 16, 64, 64) - net = _make_fire(net, 32, 128, 128) - net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) - net = _make_fire(net, 32, 128, 128) - net = _make_fire(net, 48, 192, 192) - net = _make_fire(net, 48, 192, 192) - net = _make_fire(net, 64, 256, 256) - net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) - net = _make_fire(net, 64, 256, 256) - else: - net = sym.conv2d(net, channels=64, kernel_size=(3, 3), strides=(2, 2), padding=(1, 1)) - net = sym.relu(net) - net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) - net = _make_fire(net, 16, 64, 64) - net = _make_fire(net, 16, 64, 64) - net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) - net = _make_fire(net, 32, 128, 128) - net = _make_fire(net, 32, 128, 128) - net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) - net = _make_fire(net, 
48, 192, 192) - net = _make_fire(net, 48, 192, 192) - net = _make_fire(net, 64, 256, 256) - net = _make_fire(net, 64, 256, 256) - net = sym.dropout(net, rate=0.5) - net = sym.conv2d(net, channels=num_classes, kernel_size=(1, 1)) - net = sym.relu(net) - net = sym.global_avg_pool2d(net) - net = sym.flatten(net) - return sym.softmax(net) - -def get_workload(batch_size=1, num_classes=1000, version='1.0', - image_shape=(3, 224, 224), dtype="float32", **kwargs): - """Get benchmark workload for SqueezeNet - - Parameters - ---------- - batch_size : int - The batch size used in the model - - num_classes : int, optional - Number of classes - - version : str, optional - "1.0" or "1.1" of SqueezeNet - - image_shape : tuple, optional - The input image shape - - dtype : str, optional - The data type - - kwargs : dict - Extra arguments - - Returns - ------- - net : nnvm.Symbol - The computational graph - - params : dict of str to NDArray - The parameters. - """ - net = get_symbol(num_classes=num_classes, version=version, **kwargs) - return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/testing/utils.py b/nnvm/python/nnvm/testing/utils.py deleted file mode 100644 index 0bffc81a0663..000000000000 --- a/nnvm/python/nnvm/testing/utils.py +++ /dev/null @@ -1,73 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Helper utility to create common workload for testing.""" -from __future__ import absolute_import as _abs - -import numpy as np -import tvm -from ..compiler import graph_util -from ..import graph -from . init import Xavier - -def create_workload(net, batch_size, image_shape=(3, 224, 224), - dtype="float32", initializer=None, seed=0): - """Helper function to create benchmark workload for input network - - Parameters - ---------- - net : nnvm.Symbol - The selected network symbol to use - - batch_size : int - The batch size used in the model - - image_shape : tuple, optional - The input image shape - - dtype : str, optional - The data type - - initializer : Initializer - The initializer used - - seed : int - The seed used in initialization. - - Returns - ------- - net : nnvm.Symbol - The computational graph - - params : dict of str to NDArray - The parameters. 
- """ - if image_shape is None: - image_shape = (3, 224, 224) - data_shape = (batch_size,) + image_shape - params = {} - g = graph.create(net) - input_shapes, _ = graph_util.infer_shape(g, data=data_shape) - shape_dict = dict(zip(g.index.input_names, input_shapes)) - np.random.seed(seed) - initializer = initializer if initializer else Xavier() - for k, v in shape_dict.items(): - if k == "data": - continue - init_value = np.zeros(v).astype(dtype) - initializer(k, init_value) - params[k] = tvm.nd.array(init_value, ctx=tvm.cpu(0)) - return net, params diff --git a/nnvm/python/nnvm/testing/vgg.py b/nnvm/python/nnvm/testing/vgg.py deleted file mode 100644 index 2c290bdc3c68..000000000000 --- a/nnvm/python/nnvm/testing/vgg.py +++ /dev/null @@ -1,107 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""References: - -Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for -large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014). -""" -from .. import symbol as sym -from . utils import create_workload - -def get_feature(internel_layer, layers, filters, batch_norm=False): - """Get VGG feature body as stacks of convoltions.""" - for i, num in enumerate(layers): - for j in range(num): - internel_layer = sym.conv2d( - data=internel_layer, kernel_size=(3, 3), padding=(1, 1), - channels=filters[i], name="conv%s_%s"%(i + 1, j + 1)) - if batch_norm: - internel_layer = sym.batch_norm( - data=internel_layer, name="bn%s_%s" %(i + 1, j + 1)) - internel_layer = sym.relu(data=internel_layer, name="relu%s_%s" %(i + 1, j + 1)) - internel_layer = sym.max_pool2d( - data=internel_layer, pool_size=(2, 2), strides=(2, 2), name="pool%s"%(i + 1)) - return internel_layer - -def get_classifier(input_data, num_classes): - """Get VGG classifier layers as fc layers.""" - flatten = sym.flatten(data=input_data, name="flatten") - fc6 = sym.dense(data=flatten, units=4096, name="fc6") - relu6 = sym.relu(data=fc6, name="relu6") - drop6 = sym.dropout(data=relu6, rate=0.5, name="drop6") - fc7 = sym.dense(data=drop6, units=4096, name="fc7") - relu7 = sym.relu(data=fc7, name="relu7") - drop7 = sym.dropout(data=relu7, rate=0.5, name="drop7") - fc8 = sym.dense(data=drop7, units=num_classes, name="fc8") - return fc8 - -def get_symbol(num_classes, num_layers=11, batch_norm=False): - """ - Parameters - ---------- - num_classes : int, default 1000 - Number of classification classes. - num_layers : int - Number of layers for the variant of densenet. Options are 11, 13, 16, 19. - batch_norm : bool, default False - Use batch normalization. 
- """ - vgg_spec = {11: ([1, 1, 2, 2, 2], [64, 128, 256, 512, 512]), - 13: ([2, 2, 2, 2, 2], [64, 128, 256, 512, 512]), - 16: ([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]), - 19: ([2, 2, 4, 4, 4], [64, 128, 256, 512, 512])} - if num_layers not in vgg_spec: - raise ValueError("Invalide num_layers {}. Choices are 11,13,16,19.".format(num_layers)) - layers, filters = vgg_spec[num_layers] - data = sym.Variable(name="data") - feature = get_feature(data, layers, filters, batch_norm) - classifier = get_classifier(feature, num_classes) - symbol = sym.softmax(data=classifier, name='softmax') - return symbol - -def get_workload(batch_size, num_classes=1000, image_shape=(3, 224, 224), - dtype="float32", **kwargs): - """Get benchmark workload for VGG nets. - - Parameters - ---------- - batch_size : int - The batch size used in the model - - num_classes : int, optional - Number of claseses - - image_shape : tuple, optional - The input image shape - - dtype : str, optional - The data type - - kwargs : dict - Extra arguments - - Returns - ------- - net : nnvm.Symbol - The computational graph - - params : dict of str to NDArray - The parameters. - """ - net = get_symbol(num_classes=num_classes, **kwargs) - return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/python/nnvm/to_relay.py b/nnvm/python/nnvm/to_relay.py deleted file mode 100644 index 94a736dabe70..000000000000 --- a/nnvm/python/nnvm/to_relay.py +++ /dev/null @@ -1,507 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# pylint: disable=no-else-return, unidiomatic-typecheck, invalid-name, unused-argument -"""Convert an NNVM graph to Relay.""" -import numpy - -from tvm import relay, nd -from tvm.relay import op, expr, var -from tvm.relay.frontend.common import StrAttrsDict -from tvm.relay.frontend.nnvm_common import _rename, _binop_scalar, _rbinop_scalar, \ - _elemwise_sum, _softmax_op, _compare, _reduce -from .symbol import Symbol -from .compiler import graph_attr -from .graph import create as graph_create - -def _nn_batch_flatten(children, attrs, odtype='float32'): - assert len(children) == 1 - return op.nn.batch_flatten(children[0]) - - -def _dense(children, attrs, odtype='float32'): - use_bias = attrs.get_bool('use_bias', True) - units = attrs.get_int('units') - dense = op.nn.dense(children[0], children[1], units=units) - if use_bias: - return op.nn.bias_add(dense, children[2]) - else: - return dense - -def _conv2d(children, attrs, odtype='float32'): - use_bias = attrs.get_bool('use_bias', True) - - if use_bias: - data, weight, bias = children - else: - data, weight = children - - kernel_size = attrs.get_int_tuple('kernel_size') - channels = attrs.get_int('channels') - strides = attrs.get_int_tuple('strides', (1, 1)) - padding = attrs.get_int_tuple('padding', (0, 0)) - dilation = attrs.get_int_tuple('dilation', (1, 1)) - groups = attrs.get_int('groups', 1) - data_layout = attrs.get_str('layout', 'NCHW') - kernel_layout = attrs.get_str('kernel_layout', 'OIHW') - out_layout = '' - out_dtype = attrs.get_str('out_dtype', '') - - conv_out = op.nn.conv2d( - data, - weight, - kernel_size=kernel_size, - channels=channels, - strides=strides, - padding=padding, - dilation=dilation, - groups=groups, - data_layout=data_layout, - kernel_layout=kernel_layout, - out_layout=out_layout, - out_dtype=out_dtype) - - if use_bias: - return op.nn.bias_add(conv_out, bias) - else: - return conv_out - - -def _conv2d_transpose(children, attrs, odtype='float32'): - use_bias = attrs.get_bool('use_bias', False) - - if use_bias: - data, weight, bias = children - else: - data, weight = children - - strides = attrs.get_int_tuple('strides', (1, 1)) - padding = attrs.get_int_tuple('padding', (0, 0)) - dilation = attrs.get_int_tuple('dilation', (1, 1)) - groups = attrs.get_int('groups', 1) - data_layout = attrs.get_str('layout', 'NCHW') - kernel_layout = attrs.get_str('kernel_layout', 'OIHW') - out_dtype = attrs.get_str('out_dtype', '') - - out_conv2d = op.nn.conv2d_transpose( - data, - weight, - strides=strides, - padding=padding, - dilation=dilation, - groups=groups, - data_layout=data_layout, - kernel_layout=kernel_layout, - out_dtype=out_dtype) - - if use_bias: - return op.nn.bias_add(out_conv2d, bias) - else: - return out_conv2d - - -def _batch_norm(children, attrs, odtype='float32'): - data, gamma, beta, moving_mean, moving_view = children - axis = attrs.get_int('axis', 1) - epsilon = attrs.get_float('epsilon', 1e-05) - center = attrs.get_bool('center', True) - scale = attrs.get_bool('scale', True) - - return op.nn.batch_norm( - data, - gamma, - beta, - moving_mean, - moving_view, - axis=axis, - epsilon=epsilon, - center=center, - scale=scale)[0] - - -def _max_pool2d(children, attrs, odtype='float32'): - assert len(children) == 1 - data = children[0] - pool_size = attrs.get_int_tuple('pool_size', (1, 1)) - strides = attrs.get_int_tuple('strides', (1, 1)) - padding = attrs.get_int_tuple('padding', (0, 0)) - layout = attrs.get_str('layout', 'NCHW') - ceil_mode = attrs.get_bool('ceil_mode', False) - - return op.nn.max_pool2d( - 
data, - pool_size=pool_size, - strides=strides, - padding=padding, - layout=layout, - ceil_mode=ceil_mode) - - -def _reshape(children, attrs, odtype='float32'): - data = children[0] - shape = attrs.get_int_list('shape') - return op.reshape(data, shape) - - -def _transpose(children, attrs, odtype='float32'): - axes = attrs.get_int_list('axes', None) - return op.transpose(children[0], axes=axes) - - -def _clip(children, attrs, odtype='float32'): - a_min = attrs.get_float('a_min') - a_max = attrs.get_float('a_max') - return op.clip(children[0], a_min, a_max) - - -def _cast(children, attrs, odtype='float32'): - data = children[0] - dtype = attrs.get_str('dtype') - return data.astype(dtype) - - -def _expand_dims(children, attrs, odtype='float32'): - data = children[0] - axis = attrs.get_int('axis') - num_newaxis = attrs.get_int('num_newaxis', 1) - return op.transform.expand_dims(data, axis, num_newaxis=num_newaxis) - - -def broadcast_to(children, attrs, odtype='float32'): - # TODO(@jroesch) export broadcast to? - data = children[0] - shape = attrs.get_int_tuple('shape') - array = numpy.zeros(shape).astype(odtype) - rconst = relay.Constant(nd.array(array)) - return op.broadcast_to_like(data, rconst) - - -def _global_avg_pool2d(children, attrs, odtype='float32'): - data = children[0] - layout = attrs.get_str('layout', "NCHW") - return op.nn.global_avg_pool2d(data, layout) - - -def _avg_pool2d(children, attrs, odtype='float32'): - data = children[0] - pool_size = attrs.get_int_tuple('pool_size', (1, 1)) - strides = attrs.get_int_tuple('strides', (1, 1)) - padding = attrs.get_int_tuple('padding', (0, 0)) - layout = attrs.get_str('layout', "NCHW") - ceil_mode = attrs.get_bool('ceil_mode', False) - count_include_pad = attrs.get_bool('layout', False) - return op.nn.avg_pool2d( - data, - pool_size=pool_size, - strides=strides, - padding=padding, - layout=layout, - ceil_mode=ceil_mode, - count_include_pad=count_include_pad) - - -def _upsampling(children, attrs, odtype='float32'): - scale = attrs.get_int('scale') - layout = attrs.get_str('layout', 'NCHW') - method = attrs.get_str('method', 'NEAREST_NEIGHBOR') - return op.nn.upsampling( - children[0], - scale_h=scale, - scale_w=scale, - layout=layout, - method=method) - - -def _pad(children, attrs, odtype='float32'): - pad_value = attrs.get_float('pad_value', 0.0) - pad_width = attrs.get_tuple_tuple_int('pad_width') - return op.nn.pad(children[0], pad_width, pad_value=pad_value) - -def _leaky_relu(children, attrs, odtype='float32'): - alpha = attrs.get_float('alpha') - return op.nn.leaky_relu(children[0], alpha) - - -def _full_like(children, attrs, odtype='float32'): - fill_value = relay.const(attrs.get_float('fill_value'), dtype='float32') - return op.full_like(children[0], fill_value) - - -def _strided_slice(children, attrs, odtype='float32'): - begin = attrs.get_int_list('begin') - end = attrs.get_int_list('end') - strides = attrs.get_int_list('stride', None) - return op.strided_slice(children[0], begin, end, strides=strides) - - -def _split(children, attrs, odtype='float32'): - indices_or_sections = None - try: - indices_or_sections = attrs.get_int('indices_or_sections', None) - except ValueError: - indices_or_sections = indices_or_sections or attrs.get_int_tuple( - 'indices_or_sections') - - axis = attrs.get_int('axis', 0) - - return op.split(children[0], indices_or_sections, axis) - -def _squeeze(children, attrs, odtype='float32'): - axis = attrs.get_int_tuple('axis', None) - axis = [axis] if isinstance(axis, int) else axis - - return 
op.squeeze(children[0], axis) - -def _concatenate(children, attrs, odtype='float32'): - axis = attrs.get_int('axis', 1) - return op.concatenate(children, axis) - -def _dropout(children, attrs, odtype='float32'): - rate = attrs.get_float('rate', 0.5) - return op.nn.dropout(children[0], rate) - -def _mean(children, attrs, odtype='float32'): - axis = attrs.get_int_tuple('axis', None) - keepdims = attrs.get_bool('keepdims') - - return op.mean(children[0], axis, keepdims) - - -def _prelu(children, attrs, odtype='float32'): - axis = attrs.get_int('axis', 1) - return op.nn.prelu(children[0], children[1], axis) - - -def _lrn(children, attrs, odtype='float32'): - size = attrs.get_int("size", 5) - axis = attrs.get_int("axis", 1) - bias = attrs.get_float("bias", 2) - alpha = attrs.get_float("alpha", 1e-05) - beta = attrs.get_float("beta", 0.75) - return op.nn.lrn(children[0], size, axis, bias, alpha, beta) - - -def _l2_nomalize(children, attrs, odtype='float32'): - eps = attrs.get_float('eps') - axis = attrs.get_int_tuple('axis', None) - return op.nn.l2_normalize(children[0], eps, axis) - - -def _take(children, attrs, odtype='float32'): - axis = attrs.get_int('axis', None) - return op.take(children[0], children[1], axis) - - -def _matmul(children, attrs, odtype='float32'): - input_1_t = op.transpose(children[1], axes=(1, 0)) - return op.nn.dense(children[0], input_1_t) - - -def _collapse_sum(children, attrs, odtype='float32'): - for key in ["axis", "keepdims", "exclude"]: - if key in attrs.attrs: - raise NotImplementedError("Parameter '" + key + "' is not supported.") - return op.collapse_sum_like(children[0], children[1]) - - -def _not_implemented(new_op): - def _impl(children, attrs, odtype='float32'): - raise NotImplementedError(str(new_op) + " is not implemented.") - return _impl - - -NNVM_OP_2_RELAY_OP = { - 'flatten': _nn_batch_flatten, - 'dense': _dense, - 'softmax': _softmax_op(op.nn.softmax), - 'log_softmax': _softmax_op(op.nn.log_softmax), - 'conv2d': _conv2d, - 'batch_norm': _batch_norm, - 'max_pool2d': _max_pool2d, - 'reshape': _reshape, - 'transpose': _transpose, - 'dropout': _dropout, - 'mean': _mean, - # Addition - '__add_scalar__': _binop_scalar(op.add), - 'broadcast_add' : _rename(op.add), - 'elemwise_add' : _rename(op.add), - # Subtraction - '__sub_scalar__' : _binop_scalar(op.subtract), - '__rsub_scalar__': _rbinop_scalar(op.subtract), - 'broadcast_sub' : _rename(op.subtract), - 'elemwise_sub' : _rename(op.subtract), - # Multiply - '__mul_scalar__': _binop_scalar(op.multiply), - 'broadcast_mul' : _rename(op.multiply), - 'elemwise_mul' : _rename(op.multiply), - # Division - '__div_scalar__': _binop_scalar(op.divide), - 'broadcast_div' : _rename(op.divide), - 'elemwise_div' : _rename(op.divide), - 'broadcast_mod' : _rename(op.mod), - # Negative - 'negative': _rename("negative"), - # Power - '__pow_scalar__': _binop_scalar(op.power), - '__rpow_scalar__': _rbinop_scalar(op.power), - 'broadcast_pow': _rename(op.power), - # Sum - 'sum': _reduce(op.sum), - 'elemwise_sum': _elemwise_sum, - 'collapse_sum': _collapse_sum, - 'broadcast_max': _rename(op.maximum), - 'broadcast_min': _rename(op.minimum), - - # Comparsion - 'greater': _compare(op.greater), - 'broadcast_greater': _compare(op.greater), - 'greater_equal': _compare(op.greater_equal), - 'broadcast_greater_equal': _compare(op.greater_equal), - 'less': _compare(op.less), - 'broadcast_less': _compare(op.less), - 'less_equal': _compare(op.less_equal), - 'broadcast_less_equal': _compare(op.less_equal), - 'broadcast_equal': 
_compare(op.equal), - 'broadcast_not_equal': _compare(op.not_equal), - - # Activations - 'sigmoid': _rename('sigmoid'), - 'relu': _rename('nn.relu'), - 'exp': _rename('exp'), - 'log': _rename('log'), - 'tanh': _rename('tanh'), - 'leaky_relu': _leaky_relu, - 'prelu': _prelu, - 'clip': _clip, - 'round': _rename('round'), - 'cast': _cast, - 'expand_dims': _expand_dims, - 'broadcast_to': broadcast_to, - '__lshift_scalar__': _binop_scalar(op.left_shift), - '__rshift_scalar__': _binop_scalar(op.right_shift), - 'broadcast_left_shift': _rename(op.left_shift), - 'broadcast_right_shift': _rename(op.right_shift), - 'copy': _rename(op.copy), - 'global_avg_pool2d': _global_avg_pool2d, - 'avg_pool2d': _avg_pool2d, - 'conv2d_transpose': _conv2d_transpose, - 'upsampling': _upsampling, - 'pad': _pad, - 'full_like': _full_like, - 'strided_slice': _strided_slice, - 'split': _split, - 'squeeze': _squeeze, - 'concatenate': _concatenate, - 'abs': _rename(op.abs), - 'ceil': _rename(op.ceil), - 'floor': _rename(op.floor), - 'trunc': _rename(op.trunc), - 'take': _take, - 'lrn': _lrn, - 'l2_normalize': _l2_nomalize, - 'matmul': _matmul, - 'zeros_like': _rename(op.zeros_like), - 'reshape_like': _rename(op.reshape_like), - 'ones_like': _rename(op.ones_like), - - 'expand_like': _not_implemented("expand_like"), - 'gather_nd': _not_implemented("gather_nd"), - 'block_grad': _not_implemented("block_grad"), -} - - -def to_relay(graph, shape_dict, dtype_dict, params): - """Convert an NNVM graph into the corresponding Relay expression. - - Parameters - ---------- - graph : Graph - The input graph. - - shape_dict : dict of str to shape - The input shape. - - dtype_dict : dict of str to str/dtype - The input shape. - - params : dict of str to array - The parameters. - - Returns - ------- - (expr, params) : Tuple[relay.Expr, dict of str to array] - The corresponding Relay expression and parameters. 
- """ - if isinstance(graph, Symbol): - graph = graph_create(graph) - - param_shapes = dict((k, params[k].shape) for k in params) - shape_dict = shape_dict.copy() - shape_dict.update(param_shapes) - graph = graph_attr.set_shape_inputs(graph, shape_dict) - graph = graph_attr.set_dtype_inputs(graph, dtype_dict) - graph = graph.apply(["InferShape", "InferType"]) - shape = graph.json_attr("shape") - dtype = [graph_attr.TCODE_TO_DTYPE[di] for di in graph.json_attr("dtype")] - - gidx = graph.index - relay_map = {} - fn_params = [] - - for nid, node in enumerate(gidx.nodes): - children = [] - for i in node['inputs']: - child = relay_map[i[0]] - if isinstance(child, expr.TupleWrapper): - children.append(child[i[1]]) - else: - children.append(child) - - oshape = shape[gidx.entry_id(nid, 0)] - odtype = dtype[gidx.entry_id(nid, 0)] - attrs = node.get("attrs", {}) - node_name = node["name"] - op_name = node["op"] - - if op_name == "null": - v = var(node_name, shape=oshape, dtype=odtype) - fn_params.append(v) - relay_map[nid] = v - else: - if op_name in NNVM_OP_2_RELAY_OP: - str_attrs = StrAttrsDict(attrs) - call = NNVM_OP_2_RELAY_OP[op_name](children, str_attrs, odtype) - relay_map[nid] = call - else: - raise Exception( - "nnvm.to_relay: unsupported operator: {0}".format(op_name)) - - outputs = [] - for nid, idx, _ in gidx.output_entries: - output = relay_map[nid] - if isinstance(output, expr.TupleWrapper): - outputs.append(output[idx]) - else: - outputs.append(output) - - if len(outputs) == 1: - body = outputs[0] - else: - body = expr.Tuple(outputs) - - func = relay.Function(fn_params, body) - return func, params diff --git a/nnvm/python/nnvm/top/__init__.py b/nnvm/python/nnvm/top/__init__.py deleted file mode 100644 index db80df03e269..000000000000 --- a/nnvm/python/nnvm/top/__init__.py +++ /dev/null @@ -1,31 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Tensor operator property registry - -Provide information to lower and schedule tensor operators. -""" -from .attr_dict import AttrDict -from . import tensor -from . import nn -from . import transform -from . import reduction -from . import vision -from . import image - -from .registry import OpPattern -from .registry import register_compute, register_schedule, register_pattern diff --git a/nnvm/python/nnvm/top/attr_dict.py b/nnvm/python/nnvm/top/attr_dict.py deleted file mode 100644 index 5082a587d5a0..000000000000 --- a/nnvm/python/nnvm/top/attr_dict.py +++ /dev/null @@ -1,175 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -"""Attr dictionary object used by schedule functions""" -import tvm - -_dict_get = tvm.get_global_func("nnvm.compiler._dict_get") -_dict_size = tvm.get_global_func("nnvm.compiler._dict_size") -_dict_keys = tvm.get_global_func("nnvm.compiler._dict_keys") - -class AttrDict(object): - """Attribute dictionary in nnvm. - - Used by python registration of compute and schedule function. - AttrDict is passed as the first argument to schedule and compute function. - """ - _tvm_tcode = 18 - - def __init__(self, handle): - self.handle = handle - - def __del__(self): - tvm.nd.free_extension_handle(self.handle, 18) - - @property - def _tvm_handle(self): - return self.handle.value - - def __getitem__(self, key): - return _dict_get(self, key) - - def keys(self): - """Get list of keys in the dict. - - Returns - ------- - keys : list of str - List of keys - """ - return [x.value for x in _dict_keys(self)] - - def get_int_tuple(self, key): - """Get tuple of integer from attr dict - - Parameters - ---------- - key : str - The attr key - - Returns - ------- - tuple : tuple of int - The result tuple - """ - return tuple(int(x) for x in self[key][1:-1].split(",") if x) - - def get_int_pair_tuple(self, key): - """Get tuple of integer pairs from attr dict - - Parameters - ---------- - key : str - The attr key - - Returns - ------- - tuple : tuple of int pairs - The result tuple - """ - flat = [int(x.strip(' [] ')) for x in self[key][1:-1].split(",")] - return tuple((flat[i], flat[i+1]) for i in range(0, len(flat), 2)) - - def get_int(self, key): - """Get integer from attr dict - - Parameters - ---------- - key : str - The attr key - - Returns - ------- - value : int - The result value - """ - return int(self[key]) - - def get_float_tuple(self, key): - """Get tuple of float from attr dict - - Parameters - ---------- - key : str - The attr key - - Returns - ------- - tuple : tuple of float - The result tuple - """ - return tuple(float(x) for x in self[key][1:-1].split(",") if x) - - def get_float(self, key): - """Get float from attr dict - - Parameters - ---------- - key : str - The attr key - - Returns - ------- - value : float - The result value - """ - return float(self[key]) - - def get_bool(self, key): - """Get bool from attr dict - - Parameters - ---------- - key : str - The attr key - - Returns - ------- - value : bool - The result value - """ - lowercase = self[key].lower() - if lowercase == "1": - return True - if lowercase == "0": - return False - if lowercase == "true": - return True - if lowercase == "false": - return False - raise ValueError("Wrong bool format for key %s" % key) - - def get_str(self, key): - """Get string from attr dict - - Parameters - ---------- - key : str - The attr key - - Returns - ------- - value : str - The result value - """ - return self[key] - - def __repr__(self): - return str({k : self[k] for k in self.keys()}) - - -tvm.register_extension(AttrDict, AttrDict) diff --git 
a/nnvm/python/nnvm/top/image.py b/nnvm/python/nnvm/top/image.py deleted file mode 100644 index 4367d982985c..000000000000 --- a/nnvm/python/nnvm/top/image.py +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Definition of image ops""" -from __future__ import absolute_import - -import tvm -import topi -from . import registry as reg -from .registry import OpPattern - -# resize -@reg.register_schedule("resize") -def schedule_resize(_, outs, target): - """Schedule definition of resize""" - with tvm.target.create(target): - return topi.generic.schedule_injective(outs) - -reg.register_pattern("resize", OpPattern.INJECTIVE) diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py deleted file mode 100644 index 521b7f4b1da0..000000000000 --- a/nnvm/python/nnvm/top/nn.py +++ /dev/null @@ -1,456 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument, missing-docstring, no-else-return -"""Definition of nn ops""" -from __future__ import absolute_import - -import tvm -import topi -from topi.util import get_const_int, get_const_tuple -from .tensor import _fschedule_broadcast, _fschedule_injective -from . 
import registry as reg -from .registry import OpPattern - -# relu -reg.register_schedule("relu", _fschedule_broadcast) -reg.register_pattern("relu", OpPattern.ELEMWISE) - - -# leaky_relu -reg.register_schedule("leaky_relu", _fschedule_broadcast) -reg.register_pattern("leaky_relu", OpPattern.ELEMWISE) - -# prelu -reg.register_schedule("prelu", _fschedule_broadcast) -reg.register_pattern("prelu", OpPattern.BROADCAST) - -# flatten -reg.register_schedule("flatten", _fschedule_broadcast) -reg.register_pattern("flatten", OpPattern.INJECTIVE) - - -# pad -reg.register_schedule("pad", _fschedule_broadcast) -reg.register_pattern("pad", OpPattern.INJECTIVE) - - -# layout transform -reg.register_schedule("__layout_transform__", _fschedule_injective) -reg.register_pattern("__layout_transform__", OpPattern.INJECTIVE) - - -@reg.register_schedule("softmax") -def schedule_softmax(_, outs, target): - """Schedule definition of softmax""" - with tvm.target.create(target): - return topi.generic.schedule_softmax(outs) - -reg.register_pattern("softmax", OpPattern.OPAQUE) - - -# log softmax -@reg.register_schedule("log_softmax") -def schedule_log_softmax(_, outs, target): - """Schedule definition of softmax""" - with tvm.target.create(target): - return topi.generic.schedule_softmax(outs) - -# Mark softmax as extern as we do not fuse it in call cases -reg.register_pattern("log_softmax", OpPattern.OPAQUE) - - -# dense -@reg.register_compute("dense") -def compute_dense(attrs, inputs, _): - """Compute definition of dense""" - if attrs.get_bool("use_bias"): - return topi.nn.dense(inputs[0], inputs[1], inputs[2]) - return topi.nn.dense(inputs[0], inputs[1]) - -@reg.register_schedule("dense") -def schedule_dense(_, outs, target): - """Schedule definition of dense""" - with tvm.target.create(target): - return topi.generic.schedule_dense(outs) - -reg.register_pattern("dense", OpPattern.OUT_ELEMWISE_FUSABLE) - -#matmul -reg.register_pattern("matmul", OpPattern.OUT_ELEMWISE_FUSABLE) -reg.register_schedule("matmul", _fschedule_injective) - -# conv2d -@reg.register_compute("conv2d") -def compute_conv2d(attrs, inputs, _): - """Compute definition of conv2d""" - padding = attrs.get_int_tuple("padding") - strides = attrs.get_int_tuple("strides") - dilation = attrs.get_int_tuple("dilation") - groups = attrs.get_int("groups") - channels = attrs.get_int("channels") - layout = attrs["layout"] - kernel_layout = attrs["kernel_layout"] - out_dtype = attrs["out_dtype"] - out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype - assert layout in ["NCHW", "NHWC", "NCHW4c"] - (dilation_h, dilation_w) = dilation - if dilation_h < 1 or dilation_w < 1: - raise ValueError("dilation should be positive value") - - if groups == 1 and layout == 'NCHW4c' and inputs[0].dtype == 'int8': - # pylint: disable=assignment-from-no-return - out = topi.nn.conv2d(inputs[0], inputs[1], strides, padding, - dilation, layout, out_dtype) - # pylint: enable=assignment-from-no-return - elif groups == 1: - out = topi.nn.conv2d( - inputs[0], inputs[1], strides, padding, dilation, layout, out_dtype) - elif layout == "NCHW" and \ - groups == get_const_int(inputs[0].shape[1]) and \ - groups == channels: - out = topi.nn.depthwise_conv2d_nchw( - inputs[0], inputs[1], strides, padding, dilation, out_dtype) - elif layout in ["NCHW", "NCHW4c"]: - out = topi.nn.group_conv2d_nchw(inputs[0], inputs[1], strides, padding, dilation, groups, - out_dtype) - elif layout == "NHWC" and \ - kernel_layout == "HWOI" and \ - groups == get_const_int(inputs[0].shape[3]) and \ - 
groups == channels: - out = topi.nn.depthwise_conv2d_nhwc( - inputs[0], inputs[1], strides, padding, dilation, out_dtype) - else: - raise ValueError("not support arbitrary group number for now") - - if attrs.get_bool("use_bias"): - bias = inputs[2] - expand_axis = 1 if layout in ["NCHW", "NCHW4c"] else 0 - bias = topi.expand_dims(bias, axis=expand_axis, num_newaxis=2) - out = topi.add(out, bias) - return out - -@reg.register_schedule("conv2d") -def schedule_conv2d(attrs, outs, target): - """Schedule definition of conv2d""" - groups = attrs.get_int("groups") - channels = attrs.get_int("channels") - layout = attrs["layout"] - kernel_layout = attrs["kernel_layout"] - - with tvm.target.create(target): - if groups == 1 and layout == "NCHW": - return topi.generic.schedule_conv2d_nchw(outs) - elif groups == 1 and layout == "NCHW4c": - return topi.generic.schedule_conv2d_nchw(outs) - elif groups == 1 and layout == "NHWC": - return topi.generic.schedule_conv2d_nhwc(outs) - elif groups == channels and layout == "NCHW": - return topi.generic.schedule_depthwise_conv2d_nchw(outs) - elif groups == channels and layout == "NHWC" and kernel_layout == "HWOI": - return topi.generic.schedule_depthwise_conv2d_nhwc(outs) - elif layout in ["NCHW", "NCHW4c"]: - return topi.generic.schedule_group_conv2d_nchw(outs) - else: - raise ValueError("No compatible schedule") - -@reg.register_alter_op_layout("conv2d") -def alter_conv2d_layout(attrs, inputs, tinfos): - """Replace conv2d op with other layouts or algorithms""" - import nnvm.symbol as sym - - # map relay op names to nnvm op names - sym.contrib_conv2d_winograd_without_weight_transform = \ - sym.contrib.conv2d_winograd_without_weight_transform - sym.contrib_conv2d_winograd_weight_transform = \ - sym.contrib.conv2d_winograd_weight_transform - sym.contrib_conv2d_winograd_nnpack_without_weight_transform = \ - sym.contrib.conv2d_winograd_nnpack_without_weight_transform - sym.contrib_conv2d_winograd_nnpack_weight_transform = \ - sym.contrib.conv2d_winograd_nnpack_weight_transform - sym.nn = sym - - # map relay argument names to nnvm argument names - raw_reshape = sym.reshape - def _reshape(*args, **kwargs): - if "newshape" in kwargs: - kwargs['shape'] = kwargs.pop('newshape') - return raw_reshape(*args, **kwargs) - sym.reshape = _reshape - - return topi.nn.conv2d_alter_layout(attrs, inputs, tinfos, sym) - -reg.register_pattern("conv2d", OpPattern.OUT_ELEMWISE_FUSABLE) - -# convolution NCHWc -@reg.register_compute("_contrib_conv2d_NCHWc") -def compute_contrib_conv2d_NCHWc(attrs, inputs, _): - """Compute definition of conv2d NCHWc""" - padding = attrs.get_int_tuple("padding") - strides = attrs.get_int_tuple("strides") - dilation = attrs.get_int_tuple("dilation") - out_channel = attrs.get_int("channels") - groups = attrs.get_int("groups") - layout = attrs.get_str("layout") - out_layout = attrs.get_str("out_layout") - out_dtype = attrs.get_str("out_dtype") - out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype - if layout == "NCHW": - _, in_channel, _, _ = get_const_tuple(inputs[0].shape) - else: - _, in_channel_chunk, _, _, in_channel_block = get_const_tuple(inputs[0].shape) - in_channel = in_channel_chunk * in_channel_block - assert dilation == (1, 1), "not support dilate now" - if groups == 1: - # pylint: disable=assignment-from-no-return - out = topi.nn.conv2d_NCHWc(inputs[0], inputs[1], strides, padding, dilation, - layout, out_layout, out_dtype) - # pylint: enable=assignment-from-no-return - elif groups == in_channel and groups == out_channel: - # 
pylint: disable=assignment-from-no-return - out = topi.nn.depthwise_conv2d_NCHWc(inputs[0], inputs[1], strides, padding, - dilation, layout, out_layout, out_dtype) - # pylint: enable=assignment-from-no-return - else: - raise ValueError("not support arbitrary group number > 1 for now") - if attrs.get_bool("use_bias"): - bias = inputs[2] - bias = topi.expand_dims(bias, axis=1, num_newaxis=2) - out = topi.add(out, bias) - return out - -@reg.register_schedule("_contrib_conv2d_NCHWc") -def schedule_contrib_conv2d_NCHWc(attrs, outs, target): - """Schedule definition of conv2d NCHWc""" - groups = attrs.get_int("groups") - out_channel = attrs.get_int("channels") - with tvm.target.create(target): - if groups == 1: - return topi.generic.schedule_conv2d_NCHWc(outs) - elif groups == out_channel: - return topi.generic.schedule_depthwise_conv2d_NCHWc(outs) - else: - raise ValueError("not support group number > 1 for now") - -reg.register_pattern("_contrib_conv2d_NCHWc", OpPattern.OUT_ELEMWISE_FUSABLE) - - -@reg.register_compute("_contrib_conv2d_winograd_weight_transform") -def compute_contrib_conv2d_winograd_weight_transform(attrs, inputs, _): - return topi.nn.conv2d_winograd_weight_transform(inputs[0], attrs.get_int('tile_size')) - -@reg.register_schedule("_contrib_conv2d_winograd_weight_transform") -def schedule_contrib_conv2d_winograd_weight_transform(attrs, outs, target): - with tvm.target.create(target): - return topi.generic.schedule_conv2d_winograd_weight_transform(outs) - -reg.register_pattern("_contrib_conv2d_winograd_weight_transform", OpPattern.OUT_ELEMWISE_FUSABLE) - - -@reg.register_compute("_contrib_conv2d_winograd_without_weight_transform") -def compute_contrib_conv2d_winograd_without_weight_transform(attrs, inputs, _): - """Compute definition of conv2d NCHWc""" - padding = attrs.get_int_tuple("padding") - strides = attrs.get_int_tuple("strides") - dilation = attrs.get_int_tuple("dilation") - groups = attrs.get_int("groups") - layout = attrs.get_str("layout") - out_dtype = attrs.get_str("out_dtype") - tile_size = attrs.get_int("tile_size") - out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype - assert dilation == (1, 1), "Do not support dilate now" - assert groups == 1, "Do not supoort arbitrary group number" - - # pylint: disable=assignment-from-no-return - out = topi.nn.conv2d_winograd_without_weight_transform( - inputs[0], inputs[1], strides, padding, dilation, layout, out_dtype, - tile_size) - - if attrs.get_bool("use_bias"): - bias = inputs[2] - bias = topi.expand_dims(bias, axis=1, num_newaxis=2) - out = topi.add(out, bias) - return out - -@reg.register_schedule("_contrib_conv2d_winograd_without_weight_transform") -def schedule_contrib_conv2d_winograd_without_weight_transform(attrs, outs, target): - with tvm.target.create(target): - return topi.generic.schedule_conv2d_winograd_without_weight_transform(outs) - -reg.register_pattern("_contrib_conv2d_winograd_without_weight_transform", - OpPattern.OUT_ELEMWISE_FUSABLE) - - -@reg.register_compute("_contrib_conv2d_winograd_nnpack_weight_transform") -def compute_contrib_conv2d_winograd_nnpack_weight_transform(attrs, inputs, _): - convolution_algorithm = attrs.get_int('convolution_algorithm') - out_dype = attrs.get_str('out_dtype') - return topi.nn.conv2d_winograd_nnpack_weight_transform( - inputs[0], convolution_algorithm, out_dype) - - -@reg.register_schedule("_contrib_conv2d_winograd_nnpack_weight_transform") -def schedule_contrib_conv2d_winograd_nnpack_weight_transform(attrs, outs, target): - with 
tvm.target.create(target): - return topi.generic.schedule_conv2d_winograd_nnpack_weight_transform(outs) - -reg.register_pattern("_contrib_conv2d_winograd_nnpack_weight_transform", OpPattern.OPAQUE) - - -@reg.register_compute("_contrib_conv2d_winograd_nnpack_without_weight_transform") -def compute_contrib_conv2d_winograd_nnpack_without_weight_transform(attrs, inputs, _): - padding = attrs.get_int_tuple("padding") - strides = attrs.get_int_tuple("strides") - dilation = attrs.get_int_tuple("dilation") - groups = attrs.get_int("groups") - layout = attrs.get_str("layout") - out_dtype = attrs.get_str("out_dtype") - out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype - assert dilation == (1, 1), "Do not support dilate now" - assert groups == 1, "Do not supoort arbitrary group number" - - # pylint: disable=assignment-from-no-return - out = topi.nn.conv2d_winograd_nnpack_without_weight_transform( - inputs[0], inputs[1], inputs[2] if attrs.get_bool("use_bias") else None, - strides, padding, dilation, layout, out_dtype) - return out - -@reg.register_schedule("_contrib_conv2d_winograd_nnpack_without_weight_transform") -def schedule_contrib_conv2d_winograd_nnpack_without_weight_transform(attrs, outs, target): - with tvm.target.create(target): - return topi.generic.schedule_conv2d_winograd_nnpack_without_weight_transform(outs) - -reg.register_pattern("_contrib_conv2d_winograd_nnpack_without_weight_transform", - OpPattern.OPAQUE) - - -# conv2d_transpose -@reg.register_compute("conv2d_transpose") -def compute_conv2d_transpose(attrs, inputs, _): - """Compute definition of conv2d_transpose""" - padding = attrs.get_int_tuple("padding") - strides = attrs.get_int_tuple("strides") - dilation = attrs.get_int_tuple("dilation") - groups = attrs.get_int("groups") - out_dtype = attrs.get_str("out_dtype") - layout = attrs["layout"] - out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype - - assert layout == "NCHW", "only support nchw for now" - assert dilation == (1, 1), "not support dilate now" - assert groups == 1, "only support groups == 1 for now" - - out = topi.nn.conv2d_transpose_nchw(inputs[0], inputs[1], strides, padding, out_dtype) - if attrs.get_bool("use_bias"): - bias = inputs[2] - bias = topi.expand_dims(bias, axis=1, num_newaxis=2) - out = topi.add(out, bias) - output_padding = attrs.get_int_tuple("output_padding") - out = topi.nn.pad(out, \ - [0, 0, 0, 0], [0, 0, output_padding[0], output_padding[1]]) - return out - -@reg.register_schedule("conv2d_transpose") -def schedule_conv2d_transpose(attrs, outs, target): - """Schedule definition of conv2d_transpose""" - with tvm.target.create(target): - return topi.generic.schedule_conv2d_transpose_nchw(outs) - -reg.register_pattern("conv2d_transpose", OpPattern.OUT_ELEMWISE_FUSABLE) - - -# max_pool2d -@reg.register_schedule("max_pool2d") -def schedule_max_pool2d(attrs, outs, target): - """Schedule definition of max_pool2d""" - layout = attrs["layout"] - with tvm.target.create(target): - return topi.generic.schedule_pool(outs, layout) - -reg.register_pattern("max_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE) - - -# avg_pool2d -@reg.register_schedule("avg_pool2d") -def schedule_avg_pool2d(attrs, outs, target): - """Schedule definition of avg_pool2d""" - layout = attrs["layout"] - with tvm.target.create(target): - return topi.generic.schedule_pool(outs, layout) - -reg.register_pattern("avg_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE) - - -# global_max_pool2d -@reg.register_schedule("global_max_pool2d") -def schedule_global_max_pool2d(_, outs, 
target): - """Schedule definition of global_max_pool2d""" - with tvm.target.create(target): - return topi.generic.schedule_adaptive_pool(outs) - -reg.register_pattern("global_max_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE) - - -# global_avg_pool2d -@reg.register_schedule("global_avg_pool2d") -def schedule_global_avg_pool2d(_, outs, target): - """Schedule definition of global_avg_pool2d""" - with tvm.target.create(target): - return topi.generic.schedule_adaptive_pool(outs) - -reg.register_pattern("global_avg_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE) - -# upsampling -@reg.register_schedule("upsampling") -def schedule_upsampling(_, outs, target): - """Schedule definition of upsampling""" - with tvm.target.create(target): - return topi.generic.schedule_injective(outs) - -reg.register_pattern("upsampling", OpPattern.INJECTIVE) - -@reg.register_compute("lrn") -def compute_lrn(attrs, inputs, _): - """Compute definition of lrn""" - size = attrs.get_int("size") - axis = attrs.get_int("axis") - alpha = attrs.get_float("alpha") - beta = attrs.get_float("beta") - bias = attrs.get_float("bias") - return topi.nn.lrn(inputs[0], size, axis, alpha, beta, bias) - -@reg.register_schedule("lrn") -def schedule_lrn(attrs, outs, target): - """Schedule definition of lrn""" - with tvm.target.create(target): - return topi.generic.schedule_lrn(outs) - -reg.register_pattern("lrn", OpPattern.OPAQUE) - -@reg.register_compute("l2_normalize") -def compute_l2_normalize(attrs, inputs, _): - """Compute definition of l2 normalize""" - eps = attrs.get_float("eps") - axis = attrs.get_int_tuple("axis") - return topi.nn.l2_normalize(inputs[0], eps, axis) - -@reg.register_schedule("l2_normalize") -def schedule_l2_normalize(attrs, outs, target): - """Schedule definition of l2 normalize""" - with tvm.target.create(target): - return topi.generic.schedule_l2_normalize(outs) - -reg.register_pattern("l2_normalize", OpPattern.OUT_ELEMWISE_FUSABLE) diff --git a/nnvm/python/nnvm/top/reduction.py b/nnvm/python/nnvm/top/reduction.py deleted file mode 100644 index ce14d0d28831..000000000000 --- a/nnvm/python/nnvm/top/reduction.py +++ /dev/null @@ -1,75 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Reduction ops""" -from __future__ import absolute_import - -import tvm -import topi -import topi.cuda -from . 
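All of the operator definitions deleted above follow the same three-part idiom from nnvm.top.registry: a compute callback that forwards to TOPI, a schedule callback that picks the target-specific TOPI schedule, and a fusion pattern code. A minimal sketch of that idiom, assuming the pre-removal nnvm package is still importable; the operator name "my_relu" is hypothetical and would have to exist in NNVM's C++ op registry:

    import tvm
    import topi
    from nnvm.top import registry as reg
    from nnvm.top.registry import OpPattern

    @reg.register_compute("my_relu")
    def compute_my_relu(attrs, inputs, _):
        """Compute: forward to the TOPI kernel."""
        return topi.nn.relu(inputs[0])

    @reg.register_schedule("my_relu")
    def schedule_my_relu(attrs, outs, target):
        """Schedule: pick the target-specific TOPI schedule."""
        with tvm.target.create(target):
            return topi.generic.schedule_injective(outs)

    # The pattern code tells the graph-fusion pass how this op may be fused.
    reg.register_pattern("my_relu", OpPattern.ELEMWISE)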
import registry as reg -from .registry import OpPattern - -def _schedule_reduce(_, outs, target): - """Generic schedule for reduce""" - with tvm.target.create(target): - return topi.generic.schedule_reduce(outs) - - -_fschedule_reduce = tvm.convert(_schedule_reduce) - -def _compute_reduce(f): - """auxiliary function""" - def _compute(attrs, inputs, out_info): - axis = attrs.get_int_tuple("axis") - keepdims = attrs.get_bool("keepdims") - if axis: - return f(inputs[0], axis=axis, keepdims=keepdims) - return f(inputs[0], keepdims=keepdims) - return _compute - -# sum -reg.register_pattern("sum", OpPattern.COMM_REDUCE) -reg.register_schedule("sum", _fschedule_reduce) - -# max -reg.register_pattern("max", OpPattern.COMM_REDUCE) -reg.register_schedule("max", _fschedule_reduce) - -# min -reg.register_pattern("min", OpPattern.COMM_REDUCE) -reg.register_schedule("min", _fschedule_reduce) - -# collapse sum -reg.register_pattern("collapse_sum", OpPattern.COMM_REDUCE) -reg.register_schedule("collapse_sum", _fschedule_reduce) - -# argmax -reg.register_pattern("argmax", OpPattern.COMM_REDUCE) -reg.register_schedule("argmax", _fschedule_reduce) - -# argmin -reg.register_pattern("argmin", OpPattern.COMM_REDUCE) -reg.register_schedule("argmin", _fschedule_reduce) - -# mean -reg.register_pattern("mean", OpPattern.COMM_REDUCE) -reg.register_schedule("mean", _fschedule_reduce) - -# product -reg.register_pattern("prod", OpPattern.COMM_REDUCE) -reg.register_schedule("prod", _fschedule_reduce) diff --git a/nnvm/python/nnvm/top/registry.py b/nnvm/python/nnvm/top/registry.py deleted file mode 100644 index 7ad10620f304..000000000000 --- a/nnvm/python/nnvm/top/registry.py +++ /dev/null @@ -1,138 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -"""Information registry to register operator information for compiler""" -import tvm - -class OpPattern(object): - """Operator generic patterns - - See Also - -------- - top.tag : Contains explanation of the tag type. 
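The reduction module removed above registers only schedules and patterns from the Python side; the computes appear to come from the C++ op definitions. A single generic schedule function is converted once and the same handle is reused for every reduce operator. A sketch of that sharing, under the same pre-removal assumptions as the previous example:

    import tvm
    import topi
    from nnvm.top import registry as reg
    from nnvm.top.registry import OpPattern

    def _schedule_reduce(_, outs, target):
        """One generic reduce schedule shared by every reduction op."""
        with tvm.target.create(target):
            return topi.generic.schedule_reduce(outs)

    _fschedule_reduce = tvm.convert(_schedule_reduce)  # convert once, reuse below

    for op_name in ("sum", "max", "min", "mean", "prod"):
        reg.register_pattern(op_name, OpPattern.COMM_REDUCE)
        reg.register_schedule(op_name, _fschedule_reduce)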
- """ - # Elementwise operator - ELEMWISE = 0 - # Broadcast operator - BROADCAST = 1 - # Injective mapping - INJECTIVE = 2 - # Comunication - COMM_REDUCE = 3 - # Complex op, can still fuse ewise into it - OUT_ELEMWISE_FUSABLE = 4 - # Not fusable opaque op - OPAQUE = 8 - -_register_compute = tvm.get_global_func("nnvm._register_compute") -_register_schedule = tvm.get_global_func("nnvm._register_schedule") -_register_pattern = tvm.get_global_func("nnvm._register_pattern") -_register_alter_op_layout = tvm.get_global_func("nnvm.compiler._register_alter_op_layout") - -def register_compute(op_name, f=None, level=10): - """Register compute function for operator - - Parameters - ---------- - op_name : str - The name of operator - - f : function - The schedule function - - level : int - The priority level - - Returns - ------- - fregister : function - Register function if f is not specified. - """ - def register(myf): - """internal register function""" - _register_compute(op_name, myf, level) - return myf - return register(f) if f else register - - -def register_schedule(op_name, f=None, level=10): - """Register schedule function for operator - - Parameters - ---------- - op_name : str - The name of operator - - f : function - The schedule function - - level : int - The priority level - - Returns - ------- - fregister : function - Register function if f is not specified. - """ - def register(myf): - """internal register function""" - _register_schedule(op_name, myf, level) - return myf - return register(f) if f else register - - -def register_pattern(op_name, pattern, level=10): - """Register pattern code for operator - - Parameters - ---------- - op_name : str - The name of operator - - pattern : int - The pattern code. - - level : int - The priority level - """ - _register_pattern(op_name, pattern, level) - - -def register_alter_op_layout(op_name, f=None, level=10): - """Register alter layout function for operator - - Parameters - ---------- - op_name : str - The name of operator - - f : function - The schedule function - - level : int - The priority level - - Returns - ------- - fregister : function - Register function if f is not specified. - """ - def register(myf): - """internal register function""" - _register_alter_op_layout(op_name, myf, level) - return myf - return register(f) if f else register diff --git a/nnvm/python/nnvm/top/tensor.py b/nnvm/python/nnvm/top/tensor.py deleted file mode 100644 index 9f12e3245e3a..000000000000 --- a/nnvm/python/nnvm/top/tensor.py +++ /dev/null @@ -1,306 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Tensor ops""" -from __future__ import absolute_import - -import tvm -import topi -import topi.cuda -from . 
import registry as reg -from .registry import OpPattern - -def _schedule_injective(_, outs, target): - """Generic schedule for binary bcast""" - with tvm.target.create(target): - return topi.generic.schedule_injective(outs) - -def _compute_binary_scalar(f): - """auxiliary function""" - @tvm.tag_scope(topi.tag.ELEMWISE) - def _compute(attrs, x, _): - x = x[0] - scalar = attrs.get_float("scalar") - scalar = tvm.const(scalar, x.dtype) - return tvm.compute(x.shape, lambda *i: f(x(*i), scalar)) - return _compute - - -def _compute_unary(f): - """auxiliary function""" - def _compute(attrs, x, _): - return f(x[0]) - return _compute - - -def _compute_binary(f): - """auxiliary function""" - def _compute(attrs, x, _): - return f(x[0], x[1]) - return _compute - - -_fschedule_injective = tvm.convert(_schedule_injective) -_fschedule_broadcast = _fschedule_injective -_fschedule_elemwise = _fschedule_injective - -# Assign requires special treatment in the compiler -# The compute and schedule are designed as -# copy from rhs to output -reg.register_pattern("_assign", OpPattern.OPAQUE) -reg.register_schedule("_assign", _fschedule_broadcast) - -# copy -reg.register_pattern("copy", OpPattern.ELEMWISE) -reg.register_schedule("copy", _fschedule_broadcast) - -# cast -reg.register_pattern("cast", OpPattern.ELEMWISE) -reg.register_schedule("cast", _fschedule_broadcast) - -# floor -reg.register_pattern("floor", OpPattern.ELEMWISE) -reg.register_schedule("floor", _fschedule_broadcast) - -# ceil -reg.register_pattern("ceil", OpPattern.ELEMWISE) -reg.register_schedule("ceil", _fschedule_broadcast) - -# round -reg.register_pattern("round", OpPattern.ELEMWISE) -reg.register_schedule("round", _fschedule_broadcast) - -# abs -reg.register_pattern("abs", OpPattern.ELEMWISE) -reg.register_schedule("abs", _fschedule_broadcast) - -# trunc -reg.register_pattern("trunc", OpPattern.ELEMWISE) -reg.register_schedule("trunc", _fschedule_broadcast) - -# exp -reg.register_pattern("exp", OpPattern.ELEMWISE) -reg.register_schedule("exp", _fschedule_broadcast) - -# sqrt -reg.register_pattern("sqrt", OpPattern.ELEMWISE) -reg.register_schedule("sqrt", _fschedule_broadcast) - -# log -reg.register_pattern("log", OpPattern.ELEMWISE) -reg.register_schedule("log", _fschedule_broadcast) - -# tanh -reg.register_pattern("tanh", OpPattern.ELEMWISE) -reg.register_schedule("tanh", _fschedule_broadcast) - -# negative -reg.register_pattern("negative", OpPattern.ELEMWISE) -reg.register_schedule("negative", _fschedule_broadcast) - -# sigmoid -reg.register_pattern("sigmoid", OpPattern.ELEMWISE) -reg.register_schedule("sigmoid", _fschedule_broadcast) - -# add_scalar -reg.register_pattern("__add_scalar__", OpPattern.ELEMWISE) -reg.register_schedule("__add_scalar__", _fschedule_broadcast) - -# sub_calar -reg.register_pattern("__sub_scalar__", OpPattern.ELEMWISE) -reg.register_schedule("__sub_scalar__", _fschedule_broadcast) - -# rsub_scalar -reg.register_pattern("__rsub_scalar__", OpPattern.ELEMWISE) -reg.register_schedule("__rsub_scalar__", _fschedule_broadcast) - -# mul_scalar -reg.register_pattern("__mul_scalar__", OpPattern.ELEMWISE) -reg.register_schedule("__mul_scalar__", _fschedule_broadcast) - -# div_scalar -reg.register_pattern("__div_scalar__", OpPattern.ELEMWISE) -reg.register_schedule("__div_scalar__", _fschedule_broadcast) - -# rdiv_scalar -reg.register_pattern("__rdiv_scalar__", OpPattern.ELEMWISE) -reg.register_schedule("__rdiv_scalar__", _fschedule_broadcast) - -# pow_scalar -reg.register_pattern("__pow_scalar__", OpPattern.ELEMWISE) 
-reg.register_schedule("__pow_scalar__", _fschedule_broadcast) - -# rpow_scalar -reg.register_pattern("__rpow_scalar__", OpPattern.ELEMWISE) -reg.register_schedule("__rpow_scalar__", _fschedule_broadcast) - -# lshift_scalar -reg.register_pattern("__lshift_scalar__", OpPattern.ELEMWISE) -reg.register_schedule("__lshift_scalar__", _fschedule_broadcast) - -# rshift_scalar -reg.register_pattern("__rshift_scalar__", OpPattern.ELEMWISE) -reg.register_schedule("__rshift_scalar__", _fschedule_broadcast) - -# logical_and -reg.register_pattern("logical_and", OpPattern.ELEMWISE) -reg.register_schedule("logical_and", _fschedule_broadcast) - -# logical_or -reg.register_pattern("logical_or", OpPattern.ELEMWISE) -reg.register_schedule("logical_or", _fschedule_broadcast) - -# logical_not -reg.register_pattern("logical_not", OpPattern.ELEMWISE) -reg.register_schedule("logical_not", _fschedule_broadcast) - -# elemwise_add -reg.register_pattern("elemwise_add", OpPattern.BROADCAST) -reg.register_schedule("elemwise_add", _fschedule_broadcast) - -# elemwise_sub -reg.register_pattern("elemwise_sub", OpPattern.BROADCAST) -reg.register_schedule("elemwise_sub", _fschedule_broadcast) - -# elemwise_mul -reg.register_pattern("elemwise_mul", OpPattern.BROADCAST) -reg.register_schedule("elemwise_mul", _fschedule_broadcast) - -# elemwise_div -reg.register_pattern("elemwise_div", OpPattern.BROADCAST) -reg.register_schedule("elemwise_div", _fschedule_broadcast) - -# elemwise_mod -reg.register_pattern("elemwise_mod", OpPattern.BROADCAST) -reg.register_schedule("elemwise_mod", _fschedule_broadcast) - -# elemwise_pow -reg.register_pattern("elemwise_pow", OpPattern.BROADCAST) -reg.register_schedule("elemwise_pow", _fschedule_broadcast) - -# broadcast_add -reg.register_pattern("broadcast_add", OpPattern.BROADCAST) -reg.register_schedule("broadcast_add", _fschedule_broadcast) - -# broadcast_sub -reg.register_pattern("broadcast_sub", OpPattern.BROADCAST) -reg.register_schedule("broadcast_sub", _fschedule_broadcast) - -# broadcast_mul -reg.register_pattern("broadcast_mul", OpPattern.BROADCAST) -reg.register_schedule("broadcast_mul", _fschedule_broadcast) - -# broadcast_div -reg.register_pattern("broadcast_div", OpPattern.BROADCAST) -reg.register_schedule("broadcast_div", _fschedule_broadcast) - -# broadcast mod -reg.register_pattern("broadcast_mod", OpPattern.BROADCAST) -reg.register_schedule("broadcast_mod", _fschedule_broadcast) - -# broadcast max -reg.register_pattern("broadcast_max", OpPattern.BROADCAST) -reg.register_schedule("broadcast_max", _fschedule_broadcast) - -# broadcast min -reg.register_pattern("broadcast_min", OpPattern.BROADCAST) -reg.register_schedule("broadcast_min", _fschedule_broadcast) - -# broadcast pow -reg.register_pattern("broadcast_pow", OpPattern.BROADCAST) -reg.register_schedule("broadcast_pow", _fschedule_broadcast) - -# broadcast left_shift -reg.register_pattern("broadcast_left_shift", OpPattern.BROADCAST) -reg.register_schedule("broadcast_left_shift", _fschedule_broadcast) - -# broadcast right_shift -reg.register_pattern("broadcast_right_shift", OpPattern.BROADCAST) -reg.register_schedule("broadcast_right_shift", _fschedule_broadcast) - -# broadcast greater -reg.register_pattern("broadcast_greater", OpPattern.BROADCAST) -reg.register_schedule("broadcast_greater", _fschedule_broadcast) - -# broadcast less -reg.register_pattern("broadcast_less", OpPattern.BROADCAST) -reg.register_schedule("broadcast_less", _fschedule_broadcast) - -# broadcast equal -reg.register_pattern("broadcast_equal", 
OpPattern.BROADCAST) -reg.register_schedule("broadcast_equal", _fschedule_broadcast) - -# broadcast not_equal -reg.register_pattern("broadcast_not_equal", OpPattern.BROADCAST) -reg.register_schedule("broadcast_not_equal", _fschedule_broadcast) - -# broadcast greater_equal -reg.register_pattern("broadcast_greater_equal", OpPattern.BROADCAST) -reg.register_schedule("broadcast_greater_equal", _fschedule_broadcast) - -# broadcast less_equal -reg.register_pattern("broadcast_less_equal", OpPattern.BROADCAST) -reg.register_schedule("broadcast_less_equal", _fschedule_broadcast) - -# broadcast_to -reg.register_pattern("broadcast_to", OpPattern.BROADCAST) -reg.register_schedule("broadcast_to", _fschedule_broadcast) - -# clip -reg.register_pattern("clip", OpPattern.ELEMWISE) -reg.register_schedule("clip", _fschedule_elemwise) - -# elemwise sum -reg.register_pattern("elemwise_sum", OpPattern.ELEMWISE) -reg.register_schedule("elemwise_sum", _fschedule_elemwise) - -# full -reg.register_pattern("full", OpPattern.OUT_ELEMWISE_FUSABLE) -reg.register_schedule("full", _fschedule_elemwise) - -# full_like -reg.register_pattern("full_like", OpPattern.ELEMWISE) -reg.register_schedule("full_like", _fschedule_elemwise) - -# zeros -reg.register_pattern("zeros", OpPattern.OUT_ELEMWISE_FUSABLE) -reg.register_schedule("zeros", _fschedule_elemwise) - -# zeros_like -reg.register_pattern("zeros_like", OpPattern.ELEMWISE) -reg.register_schedule("zeros_like", _fschedule_elemwise) - -# ones -reg.register_pattern("ones", OpPattern.OUT_ELEMWISE_FUSABLE) -reg.register_schedule("ones", _fschedule_elemwise) - -# ones_like -reg.register_pattern("ones_like", OpPattern.ELEMWISE) -reg.register_schedule("ones_like", _fschedule_elemwise) - -# greater -reg.register_pattern("greater", OpPattern.ELEMWISE) -reg.register_schedule("greater", _fschedule_elemwise) - -# less -reg.register_pattern("less", OpPattern.ELEMWISE) -reg.register_schedule("less", _fschedule_elemwise) - -# block_grad -reg.register_compute("block_grad", _compute_unary(topi.identity)) -reg.register_pattern("block_grad", OpPattern.ELEMWISE) -reg.register_schedule("block_grad", _fschedule_elemwise) diff --git a/nnvm/python/nnvm/top/transform.py b/nnvm/python/nnvm/top/transform.py deleted file mode 100644 index e9051309734a..000000000000 --- a/nnvm/python/nnvm/top/transform.py +++ /dev/null @@ -1,108 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Tensor transformation ops""" -from __future__ import absolute_import - -import tvm -import topi -from .tensor import _fschedule_broadcast, _fschedule_injective -from . 
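For the scalar variants above (__add_scalar__, __mul_scalar__, and so on) only schedules and patterns are registered here; the _compute_binary_scalar helper near the top of the removed module shows how such a compute would be built: read the "scalar" attribute, materialize it as a typed constant, and emit an ELEMWISE-tagged tvm.compute. A usage sketch of that helper, illustrative only:

    import tvm
    import topi

    def _compute_binary_scalar(f):
        """Wrap an elementwise (element, scalar) -> element lambda as a compute."""
        @tvm.tag_scope(topi.tag.ELEMWISE)
        def _compute(attrs, x, _):
            x = x[0]
            scalar = tvm.const(attrs.get_float("scalar"), x.dtype)
            return tvm.compute(x.shape, lambda *i: f(x(*i), scalar))
        return _compute

    # The compute callback one would register for an add-by-scalar operator:
    compute_add_scalar = _compute_binary_scalar(lambda elem, c: elem + c)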
import registry as reg -from .registry import OpPattern - -# expand_dims -reg.register_pattern("expand_dims", OpPattern.BROADCAST) -reg.register_schedule("expand_dims", _fschedule_broadcast) - -# expand_like -@reg.register_compute("expand_like") -def compute_expand_like(attrs, inputs, _): - """Compute definition of expand_like""" - if len(inputs[0].shape) == len(inputs[1].shape): - # If the number of dimensions is not changed then it is just a broadcasting - return topi.broadcast_to(inputs[0], inputs[1].shape) - - exclude = attrs.get_bool("exclude") - axis = attrs.get_int_tuple("axis") - if exclude: - exclude_axis = (axis,) if isinstance(axis, int) else axis - axis = [] - for item in range(len(inputs[1].shape)): - if item not in exclude_axis: - axis.append(item) - axis = tuple(axis) - - return topi.transform.expand_like(inputs[0], inputs[1], axis) -reg.register_pattern("expand_like", OpPattern.BROADCAST) -reg.register_schedule("expand_like", _fschedule_broadcast) - -# reshape_like -@reg.register_compute("reshape_like") -def compute_reshape_like(attrs, inputs, out_info): - """Compute definition of reshape_like""" - return topi.reshape(inputs[0], inputs[1].shape) -reg.register_pattern("reshape_like", OpPattern.INJECTIVE) -reg.register_schedule("reshape_like", _fschedule_injective) - -# transpose -reg.register_pattern("transpose", OpPattern.INJECTIVE) -reg.register_schedule("transpose", _fschedule_injective) - -# flip -reg.register_pattern("flip", OpPattern.INJECTIVE) -reg.register_schedule("flip", _fschedule_injective) - -# reshape -reg.register_pattern("reshape", OpPattern.INJECTIVE) -reg.register_schedule("reshape", _fschedule_injective) - -# squeeze -reg.register_pattern("squeeze", OpPattern.INJECTIVE) -reg.register_schedule("squeeze", _fschedule_injective) - -# concatenate -@reg.register_schedule("concatenate") -def schedule_concatenate(_, outs, target): - """Schedule definition of concatenate""" - with tvm.target.create(target): - return topi.generic.schedule_concatenate(outs) - -reg.register_pattern("concatenate", OpPattern.INJECTIVE) - -# split -reg.register_pattern("split", OpPattern.INJECTIVE) -reg.register_schedule("split", _fschedule_injective) - -# take -reg.register_pattern("take", OpPattern.INJECTIVE) -reg.register_schedule("take", _fschedule_injective) - -# strided_slice -reg.register_pattern("strided_slice", OpPattern.INJECTIVE) -reg.register_schedule("strided_slice", _fschedule_injective) - -# slice_like -reg.register_pattern("slice_like", OpPattern.INJECTIVE) -reg.register_schedule("slice_like", _fschedule_injective) - -# where -reg.register_pattern("where", OpPattern.INJECTIVE) -reg.register_schedule("where", _fschedule_injective) - -# gather_nd -reg.register_pattern("gather_nd", OpPattern.INJECTIVE) -reg.register_schedule("gather_nd", _fschedule_injective) diff --git a/nnvm/python/nnvm/top/vision.py b/nnvm/python/nnvm/top/vision.py deleted file mode 100644 index 2e18cf7023ef..000000000000 --- a/nnvm/python/nnvm/top/vision.py +++ /dev/null @@ -1,105 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
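In compute_expand_like above, exclude=True means the listed axes are the ones already present, so the broadcast axes are the complement of axis over the target rank. The same logic in isolation, as a small self-contained check:

    def complement_axes(axis, ndim):
        """Axes to expand along when exclude=True (mirrors the loop above)."""
        exclude_axis = (axis,) if isinstance(axis, int) else axis
        return tuple(i for i in range(ndim) if i not in exclude_axis)

    assert complement_axes((1,), 4) == (0, 2, 3)   # 4-d target, axis 1 kept
    assert complement_axes(0, 3) == (1, 2)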
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name, unused-argument -"""Definition of nn ops""" -from __future__ import absolute_import - -import tvm -import topi -from . import registry as reg -from .registry import OpPattern - -@reg.register_compute("yolo_reorg") -def compute_reorg(attrs, inputs, _): - """Compute definition of reorg""" - return topi.vision.reorg(inputs[0], attrs.get_int("stride")) - -@reg.register_schedule("yolo_reorg") -def schedule_reorg(attrs, outs, target): - """Schedule definition of reorg""" - with tvm.target.create(target): - return topi.generic.schedule_injective(outs) - -reg.register_pattern("yolo_reorg", OpPattern.INJECTIVE) - -# multibox_prior -@reg.register_schedule("multibox_prior") -def schedule_multibox_prior(_, outs, target): - """Schedule definition of multibox_prior""" - with tvm.target.create(target): - return topi.generic.schedule_multibox_prior(outs) - -@reg.register_compute("multibox_prior") -def compute_multibox_prior(attrs, inputs, _): - """Compute definition of multibox_prior""" - sizes = attrs.get_float_tuple('sizes') - ratios = attrs.get_float_tuple('ratios') - steps = attrs.get_float_tuple('steps') - offsets = attrs.get_float_tuple('offsets') - clip = attrs.get_bool('clip') - - return topi.vision.ssd.multibox_prior(inputs[0], sizes, ratios, - steps, offsets, clip) - -reg.register_pattern("multibox_prior", OpPattern.OPAQUE) - -# multibox_transform_loc -@reg.register_schedule("multibox_transform_loc") -def schedule_multibox_transform_loc(_, outs, target): - """Schedule definition of multibox_detection""" - with tvm.target.create(target): - return topi.generic.schedule_multibox_transform_loc(outs) - -@reg.register_compute("multibox_transform_loc") -def compute_multibox_transform_loc(attrs, inputs, _): - """Compute definition of multibox_detection""" - clip = attrs.get_bool('clip') - threshold = attrs.get_float('threshold') - variance = attrs.get_float_tuple('variances') - - return topi.vision.ssd.multibox_transform_loc(inputs[0], inputs[1], inputs[2], - clip, threshold, variance) - -reg.register_pattern("multibox_detection", OpPattern.OPAQUE) - -# non-maximum suppression -@reg.register_schedule("non_max_suppression") -def schedule_nms(_, outs, target): - """Schedule definition of non_max_suppression""" - with tvm.target.create(target): - return topi.generic.schedule_nms(outs) - -@reg.register_compute("non_max_suppression") -def compute_nms(attrs, inputs, _): - """Compute definition of non_max_suppression""" - return_indices = attrs.get_bool('return_indices') - max_output_size = attrs.get_int('max_output_size') - iou_threshold = attrs.get_float('iou_threshold') - force_suppress = attrs.get_bool('force_suppress') - top_k = attrs.get_int('top_k') - id_index = attrs.get_int('id_index') - invalid_to_bottom = attrs.get_bool('invalid_to_bottom') - - return topi.vision.non_max_suppression(inputs[0], inputs[1], - max_output_size=max_output_size, - iou_threshold=iou_threshold, - force_suppress=force_suppress, - top_k=top_k, id_index=id_index, - return_indices=return_indices, - invalid_to_bottom=invalid_to_bottom) - -reg.register_pattern("non_max_suppression", 
OpPattern.OPAQUE) diff --git a/nnvm/python/setup.py b/nnvm/python/setup.py deleted file mode 100644 index f89ac33a2e39..000000000000 --- a/nnvm/python/setup.py +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import os -import sys -from setuptools import find_packages -from distutils.core import setup - -def config_cython(): - # temporary disable cython for now - # as NNVM uses local DLL build - return [] - try: - from Cython.Build import cythonize - from distutils.extension import Extension - if sys.version_info >= (3, 0): - subdir = "_cy3" - else: - subdir = "_cy2" - ret = [] - path = "nnvm/cython" - - for fn in os.listdir(path): - if not fn.endswith(".pyx"): - continue - ret.append(Extension( - "nnvm/%s/%s" % (subdir, fn[:-4]), - ["nnvm/cython/%s" % fn], - include_dirs=["../include/"], - language="c++")) - return cythonize(ret) - except: - print("Cython is not installed, will compile without cython module") - return [] - -# We can not import `libinfo.py` in setup.py directly since __init__.py -# Will be invoked which introduces dependences -CURRENT_DIR = os.path.dirname(__file__) -libinfo_py = os.path.join(CURRENT_DIR, './nnvm/libinfo.py') -libinfo = {'__file__': libinfo_py} -exec(compile(open(libinfo_py, "rb").read(), libinfo_py, 'exec'), libinfo, libinfo) - -__version__ = libinfo['__version__'] -if not os.getenv('CONDA_BUILD'): - LIB_PATH = libinfo['find_lib_path']() - _, LIB_NAME = os.path.split(LIB_PATH[0]) - curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) - rpath = os.path.relpath(LIB_PATH[0], curr_path) - setup_kwargs = dict( - include_package_data=True, - data_files=[('nnvm', [rpath])] - ) -else: - setup_kwargs = {} - -setup(name='nnvm', - version=__version__, - description="NNVM: Open Compiler for AI Frameworks", - zip_safe=False, - install_requires=[ - 'numpy' - ], - packages=find_packages(), - url='https://github.com/dmlc/nnvm', - **setup_kwargs) diff --git a/nnvm/src/README.md b/nnvm/src/README.md index c1b66260625e..64fd1371719a 100644 --- a/nnvm/src/README.md +++ b/nnvm/src/README.md @@ -23,8 +23,3 @@ The following components are operator invariant. - c_api: NNVM C API - core: NNVM core data structure - pass: NNVM pass - -The following components are generic NNVM compiler and defines tensor operator set - -- top: NNVM core tensor operators -- compiler: NNVM compiler toolchain diff --git a/nnvm/src/compiler/alter_op_layout.cc b/nnvm/src/compiler/alter_op_layout.cc deleted file mode 100644 index 8a6694f166d4..000000000000 --- a/nnvm/src/compiler/alter_op_layout.cc +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file alter_op_layout.cc - * \brief Alter the operator layouts. Keep inferred layouts (if any) from previous stages. - * e.g., convolution may calculates faster with NCHW16c layout. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "compile_engine.h" -#include "graph_transform.h" - -namespace nnvm { -namespace compiler { -namespace { - -tvm::Array GetTensorInfo(const IndexedGraph& idx_graph, - const uint32_t nid, - const ShapeVector& shape_vec, - const DTypeVector& dtype_vec) { - tvm::Array vec; - for (uint32_t i = 0; i < idx_graph[nid].source->num_outputs(); ++i) { - tvm::Array shape; - for (int64_t x : shape_vec[idx_graph.entry_id(nid, i)]) { - CHECK_LE(x, static_cast(std::numeric_limits::max())); - shape.push_back(tvm::make_const(tvm::DataType::Int(32), x)); - } - vec.push_back(tvm::placeholder( - shape, GetTVMType(dtype_vec[idx_graph.entry_id(nid, i)]))); - } - return vec; -} - -Graph AlterOpLayout(const Graph& src) { - static auto& falter_op_layout = - Op::GetAttr("FTVMAlterOpLayout"); - - const ShapeVector& shape_vec = src.GetAttr("shape"); - const DTypeVector& dtype_vec = src.GetAttr("dtype"); - const IndexedGraph& idx_graph = src.indexed_graph(); - - std::vector > in_layouts_of_node(idx_graph.num_nodes()); - std::vector > out_layouts_of_node(idx_graph.num_nodes()); - std::unordered_map unchanged_nodes; - - if (src.HasAttr("layout")) { - // record layouts so that LayoutTransform pass can fix layouts correctly, - // e.g., conv2d can be replaced by some contrib implement - // whose layout is different from the original one - // (which was imported from a model file). - const auto& layouts = src.GetAttr >("layout"); - for (uint32_t nid = 0; nid < idx_graph.num_nodes(); ++nid) { - const auto &inode = idx_graph[nid]; - // record input layouts for all nodes, - // while replaced nodes will ignore the records here and have undefined input layouts. - std::vector in_layout; - for (const auto& e : inode.inputs) { - in_layout.emplace_back(layouts[idx_graph.entry_id(e)]); - } - in_layouts_of_node[nid] = in_layout; - - std::vector out_layout; - for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) { - out_layout.emplace_back(layouts[idx_graph.entry_id(nid, i)]); - } - out_layouts_of_node[nid] = out_layout; - } - } - - auto transform = [&](uint32_t nid, - const NodePtr& n, - std::vector* ret) { - nnvm::compiler::FTVMAlterOpLayout fn_alter_op_layout = - falter_op_layout.get(n->op(), nullptr); - if (fn_alter_op_layout == nullptr) { - // will restore the original input layouts later. 
- unchanged_nodes[n.get()] = nid; - return false; - } - - // construct parameters for registered function - std::vector op_inputs; - tvm::Array tensor_infos; - CHECK_EQ(n->num_inputs(), idx_graph[nid].inputs.size()); - for (uint32_t i = 0; i < n->num_inputs(); ++i) { - const nnvm::NodeEntry& input = n->inputs[i]; - // input operator - Symbol op_input; - op_input.outputs.push_back(input); - op_inputs.push_back(op_input); - - // input tinfo, extract from the original graph - // because it was where infer_shape & infer_type applied. - tvm::Array op_output_tinfos = - GetTensorInfo(idx_graph, idx_graph[nid].inputs[i].node_id, - shape_vec, dtype_vec); - tensor_infos.push_back(op_output_tinfos[input.index]); - } - // callback registered function to get a new operator. - Symbol op; - bool do_alter = - fn_alter_op_layout(n->attrs, Symbol::CreateGroup(op_inputs), tensor_infos, &op); - - if (do_alter) { - *ret = op.outputs; - } else { - // will restore the original input layouts later. - unchanged_nodes[n.get()] = nid; - } - return do_alter; - }; - - Graph ret = nnvm::compiler::GraphTransform(src, transform); - - if (src.HasAttr("layout")) { - // restore the layouts to return graph - const auto& ret_idx = ret.indexed_graph(); - std::vector ret_layouts(ret_idx.num_node_entries(), Layout::Undef()); - for (uint32_t nid = 0; nid < ret_idx.num_nodes(); ++nid) { - const auto& inode = ret_idx[nid]; - if (unchanged_nodes.count(inode.source)) { - const std::vector& in_layouts = - in_layouts_of_node[unchanged_nodes[inode.source]]; - for (uint32_t i = 0; i < inode.inputs.size(); ++i) { - const auto& e = inode.inputs[i]; - ret_layouts[ret_idx.entry_id(e)] = in_layouts[i]; - } - const std::vector& out_layouts = - out_layouts_of_node[unchanged_nodes[inode.source]]; - for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) { - ret_layouts[ret_idx.entry_id(nid, i)] = out_layouts[i]; - } - } - } - - // cannot call indexed_graph() before return the origin Graph, - // thus create a new one. - nnvm::Graph new_ret; - new_ret.outputs = ret.outputs; - new_ret.attrs["layout"] = std::make_shared(std::move(ret_layouts)); - return new_ret; - } - - return ret; -} - -// register pass -NNVM_REGISTER_PASS(AlterOpLayout) -.set_body(AlterOpLayout) -.set_change_graph(true); - -} // namespace -} // namespace compiler -} // namespace nnvm diff --git a/nnvm/src/compiler/compile_engine.cc b/nnvm/src/compiler/compile_engine.cc deleted file mode 100644 index 82d8ff31612e..000000000000 --- a/nnvm/src/compiler/compile_engine.cc +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file compile_engine.cc - * \brief The compile engine. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "graph_hash.h" -#include "compile_engine.h" - -namespace nnvm { -namespace compiler { - -using namespace tvm; - -/*! - * \brief Get type flag from TVM Type - * - * \param type the tvm type. - * \return corresponding DLDataType - */ -int GetTypeFlag(tvm::DataType type) { - if (type == tvm::DataType::Float(32)) return 0; - if (type == tvm::DataType::Float(64)) return 1; - if (type == tvm::DataType::Float(16)) return 2; - if (type == tvm::DataType::UInt(8)) return 3; - if (type == tvm::DataType::Int(32)) return 4; - if (type == tvm::DataType::Int(8)) return 5; - if (type == tvm::DataType::Int(64)) return 6; - if (type == tvm::DataType::Int(16)) return 7; - if (type == tvm::DataType::UInt(16)) return 8; - if (type == tvm::DataType::UInt(32)) return 9; - if (type == tvm::DataType::UInt(64)) return 10; - if (type == tvm::DataType::UInt(1)) return 11; - LOG(FATAL) << "cannot convert " << type; - return 0; -} -// convert from type flag to tvm type. -DataType GetTVMType(int type_flag) { - switch (type_flag) { - case 0: - return tvm::DataType::Float(32); - case 1: - return tvm::DataType::Float(64); - case 2: - return tvm::DataType::Float(16); - case 3: - return tvm::DataType::UInt(8); - case 4: - return tvm::DataType::Int(32); - case 5: - return tvm::DataType::Int(8); - case 6: - return tvm::DataType::Int(64); - case 7: - return tvm::DataType::Int(16); - case 8: - return tvm::DataType::UInt(16); - case 9: - return tvm::DataType::UInt(32); - case 10: - return tvm::DataType::UInt(64); - case 11: - return tvm::DataType::UInt(1); - default: - LOG(FATAL) << "unknown type_flag=" << type_flag; - return DataType::Float(32); - } -} - -// internal compile engine -class CompileEngine { - public: - static CompileEngine* Global() { - static CompileEngine inst; - return &inst; - } - // lower graph possible get back an cached op. - GraphFunc Lower(Graph graph, - const Array& inputs, - const std::string& target, - int master_idx) { - GraphKey key = GraphKeyNode::make(graph, inputs, target); - std::lock_guard lock(mutex_); - auto it = cache_.find(key); - if (it != cache_.end()) { - ++(it->second->use_count); - return it->second->graph_func; - } - GraphFunc f = DoLower(key->graph, key->inputs, key->target, master_idx); - auto n = tvm::make_node(); - n->graph_func = f; - n->use_count = 1; - n->master_idx = master_idx; - cache_[key] = GraphCacheEntry(n); - return f; - } - // List all items in the cache. - Array ListCacheItems() { - std::lock_guard lock(mutex_); - Array items; - for (auto& kv : cache_) { - items.push_back(kv.first); - auto n = tvm::make_node(*(kv.second.operator->())); - items.push_back(GraphCacheEntry(n)); - } - return items; - } - // Find the function given graph key. - GraphCacheEntry Find(const GraphKey& key) { - std::lock_guard lock(mutex_); - auto it = cache_.find(key); - if (it != cache_.end()) { - return it->second; - } else { - return GraphCacheEntry(); - } - } - // Set the given function on given graph key. - void Set(const GraphKey& key, GraphFunc func) { - std::lock_guard lock(mutex_); - auto n = tvm::make_node(); - n->graph_func = func; - n->use_count = 1; - cache_[key] = GraphCacheEntry(n); - } - // Clear the function cache. 
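GetTypeFlag and GetTVMType above are inverse lookup tables between NNVM's integer dtype flags and TVM dtypes. For reference, the same mapping expressed as Python dicts; this mirrors the removed C++ tables and is not code that existed in the tree:

    # flag -> dtype string, mirroring GetTVMType; "uint1" is the boolean type.
    NNVM_TYPE_FLAG_TO_DTYPE = {
        0: "float32", 1: "float64", 2: "float16",
        3: "uint8",   4: "int32",   5: "int8",
        6: "int64",   7: "int16",   8: "uint16",
        9: "uint32", 10: "uint64", 11: "uint1",
    }
    DTYPE_TO_NNVM_TYPE_FLAG = {v: k for k, v in NNVM_TYPE_FLAG_TO_DTYPE.items()}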
- void Clear() { - std::lock_guard lock(mutex_); - cache_.clear(); - } - - // get schedule and its args - std::tuple, Graph> - GetScheduleArgs(Graph graph, - const Array &inputs, - const std::string &target, - int master_idx, - std::string *readable_name, - Array *outputs) { - // shape, type - static auto& fcompute = - nnvm::Op::GetAttr("FTVMCompute"); - static auto& fschedule = - nnvm::Op::GetAttr("FTVMSchedule"); - - std::vector ishape; - std::vector idtype; - - for (const tvm::Tensor t : inputs) { - std::vector shape; - for (Expr v : t->shape) { - CHECK(v.as()); - shape.push_back(v.as()->value); - } - ishape.emplace_back(TShape(shape.begin(), shape.end())); - idtype.emplace_back(GetTypeFlag(t->dtype)); - } - graph = pass::InferShape(graph, ishape); - graph = pass::InferType(graph, idtype); - - const ShapeVector& shape_vec = graph.GetAttr("shape"); - const DTypeVector& dtype_vec = graph.GetAttr("dtype"); - const IndexedGraph& idx = graph.indexed_graph(); - CHECK_EQ(inputs.size(), idx.input_nodes().size()); - - std::vector tensor_vec(idx.num_node_entries()); - for (size_t i = 0; i < idx.input_nodes().size(); ++i) { - uint32_t nid = idx.input_nodes()[i]; - tensor_vec[idx.entry_id(nid, 0)] = inputs[i]; - } - - std::ostringstream readable_name_os; - readable_name_os << "fuse"; - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) continue; - Array op_inputs, out_info; - readable_name_os << "_" << inode.source->op()->name; - // input array - for (const IndexedGraph::NodeEntry& e : inode.inputs) { - const tvm::Tensor& t = tensor_vec[idx.entry_id(e)]; - CHECK(t.defined()); - op_inputs.push_back(t); - } - // output hint - for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) { - Array shape; - for (int64_t x : shape_vec[idx.entry_id(nid, i)]) { - CHECK_LE(x, static_cast(std::numeric_limits::max())); - shape.push_back(make_const(DataType::Int(32), x)); - } - out_info.push_back( - placeholder(shape, - GetTVMType(dtype_vec[idx.entry_id(nid, i)]))); - } - // get default - Array out = fcompute[inode.source->op()]( - inode.source->attrs, op_inputs, out_info); - CHECK_EQ(out.size(), inode.source->num_outputs()); - - // check output dimentions also match - // This check is to make sure the NNVM operator Infer match with Compute result. - // Missing this check may pass the build but leads to runtime errors. - for (uint32_t i = 0; i < out.size(); ++i) { - CHECK_EQ(out[i].ndim(), out_info[i].ndim()) << inode.source->op()->name; - tvm::Tensor inferred_tensor = out[i]; - tvm::Tensor computed_tensor = out_info[i]; - for (uint32_t j = 0; j < inferred_tensor->shape.size(); ++j) { - if ((as_const_int(inferred_tensor->shape[j])) && - (as_const_int(computed_tensor->shape[j]))) - CHECK_EQ((*as_const_int(inferred_tensor->shape[j])), - (*as_const_int(computed_tensor->shape[j]))) << inode.source->op()->name; - } - } - - // schedule on root node, and use master's schedule - for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) { - uint32_t eid = idx.entry_id(nid, index); - tensor_vec[eid] = out[index]; - } - } - // Schedule on final output. 
- Array all_args = inputs; - Array outs; - for (const IndexedGraph::NodeEntry& e : idx.outputs()) { - const tvm::Tensor& t = tensor_vec[idx.entry_id(e)]; - CHECK(t.defined()); - outs.push_back(t); - all_args.push_back(t); - } - - Schedule sch = fschedule[idx[master_idx].source->op()]( - idx[master_idx].source->attrs, outs, target); - - // store extra return values - if (readable_name != nullptr) { - *readable_name = readable_name_os.str(); - } - if (outputs != nullptr) { - *outputs = outs; - } - - return std::make_tuple(sch, all_args, graph); - } - - // run the actual lowering process - GraphFunc DoLower(Graph graph, - const Array& inputs, - const std::string& target, - int master_idx) { - std::string readable_name; - Array all_args; - Array outputs; - Schedule sch; - - std::tie(sch, all_args, graph) = GetScheduleArgs( - graph, inputs, target, master_idx, - &readable_name, &outputs); - - auto gf = tvm::make_node(); - gf->target = target; - gf->func_name = GetUniqeName(readable_name); - gf->inputs = inputs; - gf->outputs = outputs; - static const PackedFunc& flower = GetPackedFunc("nnvm.compiler.lower"); - gf->funcs = flower(sch, all_args, gf->func_name, graph); - return GraphFunc(gf); - } - - private: - // Get unique name - std::string GetUniqeName(std::string name) { - while (true) { - auto it = name_map_.find(name); - if (it == name_map_.end()) { - name_map_[name] = 1; - return name; - } else { - std::ostringstream os; - os << name << "_" << it->second; - ++(it->second); - name = os.str(); - } - } - return name; - } - - // global mutex - std::mutex mutex_; - // the name map - std::unordered_map name_map_; - // the compiler cache - std::unordered_map cache_; -}; - -GraphFunc GraphLower(Graph graph, - const Array& inputs, - const std::string& target, - int master_idx) { - return CompileEngine::Global()->Lower( - graph, inputs, target, master_idx); -} - -// Expose cache to front end -TVM_REGISTER_GLOBAL("nnvm.compiler.ListCacheItems") -.set_body([](tvm::runtime::TVMArgs args, tvm::runtime::TVMRetValue *rv) { - *rv = CompileEngine::Global()->ListCacheItems(); - }); - -TVM_REGISTER_GLOBAL("nnvm.compiler.ClearCache") -.set_body([](tvm::runtime::TVMArgs args, tvm::runtime::TVMRetValue *rv) { - CompileEngine::Global()->Clear(); - }); - -// NOTE: this involves graph lookup and can be slow -TVM_REGISTER_GLOBAL("nnvm.compiler.GetCacheItem") -.set_body([](tvm::runtime::TVMArgs args, tvm::runtime::TVMRetValue *rv) { - *rv = CompileEngine::Global()->Find(args[0]); - }); - -TVM_REGISTER_GLOBAL("nnvm.compiler.SetCacheItem") -.set_body([](tvm::runtime::TVMArgs args, tvm::runtime::TVMRetValue *rv) { - CompileEngine::Global()->Set(args[0], args[1]); - }); - -TVM_REGISTER_GLOBAL("nnvm.compiler.GraphKeyGetGraph") -.set_body([](tvm::runtime::TVMArgs args, tvm::runtime::TVMRetValue *rv) { - *rv = args[0].operator GraphKey()->graph; - }); - -TVM_REGISTER_GLOBAL("nnvm.compiler.MakeGraphKey") -.set_body_typed(GraphKeyNode::make); - -// This can be used to extract workloads from nnvm compiler -TVM_REGISTER_GLOBAL("nnvm.compiler.CacheItem2ScheduleArgs") -.set_body([](TVMArgs args, TVMRetValue *rv) { - Array item = args[0]; - - const GraphKeyNode *key = reinterpret_cast(item[0].get()); - const GraphCacheEntryNode *value = reinterpret_cast(item[1].get()); - - // extract arguments from cached item - Graph graph = key->graph; - const Array &inputs = key->inputs; - std::string target = args[1]; - int master_idx = value->master_idx; - - Schedule sch; - Array all_args; - std::tie(sch, all_args, graph) = - 
CompileEngine::Global()->GetScheduleArgs( - graph, inputs, target, master_idx, nullptr, nullptr); - - Array ret; - ret.push_back(sch); - ret.push_back(all_args); - *rv = ret; - }); - -TVM_REGISTER_NODE_TYPE(GraphFuncNode); -TVM_REGISTER_NODE_TYPE(GraphCacheEntryNode); - -TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) -.set_dispatch([](const ObjectRef& ref, IRPrinter* p) { - auto* op = static_cast(ref.get()); - p->stream << "GraphFunc(name=" << op->func_name - << ", addr=" << op << ")"; -}); - -} // namespace compiler -} // namespace nnvm diff --git a/nnvm/src/compiler/compile_engine.h b/nnvm/src/compiler/compile_engine.h deleted file mode 100644 index b4fec104bbcb..000000000000 --- a/nnvm/src/compiler/compile_engine.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file compile_engine.h - * \brief Internal engine to compile a subgraph fragment and cache compilation. - */ -#ifndef NNVM_COMPILER_COMPILE_ENGINE_H_ -#define NNVM_COMPILER_COMPILE_ENGINE_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "graph_hash.h" - -namespace nnvm { -namespace compiler { - -/*! \brief A TVM Node to represent compiled graph function */ -struct GraphFuncNode : public tvm::Node { - /* \brief compiled target */ - std::string target; - /*! \brief Function name */ - std::string func_name; - /* \brief The inputs to the function */ - tvm::Array inputs; - /* \brief The outputs to the function */ - tvm::Array outputs; - /*! \brief The lowered functions */ - tvm::Array funcs; - - void VisitAttrs(tvm::AttrVisitor* v) { - v->Visit("target", &target); - v->Visit("func_name", &func_name); - v->Visit("inputs", &inputs); - v->Visit("outputs", &outputs); - v->Visit("funcs", &funcs); - } - - static constexpr const char* _type_key = "GraphFunc"; - TVM_DECLARE_NODE_TYPE_INFO(GraphFuncNode, tvm::Node); -}; - -TVM_DEFINE_NODE_REF(GraphFunc, GraphFuncNode); - -/*! \brief Cache Entry in the graph */ -struct GraphCacheEntryNode : public tvm::Node { - /*! \brief The graph function */ - GraphFunc graph_func; - /*! \brief Usage statistics */ - int use_count{0}; - /*! 
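The packed functions registered above ("nnvm.compiler.ListCacheItems", "nnvm.compiler.CacheItem2ScheduleArgs", and friends) were the hook for pulling fused workloads back out of the compile-engine cache from Python. A hedged sketch of that usage, assuming a pre-removal build with the NNVM compiler library loaded and "llvm" as an example target:

    import tvm

    list_items = tvm.get_global_func("nnvm.compiler.ListCacheItems")
    to_sched_args = tvm.get_global_func("nnvm.compiler.CacheItem2ScheduleArgs")

    items = list_items()                    # alternates GraphKey, GraphCacheEntry
    for i in range(0, len(items), 2):
        pair = tvm.convert([items[i], items[i + 1]])
        ret = to_sched_args(pair, "llvm")   # returns [schedule, all_args]
        sch, all_args = ret[0], ret[1]
        # sch / all_args describe one fused function and can be lowered or tuned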
\brief Index of the master node for calling schedule*/ - int master_idx; - - void VisitAttrs(tvm::AttrVisitor* v) { - v->Visit("graph_func", &graph_func); - v->Visit("use_count", &use_count); - v->Visit("master_idx", &master_idx); - } - static constexpr const char* _type_key = "GraphCacheEntry"; - TVM_DECLARE_NODE_TYPE_INFO(GraphCacheEntryNode, tvm::Node); -}; - -class GraphCacheEntry : public ::tvm::NodeRef { - public: - GraphCacheEntry() {} - explicit GraphCacheEntry(::tvm::NodePtr<::tvm::Node> n) : NodeRef(n) {} - GraphCacheEntryNode* operator->() { - return static_cast(get_mutable()); - } - using ContainerType = GraphCacheEntryNode; -}; - -/*! - * \brief Call compile engine to lower a graph with given inputs. - * - * \param graph The graph to be compiled - * \param inputs The input specification. - * \param target The build target - * \param master_idx The index of master node for calling schedule - * - * \return func A lowered tvm function. - */ -GraphFunc GraphLower(Graph graph, - const Array& inputs, - const std::string& target, - int master_idx); - -/*! - * \brief Get type flag from TVM Type - * - * \param type the tvm type - * \return corresponding DLDataType - */ -int GetTypeFlag(tvm::DataType type); - -/*! - * \brief Get TVM Type from type flag - * - * \param type_flag the type flag - * \return corresponding TVM type - */ -tvm::DataType GetTVMType(int type_flag); - -} // namespace compiler -} // namespace nnvm - -#endif // NNVM_COMPILER_COMPILE_ENGINE_H_ diff --git a/nnvm/src/compiler/fold_scale_axis.cc b/nnvm/src/compiler/fold_scale_axis.cc deleted file mode 100644 index 6e5e73788c4c..000000000000 --- a/nnvm/src/compiler/fold_scale_axis.cc +++ /dev/null @@ -1,602 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file fold_scale_axis.cc - * \author Fold scaling parameter of axis into weight of conv/dense -*/ -#include -#include -#include -#include -#include -#include -#include "pattern_util.h" -#include "graph_transform.h" - -namespace nnvm { -namespace compiler { - -enum FoldScaleKind { - // No folding is applied - kNone, - // The folding decision is pending, we can fold on a state. - kPending, - // The original operator that contains the scale. - kProvider, - // The final conumer of axis scale using multiply - // Likely be a conv or dense operator. 
- kMulConsumer, - // The final conumer of axis scale using division - kDivConsumer -}; - -struct FoldChainInfo { - // Entry kind - FoldScaleKind kind{kNone}; - // The output axis to be folded - int axis{0}; - // Source node in the fold chain - int source{0}; -}; - -// The entry of folding chains on which -// we should perform folding on -struct FoldChainEntry { - // Fold information - FoldChainInfo info; - // Number of outgoing fork count - // in forward propagation. - int fork_count{0}; - // Following field only used by provider. - // The input index - int fold_input_index{1}; - // The scale entry - NodeEntry scale_entry; -}; - -// Try to pass axis scaling to backward, -// Given that we we know the status of current fold axis. -// return whether the forward signal is consumed. -using FScaleAxisBackward = std::function< - bool(const NodeAttrs& attrs, - const std::vector& in_shape, - const std::vector& out_shape, - const FoldChainInfo& out_info, - std::vector* in_info)>; - - -// Try to pass axis scaling to forward, -// Given that we we know the status of one of its input to be pending -// also update other input info -// return whether the forward signal is consumed. -using FScaleAxisForward = std::function< - bool(const NodeAttrs& attrs, - const std::vector& in_shape, - const std::vector& out_shape, - std::vector* in_info, - FoldChainInfo* out_info)>; - - -// Detect if there is a scaling axis happening -bool DetectScaleAxis(const IndexedGraph& idx, - uint32_t nid, - const ShapeVector& shape_vec, - const std::vector& ref_count, - bool is_forward, - std::vector* chain) { - const IndexedGraph::Node& inode = idx[nid]; - static const Op* bcast_mul = Op::Get("broadcast_mul"); - static const Op* expand_dims = Op::Get("expand_dims"); - if (inode.source->op() != bcast_mul) return false; - const TShape& oshape = shape_vec[idx.entry_id(nid, 0)]; - CHECK_NE(oshape.ndim(), 0); - if (oshape.ndim() <= 1) return false; - for (int i = 0; i < 2; ++i) { - const IndexedGraph::NodeEntry& a = inode.inputs[i]; - const IndexedGraph::NodeEntry& b = inode.inputs[1 - i]; - std::pair axis = - MatchBroadcast1DAxis(oshape, shape_vec[idx.entry_id(a)]); - if (axis.first != -1 && - shape_vec[idx.entry_id(b)] == oshape) { - if (ref_count[a.node_id] != 1) return false; - if (is_forward && ref_count[nid] != 1) return false; - if (!is_forward && ref_count[b.node_id] != 1) return false; - const IndexedGraph::Node& anode = idx[a.node_id]; - // mark the current entry. - FoldChainEntry& e = (*chain)[nid]; - if (anode.source->is_variable()) { - e.fold_input_index = 1 - i; - e.scale_entry = inode.source->inputs[1 - i]; - } else if (anode.source->op() == expand_dims && - shape_vec[idx.entry_id(anode.source->inputs[0])].ndim() == 1) { - e.fold_input_index = 1 - i; - e.scale_entry = anode.source->inputs[0]; - } else { - return false; - } - e.info.axis = axis.first; - e.info.kind = kPending; - e.info.source = nid; - e.fork_count = 1; - // In the backward message passing - // We need to eagerly pass it to the input - // In the forward message passing - // we will "pull" the message from input. 
- if (!is_forward) { - FoldChainEntry& enext = (*chain)[b.node_id]; - enext.info.axis = e.info.axis; - enext.info.kind = kPending; - enext.info.source = nid; - } - return true; - } - } - return false; -} - -Graph FoldScaleAxis(Graph src) { - // Operator pattern - static auto& fbackward = - nnvm::Op::GetAttr("FScaleAxisBackward"); - static auto& fforward = - nnvm::Op::GetAttr("FScaleAxisForward"); - const IndexedGraph& idx = src.indexed_graph(); - const ShapeVector& shape_vec = src.GetAttr("shape"); - std::vector ref_count = GetNodeRefCounts(idx); - std::vector bwd_chain(idx.num_nodes()); - std::vector fwd_chain(idx.num_nodes()); - // shape hint for the inference. - std::vector in_shape, out_shape; - - // perform backward folding. - for (uint32_t i = idx.num_nodes(); i != 0; --i) { - uint32_t nid = i - 1; - const auto& inode = idx[nid]; - if (inode.source->is_variable()) continue; - if (DetectScaleAxis(idx, nid, shape_vec, - ref_count, false, &bwd_chain)) continue; - if (bwd_chain[nid].info.kind != kPending) continue; - // if referred by multiple node, cannot do propagation - if (ref_count[nid] != 1 || !fbackward.count(inode.source->op())) { - bwd_chain[nid].info.kind = kNone; continue; - } - // get input shape and output shape. - in_shape.clear(); out_shape.clear(); - for (const IndexedGraph::NodeEntry& e : inode.inputs) { - in_shape.push_back(shape_vec[idx.entry_id(e)]); - } - for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) { - out_shape.push_back(shape_vec[idx.entry_id(nid, i)]); - } - std::vector in_info(in_shape.size(), FoldChainInfo()); - bool consumed = fbackward[inode.source->op()]( - inode.source->attrs, - in_shape, - out_shape, - bwd_chain[nid].info, - &in_info); - CHECK_EQ(in_info.size(), in_shape.size()); - // propagate back. - bool can_prop = true; - for (size_t i = 0; i < in_info.size(); ++i) { - const IndexedGraph::NodeEntry& e = inode.inputs[i]; - if (ref_count[e.node_id] != 1 || - idx[e.node_id].source->num_outputs() != 1) { - can_prop = false; break; - } - } - if (!can_prop) continue; - for (size_t i = 0; i < in_info.size(); ++i) { - const IndexedGraph::NodeEntry& e = inode.inputs[i]; - bwd_chain[e.node_id].info = in_info[i]; - } - // mark consumed by making the source as provider. - if (consumed) { - bwd_chain[bwd_chain[nid].info.source].info.kind = kProvider; - } - } - - - // perform forward folding. - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) continue; - // skip scales that are already folded in backward. - if (bwd_chain[nid].info.kind == kProvider) continue; - if (DetectScaleAxis(idx, nid, shape_vec, - ref_count, true, &fwd_chain)) continue; - if (inode.source->num_outputs() != 1) continue; - // Do state update - // get input shape and output shape. 
- std::vector in_info; - FoldChainInfo out_info; - int num_inpending = 0; - in_shape.clear(); out_shape.clear(); - for (const IndexedGraph::NodeEntry& e : inode.inputs) { - in_shape.push_back(shape_vec[idx.entry_id(e)]); - // input information - in_info.push_back(fwd_chain[e.node_id].info); - if (fwd_chain[e.node_id].info.kind == kPending) { - ++num_inpending; - } - } - for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) { - out_shape.push_back(shape_vec[idx.entry_id(nid, i)]); - } - if (num_inpending != 1 || - !fforward.count(inode.source->op())) continue; - bool consumed = fforward[inode.source->op()]( - inode.source->attrs, - in_shape, - out_shape, - &in_info, - &out_info); - // update input info - for (size_t i = 0; i < in_info.size(); ++i) { - fwd_chain[inode.inputs[i].node_id].info = in_info[i]; - } - if (consumed) { - fwd_chain[nid].info = out_info; - for (size_t i = 0; i < in_info.size(); ++i) { - if (in_info[i].kind == kPending) { - if (--fwd_chain[in_info[i].source].fork_count == 0) { - fwd_chain[in_info[i].source].info.kind = kProvider; - } - } - } - } else { - // can propagate condition - if (inode.source->num_outputs() == 1) { - fwd_chain[nid].info = out_info; - if (out_info.kind == kPending) { - // When there is multiple reference to input - // every path have to be consumed - fwd_chain[out_info.source].fork_count += ref_count[nid] - 1; - } - } - } - } - - auto transform = [&](uint32_t nid, const NodePtr& n, std::vector* ret) { - NodeEntry rvalue = NodeEntry{n, 0, 0}; - { - // Backward chain - const FoldChainEntry& e = bwd_chain[nid]; - if (e.info.kind == kMulConsumer && - bwd_chain[e.info.source].info.kind == kProvider) { - const FoldChainEntry& se = bwd_chain[e.info.source]; - CHECK_EQ(n->num_outputs(), 1); - NodeEntry scale = ExpandBiasToMatchAxis( - se.scale_entry, - shape_vec[idx.entry_id(nid, 0)].ndim(), - shape_vec[idx.entry_id(se.scale_entry)].ndim(), - e.info.axis); - rvalue = MakeNode("broadcast_mul", n->attrs.name + "_sc", - {rvalue, scale}); - } else if (e.info.kind == kProvider) { - rvalue = n->inputs[e.fold_input_index]; - } - } - // Note that the value might get transformed twice if it - // folds value from both fwd and backward chain. - { - // forward chain - const FoldChainEntry& e = fwd_chain[nid]; - if (e.info.kind == kMulConsumer && - fwd_chain[e.info.source].info.kind == kProvider) { - const FoldChainEntry& se = fwd_chain[e.info.source]; - CHECK_EQ(n->num_outputs(), 1); - NodeEntry scale = ExpandBiasToMatchAxis( - se.scale_entry, - shape_vec[idx.entry_id(nid, 0)].ndim(), - shape_vec[idx.entry_id(se.scale_entry)].ndim(), - e.info.axis); - rvalue = MakeNode("broadcast_mul", n->attrs.name + "_sc", - {rvalue, scale}); - } else if (e.info.kind == kDivConsumer && - fwd_chain[e.info.source].info.kind == kProvider) { - const FoldChainEntry& se = fwd_chain[e.info.source]; - CHECK_EQ(n->num_outputs(), 1); - NodeEntry scale = ExpandBiasToMatchAxis( - se.scale_entry, - shape_vec[idx.entry_id(nid, 0)].ndim(), - shape_vec[idx.entry_id(se.scale_entry)].ndim(), - e.info.axis); - rvalue = MakeNode("broadcast_div", n->attrs.name + "_sc", - {rvalue, scale}); - } else if (e.info.kind == kProvider) { - rvalue = n->inputs[e.fold_input_index]; - } - } - if (rvalue.node == n) { - return false; - } else { - *ret = {rvalue}; - return true; - } - }; - return GraphTransform(src, transform); -} - -NNVM_REGISTER_PASS(FoldScaleAxis) -.set_body(FoldScaleAxis); - -// property registration. 
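The pass body is registered above; the per-operator `FScaleAxisForward` / `FScaleAxisBackward` properties it consults are registered next. To make the intent of the whole transformation concrete, here is a standalone sketch (not taken from the deleted sources; the helper name and the flat `std::vector` weight layout are illustrative assumptions) of the arithmetic identity being exploited: a per-channel `broadcast_mul` feeding a convolution can be absorbed into the weights, because convolution is linear in each input channel.

```cpp
#include <cstddef>
#include <vector>

// conv2d(x * scale_c, W) == conv2d(x, W') with W'[o][i][h][w] = W[o][i][h][w] * scale[i],
// so the scale applied to the activation can be folded into the weights ahead of time.
// Hypothetical helper; OIHW weight layout assumed.
void FoldScaleIntoWeights(std::vector<float>* weights,       // O*I*H*W values, OIHW order
                          const std::vector<float>& scale,   // one factor per input channel (length I)
                          std::size_t O, std::size_t I, std::size_t H, std::size_t W) {
  for (std::size_t o = 0; o < O; ++o) {
    for (std::size_t i = 0; i < I; ++i) {
      for (std::size_t hw = 0; hw < H * W; ++hw) {
        (*weights)[(o * I + i) * H * W + hw] *= scale[i];
      }
    }
  }
}
```

The backward chain applies the same identity in the opposite direction: a scale applied after the convolution is folded along the weights' output-channel axis (axis 0 for OIHW), and into the bias when one is present, which is what `Conv2DScaleAxisBackward` below records.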
-bool ReluScaleAxisBackward( - const NodeAttrs& attrs, - const std::vector& in_shape, - const std::vector& out_shape, - const FoldChainInfo& out_info, - std::vector* in_axis) { - (*in_axis)[0] = out_info; - return false; -} - -bool ReluScaleAxisForward( - const NodeAttrs& attrs, - const std::vector& in_shape, - const std::vector& out_shape, - std::vector* in_info, - FoldChainInfo* out_info) { - *out_info = (*in_info)[0]; - return false; -} - -NNVM_REGISTER_OP(relu) -.set_attr("FScaleAxisBackward", ReluScaleAxisBackward); - -NNVM_REGISTER_OP(leaky_relu) -.set_attr("FScaleAxisBackward", ReluScaleAxisBackward); - -NNVM_REGISTER_OP(relu) -.set_attr("FScaleAxisForward", ReluScaleAxisForward); - -NNVM_REGISTER_OP(leaky_relu) -.set_attr("FScaleAxisForward", ReluScaleAxisForward); - -// property registration. -template -bool Pool2DBackward( - const NodeAttrs& attrs, - const std::vector& in_shape, - const std::vector& out_shape, - const FoldChainInfo& out_info, - std::vector* in_axis) { - const T& param = nnvm::get(attrs.parsed); - if (out_info.axis == 1 && param.layout == "NCHW") { - (*in_axis)[0] = out_info; - } - return false; -} - -template -bool Pool2DForward( - const NodeAttrs& attrs, - const std::vector& in_shape, - const std::vector& out_shape, - std::vector* in_info, - FoldChainInfo* out_info) { - const T& param = nnvm::get(attrs.parsed); - if ((*in_info)[0].axis == 1 && param.layout == "NCHW") { - *out_info = (*in_info)[0]; - } - return false; -} - -NNVM_REGISTER_OP(max_pool2d) -.set_attr("FScaleAxisBackward", Pool2DBackward); - -NNVM_REGISTER_OP(avg_pool2d) -.set_attr("FScaleAxisBackward", Pool2DBackward); - -NNVM_REGISTER_OP(max_pool2d) -.set_attr("FScaleAxisForward", Pool2DForward); - -NNVM_REGISTER_OP(avg_pool2d) -.set_attr("FScaleAxisForward", Pool2DForward); - - - -bool BroadcastAddSubScaleAxisBackward( - const NodeAttrs& attrs, - const std::vector& in_shape, - const std::vector& out_shape, - const FoldChainInfo& out_info, - std::vector* in_axis) { - if (out_info.kind != kPending) return false; - for (int i = 0; i < 2; ++i) { - std::pair m = MatchBroadcast1DAxis(out_shape[0], in_shape[1 - i]); - if (m.second != -1 && - in_shape[i] == out_shape[0] && - m.first == out_info.axis) { - (*in_axis)[i].kind = kPending; - (*in_axis)[i].axis = out_info.axis; - (*in_axis)[i].source = out_info.source; - (*in_axis)[1 - i].kind = kMulConsumer; - (*in_axis)[1 - i].axis = m.second; - (*in_axis)[1 - i].source = out_info.source; - return false; - } - } - return false; -} - -bool BroadcastAddSubScaleAxisForward( - const NodeAttrs& attrs, - const std::vector& in_shape, - const std::vector& out_shape, - std::vector* in_info, - FoldChainInfo* out_info) { - for (int i = 0; i < 2; ++i) { - if ((*in_info)[i].kind == kPending) { - std::pair m = MatchBroadcast1DAxis(out_shape[0], in_shape[1 - i]); - if (m.second != -1 && - in_shape[i] == out_shape[0] && - m.first == (*in_info)[i].axis) { - out_info->kind = kPending; - out_info->axis = m.first; - out_info->source = (*in_info)[i].source; - (*in_info)[1 - i].kind = kDivConsumer; - (*in_info)[1 - i].axis = m.second; - (*in_info)[1 - i].source = (*in_info)[i].source; - return false; - } - } - } - return false; -} - -NNVM_REGISTER_OP(broadcast_add) -.set_attr("FScaleAxisBackward", BroadcastAddSubScaleAxisBackward); - -NNVM_REGISTER_OP(broadcast_sub) -.set_attr("FScaleAxisBackward", BroadcastAddSubScaleAxisBackward); - -NNVM_REGISTER_OP(broadcast_add) -.set_attr("FScaleAxisForward", BroadcastAddSubScaleAxisForward); - -NNVM_REGISTER_OP(broadcast_sub) 
-.set_attr("FScaleAxisForward", BroadcastAddSubScaleAxisForward); - -bool Conv2DScaleAxisBackward( - const NodeAttrs& attrs, - const std::vector& in_shape, - const std::vector& out_shape, - const FoldChainInfo& out_info, - std::vector* in_axis) { - using top::Conv2DParam; - const Conv2DParam& param = nnvm::get(attrs.parsed); - if (out_info.kind != kPending) return false; - // only optimize for kernel layout OIHW for now - if (param.kernel_layout == "OIHW" && out_info.axis == 1) { - (*in_axis)[1].kind = kMulConsumer; - (*in_axis)[1].axis = 0; - (*in_axis)[1].source = out_info.source; - if (param.use_bias) { - (*in_axis)[2].kind = kMulConsumer; - (*in_axis)[2].axis = 0; - (*in_axis)[2].source = out_info.source; - } - return true; - } else { - return false; - } -} - -bool Conv2DScaleAxisForward( - const NodeAttrs& attrs, - const std::vector& in_shape, - const std::vector& out_shape, - std::vector* in_info, - FoldChainInfo* out_info) { - using top::Conv2DParam; - const Conv2DParam& param = nnvm::get(attrs.parsed); - if ((*in_info)[0].kind != kPending) return false; - // only optimize for nchw for now - if (param.kernel_layout == "OIHW" && (*in_info)[0].axis == 1) { - // Check whether it is depthwise conv2d - if (param.use_bias) { - CHECK_EQ(in_shape.size(), 3U) << "Input:[data, weight, bias]"; - } else { - CHECK_EQ(in_shape.size(), 2U) << "Input:[data, weight]"; - } - - auto dshape = in_shape.at(0); - CHECK_EQ(dshape.ndim(), 4U) << "Input data shape should be 4D"; - - // TODO(FrozenGene): Currently, we don't support conv2d's groups != in channels. - if (param.groups > 1 && dshape[1] != param.groups) { - LOG(WARNING) << "FoldScaleAxis optimization doesn't support conv2d " - << "with groups != in channels. We will skip FoldScaleAxis " - << "optimization for this op."; - return false; - } - - - // input channel equals to groups, which means depthwise conv2d - bool is_depthwise_conv2d = (dshape[1] == param.groups); - - // if it is depthwise convolution, the weight fold axis should along to axis 0. - // For example: - // data shape [1,54,63,127] weights shape [54,1,3,3], scale shape [54] - // depthwise convolution's weights shape means we have divided the data shape's channel - // to groups parties. Here, we divide 54 channels into 54 parties. Every part size is 1. - // weights shape's first dimision means how many parties we have divided (mapping to - // input shape's channel). So, in the depthwise convolution, we shouldn't do like - // traditional convolution(i.e. OIHW) - - // Backgroud of this algorithm: - - // Original Graph: - // Graph(%x, - // %in_scale, - // %weight, - // %bias, - // %out_scale) { - // %1 = __add_scalar__(%x, scalar='1') - // %3 = expand_dims(%in_scale, num_newaxis='2', axis='1') - // %4 = broadcast_mul(%1, %3) - // %7 = conv2d(%4, %weight, %bias, padding='(1, 1)', kernel_size='(3, 3)', channels='2') - // %8 = relu(%7) - // %10 = expand_dims(%out_scale, num_newaxis='2', axis='1') - // %11 = broadcast_mul(%8, %10) - // ret %11 - // } - - // Optimized Graph: - // Graph(%x, - // %weight, - // %out_scale, - // %in_scale, - // %bias) { - // %1 = __add_scalar__(%x, scalar='1') - // %4 = expand_dims(%out_scale, num_newaxis='3', axis='1') - // %5 = broadcast_mul(%weight, %4) - // %7 = expand_dims(%in_scale, num_newaxis='2', axis='1') - // %8 = broadcast_mul(%5, %7) - // %10 = broadcast_mul(%bias, %out_scale) - // %11 = conv2d(%1, %8, %10, padding='(1, 1)', kernel_size='(3, 3)', channels='2') - // %12 = relu(%11) - // ret %12 - // } - - // Conv2DScaleAxisForward will need in_scale. 
Conv2DScaleAxisBackward will need out_scale. - // in_scale will apply into input data's channel (in_channel). out_scale will apply in - // conv2d's result, which will apply in weight's output channel. - // So, default Conv2DScaleAxisForward will fold axis 1 (weights' input channel). - // Conv2DScaleAxisBackward will fold axis 0 (weights' output channel). - // But depthwise convolution is another story as said previously. - (*in_info)[1].kind = kMulConsumer; - (*in_info)[1].axis = is_depthwise_conv2d ? 0 : 1; - (*in_info)[1].source = (*in_info)[0].source; - return true; - } else { - return false; - } -} - -NNVM_REGISTER_OP(conv2d) -.set_attr("FScaleAxisBackward", Conv2DScaleAxisBackward); - -NNVM_REGISTER_OP(conv2d) -.set_attr("FScaleAxisForward", Conv2DScaleAxisForward); - -} // namespace compiler -} // namespace nnvm diff --git a/nnvm/src/compiler/graph_compile.cc b/nnvm/src/compiler/graph_compile.cc deleted file mode 100644 index 4b738b2c520a..000000000000 --- a/nnvm/src/compiler/graph_compile.cc +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file graph_compile.cc - * \brief Compile a graph. It lowers the graph nodes into low level IR. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "compile_engine.h" -#include "graph_fuse.h" -#include "graph_runtime.h" -#include "pattern_util.h" - -namespace nnvm { -namespace compiler { - -using namespace tvm; - -// Decorate the result of PlanMemory -// This function does two things: -// - Give separate memory to each variable. -// - Tie the memory of output/lhs in assign node properly -// so the execution of assign can have side effect. -nnvm::Graph DecorateMemoryPlan( - nnvm::Graph g, - const std::vector& assign_flag) { - const IndexedGraph& idx = g.indexed_graph(); - StorageVector storage_vec = g.MoveCopyAttr("storage_id"); - g.attrs.erase("storage_allocated_bytes"); - g.attrs.erase("storage_inplace_index"); - size_t num_not_allocated = g.MoveCopyAttr( - "storage_num_not_allocated"); - CHECK_EQ(num_not_allocated, 0U) - << "Can only build inference graph with all statically allocated memory"; - - // Reassign variable id so that they are different. - int max_id = 0; - for (size_t i = 0; i < storage_vec.size(); ++i) { - max_id = std::max(storage_vec[i] + 1, max_id); - } - for (uint32_t nid : idx.input_nodes()) { - storage_vec[idx.entry_id(nid, 0)] = max_id++; - } - // Tie up the assign node storage properly. 
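- // assign_flag[nid] == 1 means the assign's output entry must alias the storage of
- // its first input (the variable being mutated), while the rhs is still copied in.
- // assign_flag[nid] == 2 additionally maps the rhs entry (inputs[1]) onto that same
- // storage: GraphCompile has already rewritten such an assign to "__nop", so the rhs
- // kernel writes straight into the variable's buffer and no copy happens at runtime.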
- for (uint32_t nid = 0 ; nid < idx.num_nodes(); ++nid) { - if (assign_flag[nid] == 0) continue; - const auto& inode = idx[nid]; - int var_storage_id = storage_vec[idx.entry_id(inode.inputs[0])]; - storage_vec[idx.entry_id(nid, 0)] = var_storage_id; - - if (assign_flag[nid] == 2) { - storage_vec[idx.entry_id(inode.inputs[1])] = var_storage_id; - } - } - g.attrs["storage_id"] = std::make_shared(std::move(storage_vec)); - return g; -} - -nnvm::Graph GraphCompile(const nnvm::Graph& g) { - // Get attributes from the graph. - const ShapeVector& shape_vec = g.GetAttr("shape"); - const DTypeVector& dtype_vec = g.GetAttr("dtype"); - const GroupVec& group_vec = g.GetAttr("group_root"); - const MasterVec& master_vec = g.GetAttr("group_master"); - const PatternVec& pattern_vec = g.GetAttr("pattern"); - - CHECK(g.HasAttr("fused_entry")) << "Fusion hasn't been applied yet."; - FuseEntryVec fuse_entries = g.GetAttr("fused_entry"); - - std::string target = g.GetAttr("target"); - std::string target_host; - - if (g.HasAttr("target_host")) { - target_host = g.GetAttr("target_host"); - } - // Specially handle assign. - const nnvm::Op* assign_op = nnvm::Op::Get("_assign"); - - // Start lowering. - Array func_list; - std::unordered_set func_set; - const IndexedGraph& idx = g.indexed_graph(); - - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) continue; - int root_id = group_vec[nid]; - if (static_cast(nid) != root_id) continue; - int master = master_vec[root_id]; - FuseEntry& fe = fuse_entries[root_id]; - - const IndexedGraph& subidx = fe.subgraph.indexed_graph(); - CHECK_EQ(subidx.input_nodes().size(), fe.imap.size()); - CHECK_EQ(subidx.input_nodes().size(), fe.input_info.size()); - - Array inputs; - for (uint32_t sub_input_id : subidx.input_nodes()) { - auto it = fe.input_info.find(subidx[sub_input_id].source); - inputs.push_back(it->second); - } - // Find master idx in the subgraph. - int sub_master_idx = -1; - for (uint32_t i = 0; i < subidx.num_nodes(); i++) { - if (subidx[i].source->op() == idx[master].source->op()) { - sub_master_idx = i; - break; - } - } - CHECK_NE(sub_master_idx, -1) << "A master node not found in the subgraph."; - fe.compiled_func = GraphLower(fe.subgraph, inputs, target, sub_master_idx); - for (LoweredFunc f : fe.compiled_func->funcs) { - if (!func_set.count(f.get())) { - func_set.insert(f.get()); - func_list.push_back(f); - } - } - } - - const nnvm::Op* tvm_op = nnvm::Op::Get("tvm_op"); - - std::unordered_map old_new; - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) { - // Only copy name since that is sufficient. 
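- // A bare named node is enough here: shape, dtype and dltype are re-attached to the
- // new graph's entries in the loop further below, and a variable carries no parsed
- // operator attributes that would need to be preserved.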
- nnvm::NodePtr np = nnvm::Node::Create(); - np->attrs.name = inode.source->attrs.name; - old_new[nid] = np; - continue; - } - int root_id = group_vec[nid]; - if (static_cast(nid) != root_id) continue; - - // Handle normal op - FuseEntry& fe = fuse_entries[root_id]; - const IndexedGraph& subidx = fe.subgraph.indexed_graph(); - nnvm::NodePtr np = nnvm::Node::Create(); - np->attrs.op = tvm_op; - np->attrs.name = inode.source->attrs.name; - TVMOpParam param; - param.func_name = fe.compiled_func->func_name; - param.num_inputs = static_cast(fe.imap.size()); - param.num_outputs = static_cast(fe.subgraph.outputs.size()); - param.flatten_data = fe.flatten_data; - param.UpdateDict(&(np->attrs.dict)); - np->attrs.parsed = std::move(param); - - for (uint32_t sub_input_id : subidx.input_nodes()) { - // Need to make sure subgraph input order is consistent to the order of - // the graph input. - auto rit = fe.reverse_imap.find(subidx[sub_input_id].source); - CHECK(rit != fe.reverse_imap.end()); - const IndexedGraph::NodeEntry& e = rit->second; - auto it = old_new.find(e.node_id); - CHECK(it != old_new.end()) - << "cannot find node_id=" << e.node_id; - np->inputs.emplace_back( - nnvm::NodeEntry{it->second, e.index, e.version}); - } - for (const uint32_t node_id : inode.control_deps) { - auto it = old_new.find(node_id); - CHECK(it != old_new.end()); - np->control_deps.emplace_back(it->second); - } - old_new[nid] = np; - } - nnvm::Graph ret; - for (const auto& e : idx.outputs()) { - auto it = old_new.find(group_vec[e.node_id]); - CHECK(it != old_new.end()) - << "cannot find node_id=" << e.node_id; - ret.outputs.emplace_back( - nnvm::NodeEntry{it->second, e.index, e.version}); - } - - // Reference counter of each op node. - // For now, always store result when an op is referred more than once. - std::vector ref_count = GetNodeRefCounts(idx); - for (const auto& e : idx.outputs()) { - // This line will realize all the outputs. - ref_count[e.node_id] += 1; - } - - const IndexedGraph& new_idx = ret.indexed_graph(); - - // Handling assign: - // - // assign is a special operator that mutates the variable. - // Currently assign is implemented as output = copy(input[1]) - // Then we run DecorageMemoryPlan to force - // output.storage = input[0].storage - // - std::vector assign_flag(new_idx.num_nodes(), 0); - ShapeVector new_shape_vec = ShapeVector(new_idx.num_node_entries(), TShape()); - DTypeVector new_dtype_vec = DTypeVector(new_idx.num_node_entries()); - std::vector new_dltype_vec(new_idx.num_node_entries()); - - for (const auto& kv : old_new) { - uint32_t nid = kv.first; - const auto& inode = idx[nid]; - uint32_t new_nid = new_idx.node_id(kv.second.get()); - if (inode.source->op() == assign_op) { - // Check if rhs of assign can be computed inplace. - // If yes, we can simply set that memory to be assign target - // and change assign to nop. 
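- // Concretely, the rhs qualifies when this assign is its only consumer
- // (ref_count <= 1), it is not itself a variable, and its fused group is at most an
- // elementwise/broadcast kernel, so that kernel can emit its result directly into
- // the assign target's storage.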
- const IndexedGraph::NodeEntry& rhs = inode.inputs[1]; - if (ref_count[rhs.node_id] <= 1 && - !(idx[rhs.node_id].source->is_variable()) && - pattern_vec[group_vec[rhs.node_id]] <= kBroadcast) { - assign_flag[new_nid] = 2; - TVMOpParam& param = dmlc::get(kv.second->attrs.parsed); - param.func_name = "__nop"; - param.UpdateDict(&(kv.second->attrs.dict)); - } else { - assign_flag[new_nid] = 1; - } - } - for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) { - uint32_t new_eid = new_idx.entry_id(new_idx.node_id(kv.second.get()), i); - uint32_t old_eid = idx.entry_id(nid, i); - new_shape_vec[new_eid] = shape_vec[old_eid]; - new_dtype_vec[new_eid] = dtype_vec[old_eid]; - new_dltype_vec[new_eid] = tvm::runtime::TVMType2String( - GetDLType(dtype_vec[old_eid])); - } - } - ret.attrs["shape"] = std::make_shared(std::move(new_shape_vec)); - ret.attrs["dtype"] = std::make_shared(std::move(new_dtype_vec)); - ret.attrs["dltype"] = std::make_shared(std::move(new_dltype_vec)); - - // Setup module - static const PackedFunc& fbuild = GetPackedFunc("nnvm.compiler.build_target"); - tvm::runtime::Module module = fbuild(func_list, target, target_host); - ret.attrs["module"] = std::make_shared(std::move(module)); - ret = nnvm::ApplyPass(ret, "PlanMemory"); - ret = DecorateMemoryPlan(ret, assign_flag); - return ret; -} - -NNVM_REGISTER_PASS(GraphCompile) - .set_body(GraphCompile) - .depend_graph_attr("shape") - .depend_graph_attr("dtype") - .depend_graph_attr("fused_entry") - .depend_graph_attr("group_root") - .depend_graph_attr("pattern") - .depend_graph_attr("group_master"); - -} // namespace compiler -} // namespace nnvm diff --git a/nnvm/src/compiler/graph_fuse.cc b/nnvm/src/compiler/graph_fuse.cc deleted file mode 100644 index f6c1332dd79c..000000000000 --- a/nnvm/src/compiler/graph_fuse.cc +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file graph_fuse.cc - * \brief Fuse the operators together. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "graph_fuse.h" -#include "graph_runtime.h" -#include "pattern_util.h" - -namespace nnvm { -namespace compiler { -using namespace tvm; - -// Partition the graph into segments -// Each segment will be compiled into one operator. -// Also mark the property of the segment. 
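Which producers join a consumer's group is decided per input edge from each operator's `TOpPattern`. The sketch below is a deliberately simplified illustration (the enum mirrors the ordering of nnvm's `TOpPattern`, but the function is hypothetical and ignores the master-node tracking, shape bookkeeping, and multi-output handling that the real pass performs):

```cpp
// Ordering mirrors nnvm::TOpPattern; only the relative order matters here.
enum Pattern { kElemWise, kBroadcast, kInjective, kCommReduce, kOutEWiseFusable, kOpaque };

// Rough sketch: should the producer of an input edge be fused into the consumer's group?
bool FuseIntoConsumer(Pattern consumer, Pattern producer,
                      bool same_output_shape, int producer_ref_count) {
  if (producer_ref_count > 1) return false;  // value has several readers: realize it
  if (consumer <= kBroadcast) {
    // Elementwise/broadcast consumers absorb injective producers, and may attach to a
    // conv2d-like (kOutEWiseFusable) producer when the output shapes match.
    return producer <= kInjective || (producer == kOutEWiseFusable && same_output_shape);
  }
  if (consumer == kInjective || consumer == kCommReduce) {
    return producer <= kInjective;
  }
  return false;  // opaque and out-ewise-fusable consumers realize their inputs
}
```

Nodes referenced by more than one consumer, and every node when `opt_level` is 0, are forced to `kRealize` at the end of the per-node loop below, which is why the simplified check above short-circuits on the reference count.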
-nnvm::Graph GraphFindFusibleGroups(nnvm::Graph g) { - const IndexedGraph& idx = g.indexed_graph(); - int opt_level = 2; - if (g.attrs.count("opt_level") != 0) { - opt_level = g.MoveCopyAttr("opt_level"); - } - - // Get attributes from the graph - const ShapeVector& shape_vec = g.GetAttr("shape"); - - // Reference counter of each op node - // For now, always store result when an op is referred more than once. - std::vector ref_count = GetNodeRefCounts(idx); - for (const auto& e : idx.outputs()) { - // this line will realize all the outputs - ref_count[e.node_id] += 1; - } - // Pattern for the subgraph - PatternVec pattern_vec(idx.num_nodes(), kOpaque); - // Whether node can be fused to parent. - std::vector fuse_vec(idx.num_nodes(), FuseRule::kUknown); - // Master node id of fusion segment. - std::vector master_vec(idx.num_nodes(), -1); - // Operator pattern - static auto& op_pattern = nnvm::Op::GetAttr("TOpPattern"); - - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) { - fuse_vec[nid] = FuseRule::kRealize; continue; - } - TOpPattern pt = op_pattern.get(inode.source->op(), kOpaque); - - if (pt <= kBroadcast) { - // Check if we can fuse to the master. - int chosen_master = -1; - bool ewise = inode.source->num_outputs() == 1; - bool mark_as_injective = false; - for (const auto& e : inode.inputs) { - if (fuse_vec[e.node_id] == FuseRule::kUknown) { - TOpPattern ipt = pattern_vec[e.node_id]; - if (ipt != kElemWise) ewise = false; - if (ipt <= kBroadcast) { - fuse_vec[e.node_id] = FuseRule::kFuseToMaster; - } else if (ipt == kInjective) { - fuse_vec[e.node_id] = FuseRule::kFuseToMaster; - mark_as_injective = true; - } else if (ipt == kOutEWiseFusable && - chosen_master == -1 && - shape_vec[idx.entry_id(nid, 0)] == shape_vec[idx.entry_id(e)]) { - chosen_master = master_vec[e.node_id]; - fuse_vec[e.node_id] = FuseRule::kFuseToMaster; - } else { - fuse_vec[e.node_id] = FuseRule::kRealize; - } - } - if (ewise) { - if (shape_vec[idx.entry_id(nid, 0)] != shape_vec[idx.entry_id(e)]) { - ewise = false; - } - } - } - master_vec[nid] = chosen_master; - if (chosen_master != -1) { - pt = kOutEWiseFusable; - } else if (mark_as_injective) { - pt = kInjective; - } else { - pt = ewise ? kElemWise : kBroadcast; - } - } else if (pt == kInjective || pt == kCommReduce) { - // Fuse to the comm reduce or injective - for (const auto& e : inode.inputs) { - if (fuse_vec[e.node_id] == FuseRule::kUknown) { - TOpPattern ipt = pattern_vec[e.node_id]; - if (ipt <= kInjective) { - fuse_vec[e.node_id] = FuseRule::kFuseToMaster; - } else { - fuse_vec[e.node_id] = FuseRule::kRealize; - } - } - } - if (pt == kCommReduce) { - master_vec[nid] = nid; - } - } else { - // Realize - master_vec[nid] = nid; - for (const auto& e : inode.inputs) { - if (fuse_vec[e.node_id] == FuseRule::kUknown) { - fuse_vec[e.node_id] = FuseRule::kRealize; - if (master_vec[e.node_id] == -1) { - master_vec[e.node_id] = e.node_id; - } - } - } - } - - pattern_vec[nid] = pt; - if (ref_count[nid] > 1 || opt_level < 1) { - fuse_vec[nid] = FuseRule::kRealize; - if (master_vec[nid] == -1) { - master_vec[nid] = nid; - } - } - } - - // Point to the group root id of each node. 
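- // The loop below walks nodes in reverse topological order: a node that no consumer
- // has claimed becomes the root of its own group, and every input previously marked
- // kFuseToMaster inherits the consumer's group id. When an injective op and an
- // out-ewise-fusable op (e.g. conv2d) would otherwise land in the same group, the
- // master is re-pointed to the current node and the out-ewise-fusable producer is
- // kept out of the group.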
- GroupVec group_vec(idx.num_nodes(), -1); - std::vector > node_ids_per_group(idx.num_nodes()); - for (uint32_t i = idx.num_nodes(); i != 0; --i) { - uint32_t nid = i - 1; - const auto& inode = idx[nid]; - bool is_root = false; - if (group_vec[nid] == -1) { - group_vec[nid] = nid; - node_ids_per_group[nid].push_back(nid); - is_root = true; - } - - // Check if injective op and out_ewise_fusable op (e.g. conv2d) are in the same group. - bool parent_out_ewise = false; - bool parent_injective = false; - for (const auto& e : inode.inputs) { - if (fuse_vec[e.node_id] != FuseRule::kFuseToMaster) continue; - TOpPattern pt = pattern_vec[e.node_id]; - if (pt == kOutEWiseFusable) { - parent_out_ewise = true; - } else if (pt == kInjective) { - parent_injective = true; - } - } - // Change the master node from out_ewise_fusable op to itself - if (parent_injective && parent_out_ewise) { - master_vec[nid] = nid; - if (!is_root) { - // Children nodes in the same group might be pointing to a master node in a different group. - for (uint32_t j : node_ids_per_group[group_vec[nid]]) { - master_vec[j] = nid; - } - } - } - - // Propagate the group id. - for (const auto& e : inode.inputs) { - TOpPattern pt = pattern_vec[e.node_id]; - if (parent_out_ewise && parent_injective) { - if (pt == kOutEWiseFusable) { - continue; // Do not fuse out_ewise_fusable op - } else if (pt == kInjective) { - master_vec[e.node_id] = nid; - } - } - if (fuse_vec[e.node_id] == FuseRule::kFuseToMaster) { - CHECK(group_vec[e.node_id] == -1|| - group_vec[e.node_id] == group_vec[nid]); - group_vec[e.node_id] = group_vec[nid]; - node_ids_per_group[group_vec[nid]].push_back(e.node_id); - } - } - } - - /* - Above algorithm will not fuse a node whose output is fed to more than one - child node. This is because in general, it does not make sense to fuse multiple - children branches with their parent, as in the following example. - - conv2d - / | \ - / | \ - op op op - | | | - | | | - - However, when all children branches meet at a certain node, there is a possibility for - further operator fusion. For example, all nodes in the following subgraph can be fused - into a single node, if three 'in-between' nodes and the bottom node are all element wise - operation. - - conv2d - / | \ - / | \ - op op op - \ | / - \ | / - elemwise add - | - - This pattern is not uncommon. For example, it arises when conv2d op is followed by exponential - linear unit. If bias add and batch normalization are also present, they can be fused as well. - - In fact, above fusion algorithm already fuses three in-between nodes and the element wise - add node in the figure above. The following code fuses the conv2d node with the already - fused children nodes. The following patterns are supported. - - * Any number of child nodes from the top node - * The path from the top node to bottom node can contain any number of element wise ops. - - The only restriction is that in-between nodes cannot have more than one child. - - The overview of the algorithm below is as follows: - - 1. Check if all children nodes are fused into a single op by the existing fusion algorithm - 2. Fuse the parent node to children nodes, and update its group id to be the children's group id - 3. 
If the parent node originally belongs to another group (for example, conv + batch norm), - propagate the new group id to a grand parent and upward - */ - if (opt_level >= 1) { - std::vector > children_group_ids(idx.num_nodes()); - for (uint32_t nid = idx.num_nodes() - 1; nid != 0; --nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) continue; - CHECK_NE(group_vec[nid], -1); - if (inode.inputs.size() != 1) continue; - const uint32_t parent_nid = inode.inputs[0].node_id; - // if parent node has more than one child, record each child's group id. - if (ref_count[parent_nid] > 1) children_group_ids[parent_nid].push_back(group_vec[nid]); - } - - std::vector new_group_id(idx.num_nodes(), -1); - for (uint32_t nid = idx.num_nodes() - 1; nid != 0; --nid) { - if (new_group_id[group_vec[nid]] != -1) { - // propagate new group id from child - group_vec[nid] = new_group_id[group_vec[nid]]; - } - TOpPattern pt = op_pattern.get(idx[nid].source->op(), kOpaque); - if (pt == kOpaque) continue; - const auto& group_ids = children_group_ids[nid]; - if (group_ids.size() <= 1) continue; - const uint32_t child_group_id = group_ids[0]; - const auto& children_node_ids = node_ids_per_group[child_group_id]; - - auto is_same_group_id = [child_group_id](uint32_t id) { - return id == child_group_id; - }; - auto is_fusible_pattern = [&idx](uint32_t child_nid) { - TOpPattern child_pt = op_pattern.get(idx[child_nid].source->op(), kOpaque); - return child_pt <= kBroadcast; - }; - // fuse this node with children if - // all children belong to the same group and - // all nodes in the group are element wise or broadcast op. - const bool can_be_fused = std::all_of(group_ids.begin(), group_ids.end(), is_same_group_id) && - std::all_of(children_node_ids.begin(), children_node_ids.end(), is_fusible_pattern); - - if (can_be_fused) { - new_group_id[group_vec[nid]] = child_group_id; - group_vec[nid] = child_group_id; - for (uint32_t nid2 : node_ids_per_group[child_group_id]) { - pattern_vec[nid2] = pattern_vec[nid]; - master_vec[nid2] = master_vec[nid]; - } - } - } - } - - g.attrs["group_root"] = std::make_shared(std::move(group_vec)); - g.attrs["group_master"] = std::make_shared(std::move(master_vec)); - g.attrs["pattern"] = std::make_shared(std::move(pattern_vec)); - return g; -} - -NNVM_REGISTER_PASS(GraphFindFusibleGroups) -.set_body(GraphFindFusibleGroups) -.depend_graph_attr("shape") -.depend_graph_attr("dtype"); - -// Fuse the partitioned graph into segments. -// Create a new graph with fused nodes. -// Also inherit attribute shape, dltype from the previous graph. -nnvm::Graph GraphFuse(nnvm::Graph g) { - CHECK(g.HasAttr("group_root") && g.HasAttr("pattern")) - << "GraphFindFusibleGroups pass hasn't been applied yet."; - - const IndexedGraph& idx = g.indexed_graph(); - // Get attributes from the graph - const ShapeVector& shape_vec = g.GetAttr("shape"); - const DTypeVector& dtype_vec = g.GetAttr("dtype"); - const GroupVec& group_vec = g.GetAttr("group_root"); - const PatternVec& pattern_vec = g.GetAttr("pattern"); - - // Specially handle assign op. - const nnvm::Op* assign_op = nnvm::Op::Get("_assign"); - - FuseEntryVec fuse_entries(idx.num_nodes()); - // Setup inputs and placeholder. 
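- // For every input entry that comes from outside this group, create a TVM
- // placeholder describing its shape and dtype (collapsed to a single flattened
- // dimension when the group is purely elementwise or an assign), and record it in
- // imap / reverse_imap / input_info so GraphCompile can later bind the real tensors
- // to these placeholders.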
- for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) continue; - CHECK_GE(group_vec[nid], 0); - int root_id = group_vec[nid]; - FuseEntry& fe = fuse_entries[root_id]; - fe.flatten_data = (pattern_vec[root_id] == kElemWise || - inode.source->op() == assign_op); - for (const auto& e : inode.inputs) { - if (group_vec[e.node_id] != root_id && fe.imap.count(e) == 0) { - Array shape; - if (fe.flatten_data) { - // Elementwise support flatten - int64_t prod = 1; - for (int64_t x : shape_vec[idx.entry_id(e)]) { - prod *= x; - } - CHECK_LE(prod, static_cast(std::numeric_limits::max())); - shape.push_back(make_const(DataType::Int(32), prod)); - } else { - for (int64_t x : shape_vec[idx.entry_id(e)]) { - CHECK_LE(x, static_cast(std::numeric_limits::max())); - shape.push_back(make_const(DataType::Int(32), x)); - } - } - std::ostringstream os_name; - os_name << "input" << fe.imap.size(); - Tensor data = placeholder( - shape, DataType(GetDLType(dtype_vec[idx.entry_id(e)])), - os_name.str()); - NodeEntry garg = Symbol::CreateVariable(os_name.str()).outputs[0]; - fe.imap[e] = garg; - fe.reverse_imap[garg.node.get()] = e; - fe.input_info[garg.node.get()] = std::move(data); - } - } - } - - // Setup the Subgraph - std::vector subgraph_vec(idx.num_node_entries()); - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) continue; - int root_id = group_vec[nid]; - FuseEntry& fe = fuse_entries[root_id]; - // Create a subgraph node. - NodePtr gnode = Node::Create(); - gnode->attrs = inode.source->attrs; - // Set input entries for the subgraph node. - for (const auto& e : inode.inputs) { - if (group_vec[e.node_id] != root_id) { - auto it = fe.imap.find(e); - CHECK(it != fe.imap.end()); - gnode->inputs.push_back(it->second); - } else { - const NodeEntry& ne = subgraph_vec[idx.entry_id(e)]; - CHECK(!idx[e.node_id].source->is_variable()); - CHECK(ne.node != nullptr); - gnode->inputs.push_back(ne); - } - } - // Schedule on the root node and use the master's schedule - if (static_cast(nid) != root_id) { - for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) { - uint32_t eid = idx.entry_id(nid, index); - subgraph_vec[eid] = NodeEntry{gnode, index, 0}; - } - } else { - for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) { - fe.subgraph.outputs.push_back(NodeEntry{gnode, index, 0}); - } - } - } - g.attrs["fused_entry"] = std::make_shared(std::move(fuse_entries)); - return g; -} - -NNVM_REGISTER_PASS(GraphFuse) - .set_body(GraphFuse) - .set_change_graph(true) - .provide_graph_attr("fused_entry") - .depend_graph_attr("shape") - .depend_graph_attr("dtype") - .depend_graph_attr("group_root") - .depend_graph_attr("group_master"); - -} // namespace compiler -} // namespace nnvm diff --git a/nnvm/src/compiler/graph_fuse.h b/nnvm/src/compiler/graph_fuse.h deleted file mode 100644 index dd8d5d57f66a..000000000000 --- a/nnvm/src/compiler/graph_fuse.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file graph_fuse.h - * \brief Definition of structs used by graph fusion -*/ -#ifndef NNVM_COMPILER_GRAPH_FUSE_H_ -#define NNVM_COMPILER_GRAPH_FUSE_H_ - -#include -#include -#include - -#include "compile_engine.h" - -namespace nnvm { -namespace compiler { - -// The single fuse rule. -enum class FuseRule { - kUknown, - kFuseToMaster, - kRealize -}; - -/*! - * \brief Get DLDataType from dtype flag. - * - * \param type_flag The data type flag - * \return corresponding DLDataType - */ -inline DLDataType GetDLType(int type_flag) { - return GetTVMType(type_flag); -} - -struct INodeEntryHash { - size_t operator()(const IndexedGraph::NodeEntry& e) const { - return e.node_id; - } -}; - -struct INodeEntryEqual { - size_t operator()(const IndexedGraph::NodeEntry &a, - const IndexedGraph::NodeEntry &b) const { - return a.node_id == b.node_id && a.index == b.index; - } -}; - -// Auxiliary data structure for representing fused op. -struct FuseEntry { - // Subgraph of the fragment - Graph subgraph; - // The input map - std::unordered_map - imap; - // Reverse map to the old input entry - std::unordered_map reverse_imap; - // TVM Placeholder for inputs - std::unordered_map input_info; - // Whether we can flatten data - bool flatten_data; - // The corresponding function. - GraphFunc compiled_func; -}; - -// GroupVec stores the root node ids of the fused nodes. -using GroupVec = std::vector; - -// MasterVec stores master node ids of fused groups. -using MasterVec = std::vector; - -// FuseVec stores fused entries. -using FuseEntryVec = std::vector; - -// PatternVec stores operator patterns. -using PatternVec = std::vector; - -} // namespace compiler -} // namespace nnvm - -#endif // NNVM_COMPILER_GRAPH_FUSE_H_ diff --git a/nnvm/src/compiler/graph_hash.cc b/nnvm/src/compiler/graph_hash.cc deleted file mode 100644 index 236a27375225..000000000000 --- a/nnvm/src/compiler/graph_hash.cc +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \file graph_deep_compare.cc - * \brief Deep compare two graph structure - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "node_attr.h" -#include "graph_hash.h" - -namespace nnvm { -namespace compiler { - -using namespace tvm; -using tvm::ir::IntImm; - -size_t HashPlaceHolder(const Tensor& t) { - size_t key = t->shape.size(); - key = dmlc::HashCombine(key, (t->dtype.code() << 8) | t->dtype.bits()); - for (Expr s : t->shape) { - if (const IntImm* op = s.as()) { - key = dmlc::HashCombine(key, op->value); - } - } - return key; -} - -bool PlaceHolderEqual(const Tensor& a, const Tensor& b) { - if (a->shape.size() != b->shape.size()) return false; - if (a->dtype != b->dtype) return false; - for (size_t i = 0; i < a->shape.size(); ++i) { - const IntImm* a_value = a->shape[i].as(); - const IntImm* b_value = b->shape[i].as(); - if (a_value && b_value == nullptr) return false; - if (b_value && a_value == nullptr) return false; - if (a_value == nullptr && b_value == nullptr) { - continue; - } - if (a_value->value != b_value->value) return false; - } - return true; -} - -size_t GraphKeyHash::Hash(const GraphKey& gkey) { - if (gkey->cache_hash_key_ != 0) return gkey->cache_hash_key_; - size_t key = dmlc::HashCombine(GraphHash(gkey->graph), gkey->target); - key = dmlc::HashCombine(key, gkey->inputs.size()); - for (size_t i = 0; i < gkey->inputs.size(); ++i) { - key = dmlc::HashCombine(key, HashPlaceHolder(gkey->inputs[i])); - } - if (key == 0) key = 1; - gkey->cache_hash_key_ = key; - return key; -} - -bool GraphKeyEqual::Equal(const GraphKey& a, - const GraphKey& b) { - if (a->target != b->target) return false; - if (a->inputs.size() != b->inputs.size()) return false; - for (size_t i = 0; i < a->inputs.size(); ++i) { - if (!PlaceHolderEqual(a->inputs[i], b->inputs[i])) return false; - } - if (GraphDeepCompare(a->graph, b->graph, false).length() != 0) return false; - return true; -} - -GraphKey GraphKeyNode::make(Graph graph, - tvm::Array inputs, - std::string target) { - auto n = tvm::make_node(); - n->graph = std::move(graph); - n->inputs = inputs; - n->target = std::move(target); - return GraphKey(n); -} - -TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) -.set_dispatch([](const ObjectRef& ref, IRPrinter* p) { - auto* op = static_cast(ref.get()); - p->stream << "GraphKeyNode("<< op << ")"; -}); - - -// Run graph hash -size_t GraphHash(const Graph& graph) { - const IndexedGraph& idx = graph.indexed_graph(); - size_t key = 0; - // Combine a linearized sequence of ops in subgraph - key = dmlc::HashCombine(key, idx.num_nodes()); - std::hash str_hash; - std::vector hash_temp; - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const IndexedGraph::Node& inode = idx[nid]; - // Use name instad op address so it is deterministic across runs - if (inode.source->is_variable()) continue; - key = dmlc::HashCombine(key, inode.source->op()->name); - hash_temp.clear(); - for (const auto& kv : GetAttrDict(inode.source->attrs)) { - hash_temp.push_back(dmlc::HashCombine(str_hash(kv.first), kv.second)); - } - // to make sure it is deterministic - // since unordered_map is not deterministic - std::sort(hash_temp.begin(), hash_temp.end()); - for (size_t value : hash_temp) { - key = dmlc::HashCombine(key, value); - } - } - return key; -} - -// deep compare the graph structure -// not considering the graph attributes -// return non-empty error message if the graph mismatch. -// the comparator won't match name of intermediate node. 
-// compare_var_attr -std::string GraphDeepCompare(const Graph& a, - const Graph& b, - bool compare_variable_attr) { - const IndexedGraph& idxa = a.indexed_graph(); - const IndexedGraph& idxb = b.indexed_graph(); - std::ostringstream err; - if (idxa.num_nodes() != idxb.num_nodes()) { - err << "Number of nodes mismatch (" << idxa.num_nodes() << " v.s " << idxb.num_nodes() << ")"; - return err.str(); - } - if (idxa.num_node_entries() != idxb.num_node_entries()) { - err << "Number of node entry mismatch"; - return err.str(); - } - if (idxa.outputs().size() != idxb.outputs().size()) { - err << "Number of outputs mismatch"; - return err.str(); - } - for (size_t i = 0; i < idxa.outputs().size(); ++i) { - if (idxa.outputs()[i].node_id != idxb.outputs()[i].node_id || - idxa.outputs()[i].index != idxb.outputs()[i].index) { - err << "Output entry mismatch"; - return err.str(); - } - } - if (idxa.input_nodes().size() != idxb.input_nodes().size()) { - err << "Number of inputs mismatch"; - return err.str(); - } - - for (uint32_t nid = 0; nid < idxa.num_nodes(); ++nid) { - const IndexedGraph::Node& anode = idxa[nid]; - const IndexedGraph::Node& bnode = idxb[nid]; - if (anode.source->op() != bnode.source->op()) { - err << "Node mismatch "; - return err.str(); - } - if (anode.source->is_variable()) { - CHECK(bnode.source->is_variable()); - if (!compare_variable_attr) continue; - } - AttrDict adict = GetAttrDict(anode.source->attrs); - AttrDict bdict = GetAttrDict(bnode.source->attrs); - - auto fmatch = [&err, &anode](const AttrDict& adict, const AttrDict& bdict) { - for (const auto& kv : adict) { - auto it = bdict.find(kv.first); - if (it != bdict.end()) { - if (it->second != kv.second) { - err << "Node attr mismatch, op=" << anode.source->attrs.name - << " attr_key=" << kv.first << " " << it->second - << " v.s. " << kv.second; - return false; - } - } else { - err << "One attr_key=" << kv.first << " is missing in another " - << "op=" << anode.source->attrs.name; - return false; - } - } - return true; - }; - if (!fmatch(adict, bdict)) return err.str(); - if (adict.size() != bdict.size()) { - CHECK(!fmatch(bdict, adict)); - return err.str(); - } - if (anode.inputs.size() != bnode.inputs.size()) { - err << "Node input mismatch, op=" << anode.source->attrs.name; - return err.str(); - } - if (anode.control_deps.size() != bnode.control_deps.size()) { - err << "Node control_deps mistach, op=" << anode.source->attrs.name; - return err.str(); - } - for (size_t i = 0; i < anode.inputs.size(); ++i) { - const IndexedGraph::NodeEntry& ae = anode.inputs[i]; - const IndexedGraph::NodeEntry& be = bnode.inputs[i]; - if (ae.node_id != be.node_id || - ae.index != be.index || - ae.version != be.version) { - err << "Node input mismatch on, op=" << anode.source->attrs.name; - return err.str(); - } - } - for (size_t i = 0; i < anode.control_deps.size(); ++i) { - if (anode.control_deps[i] != bnode.control_deps[i]) { - err << "Node control_dep mismatch on, op=" << anode.source->attrs.name; - return err.str(); - } - } - } - return ""; -} - -TVM_REGISTER_GLOBAL("nnvm.graph.DeepCompare") -.set_body_typed(GraphDeepCompare); -} // namespace compiler -} // namespace nnvm diff --git a/nnvm/src/compiler/graph_hash.h b/nnvm/src/compiler/graph_hash.h deleted file mode 100644 index 42c069b280c9..000000000000 --- a/nnvm/src/compiler/graph_hash.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file graph_hash.h - * \brief The graph hashing function. - */ -#ifndef NNVM_COMPILER_GRAPH_HASH_H_ -#define NNVM_COMPILER_GRAPH_HASH_H_ - -#include -#include -#include -#include -#include - -namespace nnvm { -namespace compiler { - -class GraphKey; - -/*! \brief Key to a graph compiler cache */ -struct GraphKeyNode : public tvm::Node { - /*! \brief The graph structure */ - Graph graph; - /* \brief The inputs to the function */ - tvm::Array inputs; - /*! \brief The target */ - std::string target; - // Cached internal hash key, invisible to the user. - // The graph hash key is ensured always not to be 0 - mutable size_t cache_hash_key_{0}; - - void VisitAttrs(tvm::AttrVisitor* v) { - v->Visit("inputs", &inputs); - v->Visit("target", &target); - } - - static GraphKey make(Graph graph, - tvm::Array inputs, - std::string target); - static constexpr const char* _type_key = "GraphKey"; - TVM_DECLARE_NODE_TYPE_INFO(GraphKeyNode, tvm::Node); -}; - -TVM_DEFINE_NODE_REF(GraphKey, GraphKeyNode); - -/*! \brief Hashing function for graph key */ -struct GraphKeyHash { - size_t operator()(const GraphKey& gkey) const { - return Hash(gkey); - } - static size_t Hash(const GraphKey& gkey); -}; - -/*! \brief function for graph key */ -struct GraphKeyEqual { - bool operator()(const GraphKey& a, - const GraphKey& b) const { - return Equal(a, b); - } - static bool Equal(const GraphKey& a, const GraphKey& b); -}; - -/*! - * \brief Create a hash code for a given graph. - * \return The hash code of the graph. - */ -size_t GraphHash(const Graph& graph); - -/*! - * \brief Compare two graphs - * return empty string if they are equal - * otherwise return error message - * \param a The first graph. - * \param b The second graph. - * \return empty string if they are equal, otherwise return error message. - */ -std::string GraphDeepCompare(const Graph& a, - const Graph& b, - bool compare_variable_attr); -} // namespace compiler -} // namespace nnvm - -#endif // NNVM_COMPILER_GRAPH_HASH_H_ diff --git a/nnvm/src/compiler/graph_runtime.cc b/nnvm/src/compiler/graph_runtime.cc deleted file mode 100644 index a4b398cd41ea..000000000000 --- a/nnvm/src/compiler/graph_runtime.cc +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file graph_runtime.cc - * \brief Interface code with TVM graph runtime. -*/ -#include -#include - -#include -#include "graph_runtime.h" - -namespace nnvm { -namespace compiler { - -using tvm::Object; -using tvm::ObjectPtr; -using tvm::runtime::TVMArgs; -using tvm::runtime::TVMRetValue; -using tvm::runtime::PackedFunc; - -DMLC_REGISTER_PARAMETER(TVMOpParam); - -// parser -inline void TVMOpParamParser(nnvm::NodeAttrs* attrs) { - TVMOpParam param; - param.Init(attrs->dict); - attrs->parsed = std::move(param); -} - -NNVM_REGISTER_OP(tvm_op) -.set_attr_parser(TVMOpParamParser) -.set_num_inputs([](const NodeAttrs& attrs) { - const TVMOpParam& param = nnvm::get(attrs.parsed); - return param.num_inputs; - }) -.set_num_outputs([](const NodeAttrs& attrs) { - const TVMOpParam& param = nnvm::get(attrs.parsed); - return param.num_outputs; - }); - - -TVM_REGISTER_GLOBAL("nnvm.compiler._save_param_dict") -.set_body([](TVMArgs args, TVMRetValue *rv) { - CHECK_EQ(args.size() % 2, 0u); - size_t num_params = args.size() / 2; - std::vector names; - names.reserve(num_params); - std::vector arrays; - arrays.reserve(num_params); - for (size_t i = 0; i < num_params * 2; i += 2) { - names.emplace_back(args[i].operator std::string()); - arrays.emplace_back(args[i + 1].operator DLTensor*()); - } - std::string bytes; - dmlc::MemoryStringStream strm(&bytes); - dmlc::Stream* fo = &strm; - uint64_t header = kTVMNDArrayListMagic, reserved = 0; - fo->Write(header); - fo->Write(reserved); - fo->Write(names); - { - uint64_t sz = static_cast(arrays.size()); - fo->Write(sz); - for (size_t i = 0; i < sz; ++i) { - tvm::runtime::SaveDLTensor(fo, arrays[i]); - } - } - TVMByteArray arr; - arr.data = bytes.c_str(); - arr.size = bytes.length(); - *rv = arr; - }); - - -TVM_REGISTER_GLOBAL("nnvm.compiler._load_param_dict") -.set_body([](TVMArgs args, TVMRetValue *rv) { - std::string bytes = args[0]; - std::vector names; - dmlc::MemoryStringStream memstrm(&bytes); - dmlc::Stream* strm = &memstrm; - uint64_t header, reserved; - CHECK(strm->Read(&header)) - << "Invalid parameters file format"; - CHECK(header == kTVMNDArrayListMagic) - << "Invalid parameters file format"; - CHECK(strm->Read(&reserved)) - << "Invalid parameters file format"; - CHECK(strm->Read(&names)) - << "Invalid parameters file format"; - uint64_t sz; - strm->Read(&sz, sizeof(sz)); - size_t size = static_cast(sz); - CHECK(size == names.size()) - << "Invalid parameters file format"; - tvm::Array ret; - for (size_t i = 0; i < size; ++i) { - tvm::runtime::NDArray temp; - temp.Load(strm); - auto n = tvm::make_node(); - n->name = std::move(names[i]); - n->array = temp; - ret.push_back(NDArrayWrapper(n)); - } - *rv = ret; - }); - -TVM_REGISTER_NODE_TYPE(NDArrayWrapperNode); -} // namespace compiler -} // namespace nnvm diff --git a/nnvm/src/compiler/graph_runtime.h b/nnvm/src/compiler/graph_runtime.h deleted file mode 100644 index 252a6b243c3d..000000000000 --- a/nnvm/src/compiler/graph_runtime.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file graph_runtime.h - * \brief Interface code with TVM graph runtime. -*/ -#ifndef NNVM_COMPILER_GRAPH_RUNTIME_H_ -#define NNVM_COMPILER_GRAPH_RUNTIME_H_ - -#include -#include -#include -#include -#include -#include -#include - -namespace nnvm { -namespace compiler { - -/*! \brief Magic number for NDArray list file */ -constexpr uint64_t kTVMNDArrayListMagic = 0xF7E58D4F05049CB7; - -struct TVMOpParam : public dmlc::Parameter { - std::string func_name; - uint32_t num_inputs; - uint32_t num_outputs; - uint32_t flatten_data; - - DMLC_DECLARE_PARAMETER(TVMOpParam) { - DMLC_DECLARE_FIELD(func_name); - DMLC_DECLARE_FIELD(num_inputs).set_default(1); - DMLC_DECLARE_FIELD(num_outputs).set_default(1); - DMLC_DECLARE_FIELD(flatten_data).set_default(0); - } -}; - - -/*! - * \brief wrapper node container for exchange. - */ -struct NDArrayWrapperNode : public ::tvm::Node { - std::string name; - tvm::runtime::NDArray array; - - void VisitAttrs(tvm::AttrVisitor* v) { - v->Visit("name", &name); - v->Visit("array", &array); - } - - static constexpr const char* _type_key = "NDArrayWrapper"; - TVM_DECLARE_NODE_TYPE_INFO(NDArrayWrapperNode, tvm::Node); -}; - -TVM_DEFINE_NODE_REF(NDArrayWrapper, NDArrayWrapperNode); - -} // namespace compiler -} // namespace nnvm - -#endif // NNVM_COMPILER_GRAPH_RUNTIME_H_ diff --git a/nnvm/src/compiler/graph_transform.h b/nnvm/src/compiler/graph_transform.h deleted file mode 100644 index 4b183bf2dd6c..000000000000 --- a/nnvm/src/compiler/graph_transform.h +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file graph_transform.h - * \brief A mutator class that does local pattern matching and mutates a node. -*/ -#ifndef NNVM_COMPILER_GRAPH_TRANSFORM_H_ -#define NNVM_COMPILER_GRAPH_TRANSFORM_H_ - -#include -#include -#include -#include - -namespace nnvm { -namespace compiler { - -/*! - * \brief Transform the graph to build a new Graph, in post DFS order. - * - * Automatically copies node when some of its children or control_deps changed. 
- * This function won't be called in Variable. - * - * \param graph The original graph - * - * \param ftransform Function of (int nid, const NodePtr& node, std::vector* out) -> bool - * - * If empty vector is returned, it means original entries should be kept. - * - * \tparam FTransform The transformation function. - */ -template -Graph GraphTransform(Graph graph, FTransform ftransform) { - const IndexedGraph& idx = graph.indexed_graph(); - // new nodes - std::vector new_entry_map(idx.num_node_entries()); - std::vector updated(idx.num_node_entries(), false); - - // setup inputs and placeholder. - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - bool need_copy = false; - for (const IndexedGraph::NodeEntry& e : inode.inputs) { - if (updated[idx.entry_id(e)]) { - need_copy = true; break; - } - } - if (!need_copy) { - for (const uint32_t cid : inode.control_deps) { - const auto& cnode = idx[cid]; - for (uint32_t i = 0 ; i < cnode.source->num_outputs(); ++i) { - if (updated[idx.entry_id(cid, i)]) { - need_copy = true; - } - } - if (need_copy) break; - } - } - - if (!need_copy) { - std::vector ret; - if (ftransform(nid, inode.weak_ref.lock(), &ret)) { - CHECK_EQ(ret.size(), static_cast(inode.source->num_outputs())); - for (uint32_t i = 0 ; i < inode.source->num_outputs(); ++i) { - updated[idx.entry_id(nid, i)] = true; - new_entry_map[idx.entry_id(nid, i)] = ret[i]; - } - } - } else { - NodePtr node = Node::Create(); - node->attrs = inode.source->attrs; - for (size_t i = 0; i < inode.inputs.size(); ++i) { - const IndexedGraph::NodeEntry& e = inode.inputs[i]; - if (updated[idx.entry_id(e)]) { - node->inputs.push_back(new_entry_map[idx.entry_id(e)]); - } else { - node->inputs.push_back(inode.source->inputs[i]); - } - } - for (size_t i = 0; i < inode.control_deps.size(); ++i) { - const uint32_t cid = inode.control_deps[i]; - const auto& cnode = idx[cid]; - CHECK_NE(cnode.source->num_outputs(), 0U); - NodePtr selected_ptr; - for (uint32_t j = 0 ; j < cnode.source->num_outputs(); ++j) { - NodePtr cptr = updated[idx.entry_id(cid, j)] ? 
- new_entry_map[idx.entry_id(cid, j)].node : inode.source->control_deps[i]; - if (selected_ptr == nullptr) { - selected_ptr = std::move(cptr); - } else { - CHECK(selected_ptr.get() == cptr.get()) - << "Control dependency node changed to more than one node"; - } - } - node->control_deps.push_back(selected_ptr); - } - std::vector ret; - if (ftransform(nid, node, &ret)) { - CHECK_EQ(ret.size(), static_cast(inode.source->num_outputs())); - for (uint32_t i = 0 ; i < inode.source->num_outputs(); ++i) { - updated[idx.entry_id(nid, i)] = true; - new_entry_map[idx.entry_id(nid, i)] = ret[i]; - } - } else { - for (uint32_t i = 0 ; i < inode.source->num_outputs(); ++i) { - updated[idx.entry_id(nid, i)] = true; - new_entry_map[idx.entry_id(nid, i)] = NodeEntry{node, i, 0}; - } - } - } - } - Graph ret; - for (size_t i = 0; i < idx.outputs().size(); ++i) { - const IndexedGraph::NodeEntry& e = idx.outputs()[i]; - if (updated[idx.entry_id(e)]) { - ret.outputs.push_back(new_entry_map[idx.entry_id(e)]); - } else { - ret.outputs.push_back(graph.outputs[i]); - } - } - return ret; -} - -} // namespace compiler -} // namespace nnvm - -#endif // NNVM_COMPILER_GRAPH_TRANSFORM_H_ diff --git a/nnvm/src/compiler/node_attr.h b/nnvm/src/compiler/node_attr.h deleted file mode 100644 index cd11981bffec..000000000000 --- a/nnvm/src/compiler/node_attr.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file node_attr.h - * \brief utility to access node attributes -*/ -#ifndef NNVM_COMPILER_NODE_ATTR_H_ -#define NNVM_COMPILER_NODE_ATTR_H_ - -#include -#include -#include -#include - -namespace nnvm { -namespace compiler { - -using AttrDict = std::unordered_map; -/*! - * \brief Get canonicalized attr dict from node - * \param attrs The node attrs - * \return The attribute dict - */ -inline AttrDict GetAttrDict(const NodeAttrs& attrs) { - static auto& fgetdict = nnvm::Op::GetAttr("FGetAttrDict"); - if (fgetdict.count(attrs.op)) { - return fgetdict[attrs.op](attrs); - } else { - return attrs.dict; - } -} - -} // namespace compiler -} // namespace nnvm -#endif // NNVM_COMPILER_NODE_ATTR_H_ diff --git a/nnvm/src/compiler/packed_func_ext.cc b/nnvm/src/compiler/packed_func_ext.cc deleted file mode 100644 index 5680af1b2550..000000000000 --- a/nnvm/src/compiler/packed_func_ext.cc +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file packed_func_ext.cc - * \brief Registeration of extension type. - */ -#include -#include -#include -#include -#include -#include -#include "node_attr.h" -#include "compile_engine.h" - -namespace tvm { -namespace runtime { - -TVM_REGISTER_EXT_TYPE(nnvm::Graph); -TVM_REGISTER_EXT_TYPE(nnvm::Symbol); -TVM_REGISTER_EXT_TYPE(nnvm::compiler::AttrDict); - -} // namespace runtime -} // namespace tvm - -namespace nnvm { -DMLC_JSON_ENABLE_ANY(int, int); -} // namespace nnvm - -namespace nnvm { -namespace compiler { - -using tvm::Tensor; -using tvm::Array; -using tvm::Node; -using tvm::runtime::TVMArgs; -using tvm::runtime::TVMRetValue; - -TVM_REGISTER_GLOBAL("nnvm.compiler._dict_get") -.set_body([](TVMArgs args, TVMRetValue *rv) { - const AttrDict& dict = args[0].AsExtension(); - std::string key = args[1]; - auto it = dict.find(key); - if (it != dict.end()) { - *rv = it->second; - } else { - *rv = nullptr; - } - }); - -TVM_REGISTER_GLOBAL("nnvm.compiler._dict_size") -.set_body([](TVMArgs args, TVMRetValue *rv) { - const AttrDict& dict = args[0].AsExtension(); - *rv = static_cast(dict.size()); - }); - -TVM_REGISTER_GLOBAL("nnvm.compiler._dict_keys") -.set_body([](TVMArgs args, TVMRetValue *rv) { - const AttrDict& dict = args[0].AsExtension(); - tvm::Array keys; - for (const auto& kv : dict) { - keys.push_back(kv.first); - } - *rv = keys; - }); - -TVM_REGISTER_GLOBAL("nnvm.compiler._register_alter_op_layout") -.set_body([](TVMArgs args, TVMRetValue *rv) { - // Intentionally copy and not de-allocate it, to avoid free pyobject during shutdown - PackedFunc* f = new PackedFunc(args[1].operator PackedFunc()); - Op& op = ::dmlc::Registry::Get()->__REGISTER_OR_GET__(args[0]); - auto fpack = [f](const NodeAttrs& attrs, - const Symbol& inputs, - const Array& tinfos, - Symbol* ret_symbol) { - TVMRetValue ret = (*f)(GetAttrDict(attrs), inputs, tinfos); - if (ret.type_code() == TVMTypeCode::kNull) { - return false; - } - CHECK_EQ(ret.type_code(), tvm::runtime::extension_type_info::code) - << " expected " << "Symbol (code = " << tvm::runtime::extension_type_info::code - << ") but get code = " << ret.type_code(); - *ret_symbol = *(static_cast(ret.value().v_handle)); - return true; - }; - op.set_attr("FTVMAlterOpLayout", fpack, args[2]); -}); - -// custom version of TVM compute -TVM_REGISTER_GLOBAL("nnvm._register_compute") -.set_body([](TVMArgs args, TVMRetValue *rv) { - // Intentionally copy and not de-allocate it, to avoid free pyobject during shutdown - PackedFunc* f = new PackedFunc(args[1].operator PackedFunc()); - Op& op = ::dmlc::Registry::Get()->__REGISTER_OR_GET__(args[0]); - auto fcompute = [f](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) - -> Array { - TVMRetValue ret = (*f)(GetAttrDict(attrs), inputs, out_info); - if (ret.IsObjectRef()) { - return {ret.operator Tensor()}; - } else { - return ret; - } - }; - op.set_attr("FTVMCompute", fcompute, args[2]); - }); - -TVM_REGISTER_GLOBAL("nnvm._register_schedule") -.set_body([](TVMArgs args, TVMRetValue *rv) { - // Intentionally copy and not de-allocate it, to avoid free pyobject during shutdown 
- PackedFunc* f = new PackedFunc(args[1].operator PackedFunc()); - Op& op = ::dmlc::Registry::Get()->__REGISTER_OR_GET__(args[0]); - auto fschedule = [f](const NodeAttrs& attrs, - const Array& outs, - const std::string& target) { - return (*f)(GetAttrDict(attrs), outs, target).operator Schedule(); - }; - op.set_attr("FTVMSchedule", fschedule, args[2]); - }); - -TVM_REGISTER_GLOBAL("nnvm._register_pattern") -.set_body([](TVMArgs args, TVMRetValue *rv) { - Op& op = ::dmlc::Registry::Get()->__REGISTER_OR_GET__(args[0]); - op.set_attr("TOpPattern", args[1].operator int(), args[2]); - }); - -TVM_REGISTER_GLOBAL("nnvm.graph._move_module") -.set_body([](TVMArgs args, TVMRetValue *rv) { - const nnvm::Graph& g = args[0].AsExtension(); - *rv = const_cast(&g)-> - MoveCopyAttr(args[1]); - }); - -TVM_REGISTER_GLOBAL("nnvm.graph._move_graph") -.set_body([](TVMArgs args, TVMRetValue *rv) { - const nnvm::Graph& g = args[0].AsExtension(); - std::string key = args[1]; - if (g.attrs.count(key)) { - *rv = const_cast(&g)-> - MoveCopyAttr(key); - } else { - *rv = nullptr; - } - }); -} // namespace compiler -} // namespace nnvm diff --git a/nnvm/src/compiler/pattern_util.h b/nnvm/src/compiler/pattern_util.h deleted file mode 100644 index d3f9725caefa..000000000000 --- a/nnvm/src/compiler/pattern_util.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file pattern_util.h - * \brief Utilities for doing various pattern matching in graph. -*/ -#ifndef NNVM_COMPILER_PATTERN_UTIL_H_ -#define NNVM_COMPILER_PATTERN_UTIL_H_ - -#include -#include -#include -#include -#include - -namespace nnvm { -namespace compiler { - -/*! - * \brief find axis in oshape, such that: - * bias_shape = [1,1, ... oshape[axis], 1,1,] - * - * This is used to detect bias or scaling factor on channel dimension. - * \param oshape The output shape - * \param bias_shape The shape of bias or scaling factor. - * \return Pair of matched axis in o shape and bias_shape if found. - */ -inline std::pair MatchBroadcast1DAxis( - const TShape& oshape, const TShape& bias_shape) { - dim_t axis_dim = bias_shape.ndim(); - for (dim_t i = bias_shape.ndim(); i != 0; --i, --axis_dim) { - if (bias_shape[i - 1] != 1) break; - } - // everything is 1 - if (axis_dim == 0) { - return {oshape.ndim() - bias_shape.ndim(), 0}; - } - axis_dim = axis_dim - 1; - // The bias shape is not 1D - for (dim_t i = 0; i < axis_dim; ++i) { - if (bias_shape[i] != 1) return {-1, -1}; - } - int axis = static_cast( - oshape.ndim() - bias_shape.ndim() + axis_dim); - if (oshape[axis] != bias_shape[axis_dim]) return {-1, -1}; - return {axis, axis_dim}; -} - -/*! - * \brief Expand bias dimension to match needed axis. - * - * \param bias The bias NodeEntry - * \param out_dim output dimension. 
- * \param bias_dim The current bias dimension. - * \param axis The axis we want to match on. - */ -inline NodeEntry -ExpandBiasToMatchAxis(NodeEntry bias, - int out_dim, - int bias_dim, - int axis) { - if (bias_dim != 1) { - bias = MakeNode("squeeze", bias.node->attrs.name + "_sqz", {bias}); - } - int num_pad_axis = out_dim - axis - 1; - if (num_pad_axis > 0) { - std::unordered_map kwargs{ - {"axis", "1"}, - {"num_newaxis", std::to_string(num_pad_axis)}}; - return MakeNode("expand_dims", bias.node->attrs.name + "_expand", - {bias}, kwargs); - - } else { - return bias; - } -} - -/*! - * \brief Get the reference count of each node. - * \param idx The IndexedGraph - * \return ref_count vector of length number nodes. - */ -inline std::vector -GetNodeRefCounts(const IndexedGraph& idx) { - std::vector ref_count(idx.num_nodes(), 0); - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) continue; - for (const auto& e : inode.inputs) { - ++ref_count[e.node_id]; - } - } - for (const auto& e : idx.outputs()) { - // this line will realize all the outputs - ref_count[e.node_id] += 1; - } - return ref_count; -} -} // namespace compiler -} // namespace nnvm -#endif // NNVM_COMPILER_PATTERN_UTIL_H_ diff --git a/nnvm/src/compiler/precompute_prune.cc b/nnvm/src/compiler/precompute_prune.cc deleted file mode 100644 index cd11420b0a33..000000000000 --- a/nnvm/src/compiler/precompute_prune.cc +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file precompute_prune.cc - * \brief Split the graph into a pre-compute graph and a execution graph. - * - * The pre-compute graph outputs parameters that can be taken - * by execution graph during execution phase. 
- */ -#include -#include -#include -#include -#include -#include - -namespace nnvm { -namespace compiler { - -nnvm::Graph PrecomputePrune(nnvm::Graph src) { - const auto& plist - = src.GetAttr >("param_name_list"); - std::unordered_set params(plist.begin(), plist.end()); - - std::unordered_set pruned; - nnvm::NodeEntryMap entry_var; - std::unordered_set unique_name; - // number of edges that are not variable - int non_var_edge = 0; - - auto replace_pruned_entry = [&] (const NodeEntry& e) { - if (!entry_var.count(e)) { - if (!e.node->is_variable()) { - ++non_var_edge; - } - nnvm::NodePtr var = nnvm::Node::Create(); - var->attrs.name = e.node->attrs.name; - if (e.version) { - var->attrs.name += "_" + std::to_string(e.version); - } - if (e.node->num_outputs() != 1) { - var->attrs.name += "_output" + std::to_string(e.index); - } - entry_var.emplace(e, var); - CHECK(!unique_name.count(var->attrs.name)); - unique_name.insert(var->attrs.name); - return nnvm::NodeEntry{var, 0, 0}; - } else { - return nnvm::NodeEntry{entry_var.at(e), 0, 0}; - } - }; - - DFSVisit(src.outputs, [&](const nnvm::NodePtr& n) { - bool can_be_pruned = true; - if (n->is_variable()) { - if (params.count(n->attrs.name)) { - pruned.emplace(n.get()); - } - can_be_pruned = false; - } - - for (const auto& e : n->inputs) { - if (!pruned.count(e.node.get())) { - can_be_pruned = false; - } - } - if (can_be_pruned) { - pruned.emplace(n.get()); - } else { - // scan again to find edge nodes, skip variables - for (auto& e : n->inputs) { - if (pruned.count(e.node.get())) { - e = replace_pruned_entry(e); - } - } - } - }); - - // nothing being pruned. - if (non_var_edge == 0) { - return src; - } - - for (auto& e : src.outputs) { - if (pruned.count(e.node.get())) { - e = replace_pruned_entry(e); - } - } - - nnvm::Graph pre_graph; - pre_graph.outputs.reserve(entry_var.size()); - std::vector output_names; - output_names.reserve(entry_var.size()); - - for (auto kv : entry_var) { - pre_graph.outputs.emplace_back(kv.first); - output_names.emplace_back(kv.second->attrs.name); - } - // new parameter list - pre_graph.attrs["output_names"] = - std::make_shared(std::move(output_names)); - src.attrs["precompute_graph"] = - std::make_shared(std::move(pre_graph)); - return src; -} - -NNVM_REGISTER_PASS(PrecomputePrune) -.set_body(PrecomputePrune); -} // namespace compiler -} // namespace nnvm diff --git a/nnvm/src/compiler/simplify_inference.cc b/nnvm/src/compiler/simplify_inference.cc deleted file mode 100644 index 0e33a2260986..000000000000 --- a/nnvm/src/compiler/simplify_inference.cc +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \file simplify_inference.cc - * \author Ziheng Jiang -*/ -#include -#include -#include -#include -#include -#include -#include "graph_transform.h" -#include "pattern_util.h" - -namespace nnvm { -namespace compiler { - -std::vector -BatchNormToInferUnpack(const nnvm::NodeAttrs& attrs, - nnvm::NodeEntry data, - nnvm::NodeEntry gamma, - nnvm::NodeEntry beta, - nnvm::NodeEntry moving_mean, - nnvm::NodeEntry moving_var, - TShape dshape, - TShape bshape) { - CHECK_NE(dshape.ndim(), 0); - CHECK(attrs.op); - static const Op* bn_op = Op::Get("batch_norm"); - CHECK(attrs.op == bn_op); - const auto& param = nnvm::get(attrs.parsed); - std::string bn_name = attrs.name; - - // transform batch_norm(data) to scale * data + shift - NodeEntry var_add_eps = MakeNode( - "__add_scalar__", bn_name + "_add_eps", - {moving_var}, {{"scalar", std::to_string(param.epsilon)}}); - - NodeEntry sqrt = MakeNode( - "sqrt", bn_name + "_sqrt", {var_add_eps}); - - NodeEntry scale = MakeNode( - "__rdiv_scalar__", bn_name + "_div", - {sqrt}, {{"scalar", "1"}}); - - if (param.scale) { - scale = MakeNode( - "elemwise_mul", bn_name + "_gamma_mul_div", - {scale, gamma}); - } - - NodeEntry neg_mean = MakeNode( - "negative", bn_name + "_neg_mean", {moving_mean}); - - NodeEntry shift = MakeNode( - "elemwise_mul", bn_name + "_neg_mean_mul_a", - {neg_mean, scale}); - - if (param.center) { - shift = MakeNode( - "elemwise_add", bn_name + "_add_beta", {shift, beta}); - } - int axis = param.axis; - scale = ExpandBiasToMatchAxis(scale, dshape.ndim()-bshape.ndim()+1, 1, axis); - shift = ExpandBiasToMatchAxis(shift, dshape.ndim()-bshape.ndim()+1, 1, axis); - - NodeEntry out = MakeNode("broadcast_mul", bn_name + "_a_mul_data", - {data, scale}); - out = MakeNode("broadcast_add", bn_name + "_out", - {out, shift}); - // It is invalid to ref the other values of BN after inference transform. - NodeEntry undef = MakeNode("__undef__", "undef", {}); - return {out, undef, undef}; -} - -Graph SimplifyInference(nnvm::Graph src) { - // Get attributes from the graph - const IndexedGraph& idx = src.indexed_graph(); - const ShapeVector& shape_vec = src.GetAttr("shape"); - auto transform = [&](uint32_t nid, const NodePtr& n, std::vector* ret) { - if (n->is_variable()) return false; - static const Op* bn_op = Op::Get("batch_norm"); - static const Op* dropout_op = Op::Get("dropout"); - if (n->op() == bn_op) { - *ret = BatchNormToInferUnpack( - n->attrs, - n->inputs[0], - n->inputs[1], - n->inputs[2], - n->inputs[3], - n->inputs[4], - shape_vec[idx.entry_id(nid, 0)], - shape_vec[idx.entry_id(nid, 1)]); - return true; - } else if (n->op() == dropout_op) { - NodeEntry undef = MakeNode("__undef__", "undef", {}); - *ret = {n->inputs[0], undef}; - return true; - } else { - return false; - } - }; - return GraphTransform(src, transform); -} - -NNVM_REGISTER_PASS(SimplifyInference) -.set_body(SimplifyInference) -.set_change_graph(true); - -} // namespace compiler -} // namespace nnvm diff --git a/nnvm/src/pass/plan_memory.cc b/nnvm/src/pass/plan_memory.cc index de8bc946c525..83d8f87fa9f1 100644 --- a/nnvm/src/pass/plan_memory.cc +++ b/nnvm/src/pass/plan_memory.cc @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
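The algebra that SimplifyInference relies on above is worth spelling out: at inference time batch_norm folds into a single affine transform. A scalar sanity sketch of that identity (all numeric values made up):

// Scalar check of the identity used by the deleted BatchNormToInferUnpack:
//   batch_norm(x) = gamma * (x - mean) / sqrt(var + eps) + beta
//                 = scale * x + shift
// with scale = gamma / sqrt(var + eps) and shift = beta - mean * scale.
#include <cassert>
#include <cmath>
#include <cstdio>

int main() {
  const double x = 1.7, gamma = 0.9, beta = 0.1;      // made-up values
  const double mean = 0.4, var = 2.5, eps = 1e-5;

  const double bn = gamma * (x - mean) / std::sqrt(var + eps) + beta;

  const double scale = gamma / std::sqrt(var + eps);  // folded multiplier
  const double shift = beta - mean * scale;           // folded additive term
  const double folded = scale * x + shift;

  assert(std::fabs(bn - folded) < 1e-12);
  std::printf("bn = %.12f, folded = %.12f\n", bn, folded);
  return 0;
}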
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -25,14 +25,13 @@ #include #include #include -#include #include #include "graph_algorithm.h" namespace nnvm { namespace pass { namespace { - using namespace nnvm::top; + // Return bytes of data flag. static int GetDTypeSize(int type_flag) { switch (type_flag) { diff --git a/nnvm/src/top/elemwise_op_common.h b/nnvm/src/top/elemwise_op_common.h deleted file mode 100644 index 1864850eb436..000000000000 --- a/nnvm/src/top/elemwise_op_common.h +++ /dev/null @@ -1,369 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file elemwise_op_common.h - * \brief Common operator utilities - */ -#ifndef NNVM_TOP_ELEMWISE_OP_COMMON_H_ -#define NNVM_TOP_ELEMWISE_OP_COMMON_H_ - -#include -#include -#include -#include -#include -#include -#include "op_common.h" - -namespace nnvm { -namespace top { - -template -inline bool ElemwiseAttr(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs, - const AttrType& none) { - AttrType dattr = none; - size_t in_size = in_attrs->size(); - size_t out_size = out_attrs->size(); - if (n_in != -1) - in_size = static_cast(n_in); - if (n_out != -1) - out_size = static_cast(n_out); - - auto deduce = [&](std::vector *vec, size_t size, const char *name) { - for (size_t i = 0; i < size; ++i) { - CHECK(assign(&dattr, (*vec)[i])) - << "Incompatible attr in node " << attrs.name << " at " << i << "-th " - << name << ": " << "expected " << attr_string(dattr) - << ", got " << attr_string((*vec)[i]); - } - }; - deduce(in_attrs, in_size, "input"); - if (reverse_infer) deduce(out_attrs, out_size, "output"); - - auto write = [&](std::vector *vec, size_t size, const char *name) { - for (size_t i = 0; i < size; ++i) { - CHECK(assign(&(*vec)[i], dattr)) - << "Incompatible attr in node " << attrs.name << " at " << i << "-th " - << name << ": " << "expected " << attr_string(dattr) - << ", got " << attr_string((*vec)[i]); - } - }; - write(in_attrs, in_size, "input"); - write(out_attrs, out_size, "output"); - - if (is_none(dattr)) return false; - return true; -} - -template -inline bool ElemwiseShape(const NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - if (n_in != -1) { - CHECK_EQ(in_attrs->size(), static_cast(n_in)) << " in operator " << attrs.name; - } - if (n_out != -1) { - CHECK_EQ(out_attrs->size(), static_cast(n_out)) << " in operator " << attrs.name; - } - return ElemwiseAttr( - attrs, in_attrs, out_attrs, TShape()); -} - -template -inline bool ElemwiseType(const NodeAttrs& attrs, - 
std::vector *in_attrs, - std::vector *out_attrs) { - if (n_in != -1) { - CHECK_EQ(in_attrs->size(), static_cast(n_in)) << " in operator " << attrs.name; - } - if (n_out != -1) { - CHECK_EQ(out_attrs->size(), static_cast(n_out)) << " in operator " << attrs.name; - } - return ElemwiseAttr( - attrs, in_attrs, out_attrs, -1); -} - -inline bool ElementWiseReduceShape(const NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - CHECK_EQ(out_attrs->size(), 1); - return ElemwiseAttr( - attrs, in_attrs, out_attrs, TShape()); -} - -inline bool ElementWiseReduceType(const NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - CHECK_EQ(out_attrs->size(), 1); - return ElemwiseAttr( - attrs, in_attrs, out_attrs, -1); -} - -template -inline bool ElemwiseFixedLayout(const NodeAttrs& attrs, - std::vector *in_layouts, - const std::vector *last_in_layouts, - std::vector *out_layouts, - const std::function& finfer) { - const size_t in_size = (n_in == -1) ? in_layouts->size() : static_cast(n_in); - const size_t out_size = (n_out == -1) ? out_layouts->size() : static_cast(n_out); - - auto deduce = [&](Layout *target, const std::vector *vec, - size_t size, const char *name) { - for (size_t i = 0; i < size; ++i) { - if (vec->at(i).defined()) { - if (!target->defined()) { - *target = vec->at(i); - } - CHECK_EQ(*target, vec->at(i)) - << "Incompatible attr in node " << attrs.name << " at " << i << "-th " - << name << ": " << "expected " << *target - << ", got " << vec->at(i); - } - } - }; - - Layout in, last_in, out; - deduce(&in, in_layouts, in_size, "input"); - deduce(&last_in, last_in_layouts, in_size, "input (last infer pass)"); - deduce(&out, out_layouts, out_size, "output"); - - if (!last_in.defined()) { - last_in = in; - } else { - // else we copy in_layout produced by last infer pass to in_layout, - // and let LayoutTransform pass - // to insert an layout_transform node to fix the input layout. - in = last_in; - } - - out = finfer(in); - - auto write = [](std::vector *vec, Layout& value, size_t size) { - for (size_t i = 0; i < size; ++i) { - vec->at(i) = value; - } - }; - if (in.defined()) write(in_layouts, in, in_size); - if (out.defined()) write(out_layouts, out, out_size); - - return true; -} - -/*! \brief Fix the input layout as the previous inferred (if any) and copy to output */ -template -inline bool ElemwiseFixedLayoutCopyToOut(const NodeAttrs& attrs, - std::vector *in_layouts, - const std::vector *last_in_layouts, - std::vector *out_layouts) { - return ElemwiseFixedLayout( - attrs, in_layouts, last_in_layouts, out_layouts, [](const Layout& in) { - return in; - }); -} - -/*! \brief Fix the input layout as the previous inferred (if any) and do not define output */ -template -inline bool ElemwiseFixedLayoutUnknownOut(const NodeAttrs& attrs, - std::vector *in_layouts, - const std::vector *last_in_layouts, - std::vector *out_layouts) { - return ElemwiseFixedLayout( - attrs, in_layouts, last_in_layouts, out_layouts, [](const Layout& in) { - return Layout::Undef(); - }); -} - -/*! \brief take arbitrary input layout and copy to output */ -template -inline bool ElemwiseArbitraryLayout(const NodeAttrs& attrs, - std::vector *in_layouts, - const std::vector *last_in_layouts, - std::vector *out_layouts) { - const size_t in_size = (n_in == -1) ? in_layouts->size() : static_cast(n_in); - const size_t out_size = (n_out == -1) ? 
out_layouts->size() : static_cast(n_out); - - Layout in; - for (size_t i = 0; i < in_size; ++i) { - if (!in.defined()) in = in_layouts->at(i); - CHECK_EQ(in, in_layouts->at(i)) - << "Incompatible attr in node " << attrs.name << " at " << i - << "-th input: expected " << in - << ", got " << in_layouts->at(i); - } - - if (in.defined()) { - for (size_t i = 0; i < out_size; ++i) { - out_layouts->at(i) = in; - } - } - - return true; -} - -/*! - * \brief try to convert right layout to left layout if they are different. - * if the converting fails, it will use the last inferred layouts. - */ -inline bool ElemwiseBinaryKeepLeftLayout(const NodeAttrs& attrs, - std::vector *in_layouts, - const std::vector *last_in_layouts, - std::vector *out_layouts) { - CHECK_EQ(in_layouts->size(), 2U); - CHECK_EQ(last_in_layouts->size(), 2U); - CHECK_EQ(out_layouts->size(), 1U); - - const Layout& lhs_last = (*last_in_layouts)[0]; - const Layout& rhs_last = (*last_in_layouts)[1]; - CHECK((lhs_last.defined() && rhs_last.defined()) || - (!lhs_last.defined() && !rhs_last.defined())); - - const Layout& lhs = (*in_layouts)[0]; - const Layout& rhs = (*in_layouts)[1]; - - if (!lhs.defined() && !rhs.defined()) { - CHECK(!lhs_last.defined() && !rhs_last.defined()) - << "Lost input layouts in node " << attrs.name - << ": last inferred lhs=" << lhs_last << ", rhs=" << rhs_last; - return true; - } else if (!lhs.defined()) { - CHECK(!lhs_last.defined() && !rhs_last.defined()); - in_layouts->at(0) = rhs; - out_layouts->at(0) = rhs; - return true; - } else if (!rhs.defined()) { - CHECK(!lhs_last.defined() && !rhs_last.defined()); - in_layouts->at(1) = lhs; - out_layouts->at(0) = lhs; - return true; - } - - if (lhs == rhs) { - // for same layout, we can always do binary calculation - // and pass the layout to next layer - out_layouts->at(0) = lhs; - return true; - } - - if (rhs.convertible(lhs)) { - in_layouts->at(1) = lhs; - out_layouts->at(0) = lhs; - } else { - CHECK(lhs_last.defined() && rhs_last.defined()) - << "Incompatible input layouts in node " << attrs.name - << ". 
lhs: " << lhs << ", rhs: " << rhs; - CHECK(lhs_last == rhs_last); - in_layouts->at(0) = lhs_last; - in_layouts->at(1) = rhs_last; - out_layouts->at(0) = lhs_last; - } - - return true; -} - -#define NNVM_REGISTER_ELEMWISE_UNARY_OP(name) \ - NNVM_REGISTER_OP(name) \ - .set_num_inputs(1) \ - .set_num_outputs(1) \ - .set_attr("FInferShape", ElemwiseShape<1, 1>) \ - .set_attr("FInferType", ElemwiseType<1, 1>) \ - .set_attr("FCorrectLayout", \ - ElemwiseArbitraryLayout<1, 1>) \ - .set_attr("FInplaceOption", \ - [](const NodeAttrs& attrs){ \ - return std::vector >{{0, 0}}; \ - }) \ - .add_argument("data", "Tensor", "The input tensor.") - - -#define NNVM_REGISTER_INIT_OP(name) \ - NNVM_REGISTER_OP(name) \ - .set_num_inputs(0) \ - .set_num_outputs(1) - - -#define NNVM_REGISTER_INIT_LIKE_OP(name) \ - NNVM_REGISTER_ELEMWISE_UNARY_OP(name) \ - .set_attr("FGradient", MakeZeroGradNodes) \ - .add_argument("data", "Symbol", "The input") - - -#define NNVM_REGISTER_ELEMWISE_BINARY_OP(name) \ - NNVM_REGISTER_OP(name) \ - .set_num_inputs(2) \ - .set_num_outputs(1) \ - .set_attr("FInferShape", ElemwiseShape<2, 1>) \ - .set_attr("FInferType", ElemwiseType<2, 1>) \ - .set_attr("FCorrectLayout", \ - ElemwiseBinaryKeepLeftLayout) \ - .set_attr("FInplaceOption", \ - [](const NodeAttrs& attrs) { \ - return std::vector >{{0, 0}, {1, 0}}; \ - }) \ - .add_argument("lhs", "Tensor", "first input") \ - .add_argument("rhs", "Tensor", "second input") - - -#define NNVM_REGISTER_ELEMWISE_REDUCE_OP(name) \ - NNVM_REGISTER_OP(name) \ - .set_num_inputs([](const NodeAttrs& attrs) { \ - return static_cast( \ - dmlc::get(attrs.parsed).num_args); \ - }) \ - .set_attr_parser(ParamParser) \ - .set_attr("FGetAttrDict", \ - ParamGetAttrDict) \ - .set_attr("FInferShape", \ - ElementWiseReduceShape) \ - .set_attr("FCorrectLayout", \ - ElemwiseFixedLayoutCopyToOut<-1, 1>) \ - .set_attr("FInferType", ElementWiseReduceType) \ - .add_argument("args", "Symbol[]", "Positional input arguments") - - -#define NNVM_REGISTER_INDICATOR_OP(name) \ - NNVM_REGISTER_OP(name) \ - .set_num_outputs(1) \ - .set_attr( \ - "FInferType", [](const NodeAttrs& attrs, \ - std::vector* in_attrs, \ - std::vector* out_attrs) { \ - CHECK_EQ(out_attrs->size(), 1U); \ - NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, \ - static_cast(kFloat32)); \ - return true; \ - }) \ - .set_attr("FCorrectLayout", \ - ElemwiseFixedLayoutUnknownOut<1, 1>) \ - .set_attr( \ - "FGradient", [](const NodePtr& n, \ - const std::vector& ograds) { \ - return MakeZeroGradNodes(n, ograds); \ - }) - - -} // namespace top -} // namespace nnvm -#endif // NNVM_TOP_ELEMWISE_OP_COMMON_H_ diff --git a/nnvm/src/top/image/resize.cc b/nnvm/src/top/image/resize.cc deleted file mode 100644 index a50b4ac961ea..000000000000 --- a/nnvm/src/top/image/resize.cc +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
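The NNVM_REGISTER_ELEMWISE_* macros above all ultimately rely on ElemwiseAttr, which only has to find the one known attribute value, check that every other known value agrees, and write the result back to every input and output. A standalone sketch of that deduction for dtypes, using -1 for "unknown" as the deleted header does (this is an illustration, not the deleted template itself):

// Minimal stand-alone sketch of ElemwiseAttr-style deduction for dtypes.
// -1 means "not yet inferred"; every known entry must agree, and the deduced
// value is written back to all inputs and outputs.
#include <cassert>
#include <vector>

bool ElemwiseTypeSketch(std::vector<int>* in_types, std::vector<int>* out_types) {
  int deduced = -1;
  auto scan = [&](const std::vector<int>& vec) {
    for (int t : vec) {
      if (t == -1) continue;           // unknown, nothing to learn
      if (deduced == -1) deduced = t;  // first known value wins
      assert(deduced == t && "incompatible dtypes on an elemwise op");
    }
  };
  scan(*in_types);
  scan(*out_types);
  if (deduced == -1) return false;     // nothing known yet, try again later
  for (int& t : *in_types) t = deduced;
  for (int& t : *out_types) t = deduced;
  return true;
}

int main() {
  std::vector<int> in = {0, -1};       // e.g. kFloat32 and unknown
  std::vector<int> out = {-1};
  const bool done = ElemwiseTypeSketch(&in, &out);
  assert(done && in[1] == 0 && out[0] == 0);
  return 0;
}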
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file resize.cc - * \brief Property def of resize operators. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "../nn/nn_common.h" -#include "../op_common.h" -#include "../elemwise_op_common.h" -#include "topi/elemwise.h" -#include "topi/transform.h" -#include "topi/image/resize.h" -#include "resize.h" - -namespace nnvm { -namespace top { -using tvm::Expr; -using tvm::Array; -using tvm::Tensor; -using nnvm::compiler::FTVMCompute; - -DMLC_REGISTER_PARAMETER(ResizeParam); - -inline bool ResizeInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - static const Layout kNCHW("NCHW"); - const ResizeParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_shape->size(), 1U); - CHECK_EQ(out_shape->size(), 1U); - TShape dshape = (*in_shape)[0]; - if (dshape.ndim() == 0) return false; - dshape = ConvertLayout(dshape, param.layout, kNCHW); - - TShape oshape = dshape; - oshape[2] = param.size[0]; - oshape[3] = param.size[1]; - - oshape = ConvertLayout(oshape, kNCHW, param.layout); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - - return true; -} - -inline bool ResizeLayout(const NodeAttrs& attrs, - std::vector *in_layouts, - const std::vector *last_in_layouts, - std::vector *out_layouts) { - const ResizeParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_layouts->size(), 1U); - CHECK_EQ(out_layouts->size(), 1U); - const Layout layout(param.layout); - NNVM_ASSIGN_LAYOUT(*in_layouts, 0, layout); - NNVM_ASSIGN_LAYOUT(*out_layouts, 0, layout); - return true; -} - -NNVM_REGISTER_OP(resize) -.describe(R"(Perform resize to input array with nearest neighbour or bilinear interpolation. - -- **data**: data is 4D array of shape - (batch_size, channels, in_height, in_width) for NCHW - (batch_size, in_height, in_width, channels) for NHWC - -- **out**: Output is 4D array of shape - for layout NCHW - (batch_size, channels, size[0], size[1]) - - for layout NHWC - (batch_size, size[0], size[1], channels) - -)" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tensor", "Input data.") -.add_arguments(ResizeParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", ResizeInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ResizeLayout) -.set_num_outputs(1) -.set_num_inputs(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ResizeParam& param = nnvm::get(attrs.parsed); - Array oshape; - if (param.layout == "NCHW") { - oshape.push_back(out_info[0]->shape[2]); - oshape.push_back(out_info[0]->shape[3]); - } else { - oshape.push_back(out_info[0]->shape[1]); - oshape.push_back(out_info[0]->shape[2]); - } - - return Array{ topi::image::resize(inputs[0], oshape, param.layout, - param.align_corners, param.method)}; -}) -.set_support_level(2); - -} // namespace top -} // namespace nnvm diff --git a/nnvm/src/top/image/resize.h b/nnvm/src/top/image/resize.h deleted file mode 100644 index 8c894140fabc..000000000000 --- a/nnvm/src/top/image/resize.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file resize.h - */ -#ifndef NNVM_TOP_IMAGE_RESIZE_H_ -#define NNVM_TOP_IMAGE_RESIZE_H_ - -#include -#include -#include -#include -#include - -namespace nnvm { -namespace top { - -struct ResizeParam : public dmlc::Parameter { - TShape size; - std::string layout; - std::string method; - bool align_corners; - - DMLC_DECLARE_PARAMETER(ResizeParam) { - DMLC_DECLARE_FIELD(size) - .describe("Output size"); - DMLC_DECLARE_FIELD(layout) - .set_default("NCHW") - .describe("Dimension ordering of data. Can be 'NCHW', 'NHWC', etc." - "'N', 'C', 'H', 'W' stands for batch, channel, height, and width" - "dimensions respectively. Resize is applied on the 'H' and" - "'W' dimensions."); - DMLC_DECLARE_FIELD(method) - .set_default("BILINEAR") - .describe("Specify the mode to use for scaling." - "NEAREST_NEIGHBOR - Nearest Neighbor" - "BILINEAR - Bilinear Interpolation"); - DMLC_DECLARE_FIELD(align_corners) - .set_default(false) - .describe("Should be true to preserve the values at the corner pixels"); - } -}; - -} // namespace top -} // namespace nnvm -#endif // NNVM_TOP_IMAGE_RESIZE_H_ diff --git a/nnvm/src/top/nn/convolution.cc b/nnvm/src/top/nn/convolution.cc deleted file mode 100644 index 5c3b2d35991d..000000000000 --- a/nnvm/src/top/nn/convolution.cc +++ /dev/null @@ -1,660 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \file convolution.cc - * \brief Convolution operators - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "nn_common.h" -#include "../op_common.h" -#include "../elemwise_op_common.h" -#include "topi/nn.h" - - -using tvm::Tensor; -using tvm::Array; -using nnvm::compiler::FTVMCompute; - -namespace nnvm { -namespace top { - -// conv2d -DMLC_REGISTER_PARAMETER(Conv2DParam); - -inline bool Conv2DInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - static const Layout kNCHW("NCHW"); - static const Layout kOIHW("OIHW"); - - const Conv2DParam& param = nnvm::get(attrs.parsed); - - const Layout in_layout(param.layout); - const Layout kernel_layout(param.kernel_layout); - CHECK(in_layout.convertible(kNCHW)) - << "Conv only support input layouts that are convertible from NCHW." - << " But got " << in_layout; - CHECK(kernel_layout.convertible(kOIHW)) - << "Conv only support kernel layouts that are convertible from OIHW." - << " But got "<< kernel_layout; - - Layout out_layout(param.out_layout); - if (!out_layout.defined()) out_layout = in_layout; - CHECK(out_layout.convertible(kNCHW)) - << "Conv only support output layouts that are convertible from NCHW." - << " But got " << out_layout; - - if (param.use_bias) { - CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]"; - } else { - CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]"; - } - CHECK_EQ(out_shape->size(), 1U); - - TShape dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; - dshape = ConvertLayout(dshape, in_layout, kNCHW); - - CHECK_EQ(dshape.ndim(), 4U) << "Input data should be 4D"; - CHECK_EQ(param.kernel_size.ndim(), 2U); - CHECK_EQ(param.strides.ndim(), 2U) - << "incorrect stride size: " << param.strides; - CHECK_EQ(param.dilation.ndim(), 2U) - << "incorrect dilate size: " << param.dilation; - CHECK_EQ(dshape[1] % param.groups, 0U) - << "input channels must divide group size"; - CHECK_EQ(param.channels % param.groups, 0U) - << "output channels must divide group size"; - - TShape wshape({param.channels, - dshape[1] / param.groups, - param.kernel_size[0], - param.kernel_size[1]}); - - wshape = ConvertLayout(wshape, kOIHW, kernel_layout); - - if (in_shape->at(Conv2DParam::kWeight).ndim() == 0) { - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DParam::kWeight, wshape); - } - if (param.use_bias) { - static const Layout default_bias_layout("C"); - TShape bias_shape({param.channels}); - auto oc_block = out_layout.subsizeof('C'); - if (oc_block > 0) { - size_t split_axis = (out_layout.indexof('C') < out_layout.indexof('c')) ? 1 : 0; - bias_shape = ConvertLayout(bias_shape, default_bias_layout, - default_bias_layout.split('C', split_axis, oc_block)); - } - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DParam::kBias, bias_shape); - } - // dilation - dim_t dilated_ksize_y = 1 + (param.kernel_size[0] - 1) * param.dilation[0]; - dim_t dilated_ksize_x = 1 + (param.kernel_size[1] - 1) * param.dilation[1]; - TShape oshape({dshape[0], param.channels, 0, 0}); - if (dshape[2] != 0) { - oshape[2] = (dshape[2] + param.padding[0] * 2 - dilated_ksize_y) / param.strides[0] + 1; - } - if (dshape[3] != 0) { - oshape[3] = (dshape[3] + param.padding[1] * 2 - dilated_ksize_x) / param.strides[1] + 1; - } - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, ConvertLayout(oshape, kNCHW, out_layout)); - // Perform incomplete shape inference. Fill in the missing values in data shape. - // 1) We can always fill in the batch_size. 
- // 2) We can back-calculate the input height/width if the corresponding stride is 1. - oshape = ConvertLayout((*out_shape)[0], out_layout, kNCHW); - dshape[0] = oshape[0]; - if (oshape[2] && param.strides[0] == 1) { - dshape[2] = oshape[2] + dilated_ksize_y - 1 - 2 * param.padding[0]; - } - if (oshape[3] && param.strides[1] == 1) { - dshape[3] = oshape[3] + dilated_ksize_x - 1 - 2 * param.padding[1]; - } - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DParam::kData, - ConvertLayout(dshape, kNCHW, in_layout)); - // Check whether the kernel sizes are valid - if (dshape[2] != 0) { - CHECK_LE(dilated_ksize_y, dshape[2] + 2 * param.padding[0]) - << "kernel size exceed input"; - } - if (dshape[3] != 0) { - CHECK_LE(dilated_ksize_x, dshape[3] + 2 * param.padding[1]) - << "kernel size exceed input"; - } - return true; -} - -template -inline bool WinogradConv2DInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - static const Layout kNCHW("NCHW"); - static const Layout kOIHW("OIHW"); - - const Param& param = nnvm::get(attrs.parsed); - - const Layout in_layout(param.layout); - const Layout kernel_layout(param.kernel_layout); - CHECK(in_layout.convertible(kNCHW)) - << "Conv only support input layouts that are convertible from NCHW." - << " But got " << in_layout; - CHECK(kernel_layout.convertible(kOIHW)) - << "Conv only support kernel layouts that are convertible from OIHW." - << " But got "<< kernel_layout; - - Layout out_layout(param.out_layout); - if (!out_layout.defined()) out_layout = in_layout; - CHECK(out_layout.convertible(kNCHW)) - << "Conv only support output layouts that are convertible from NCHW." - << " But got " << out_layout; - - if (param.use_bias) { - CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]"; - } else { - CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]"; - } - CHECK_EQ(out_shape->size(), 1U); - - TShape dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; - dshape = ConvertLayout(dshape, in_layout, kNCHW); - - CHECK_EQ(dshape.ndim(), 4U) << "Input data should be 4D"; - CHECK_EQ(param.kernel_size.ndim(), 2U); - CHECK_EQ(param.strides.ndim(), 2U) - << "incorrect stride size: " << param.strides; - CHECK_EQ(param.dilation.ndim(), 2U) - << "incorrect dilate size: " << param.dilation; - CHECK_EQ(dshape[1] % param.groups, 0U) - << "input channels must divide group size"; - CHECK_EQ(param.channels % param.groups, 0U) - << "output channels must divide group size"; - - // NOTE: Do not check weight shape here! - // Different backend requires different layout to compute - // the batch gemm stage in winograd efficiently, but we want to - // make this NNVM symbol work for all backends. - // So we accept all weight shapes, and assume the TOPI developers - // can handle this correctly in alter_op_layout. - - if (param.use_bias) { - static const Layout default_bias_layout("C"); - TShape bias_shape({param.channels}); - auto oc_block = out_layout.subsizeof('C'); - if (oc_block > 0) { - size_t split_axis = (out_layout.indexof('C') < out_layout.indexof('c')) ? 
1 : 0; - bias_shape = ConvertLayout(bias_shape, default_bias_layout, - default_bias_layout.split('C', split_axis, oc_block)); - } - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, WinogradConv2DParam::kBias, bias_shape); - } - // dilation - dim_t dilated_ksize_y = 1 + (param.kernel_size[0] - 1) * param.dilation[0]; - dim_t dilated_ksize_x = 1 + (param.kernel_size[1] - 1) * param.dilation[1]; - TShape oshape({dshape[0], param.channels, 0, 0}); - if (dshape[2] != 0) { - oshape[2] = (dshape[2] + param.padding[0] * 2 - dilated_ksize_y) / param.strides[0] + 1; - } - if (dshape[3] != 0) { - oshape[3] = (dshape[3] + param.padding[1] * 2 - dilated_ksize_x) / param.strides[1] + 1; - } - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, ConvertLayout(oshape, kNCHW, out_layout)); - // Perform incomplete shape inference. Fill in the missing values in data shape. - // 1) We can always fill in the batch_size. - // 2) We can back-calculate the input height/width if the corresponding stride is 1. - oshape = ConvertLayout((*out_shape)[0], out_layout, kNCHW); - dshape[0] = oshape[0]; - if (oshape[2] && param.strides[0] == 1) { - dshape[2] = oshape[2] + dilated_ksize_y - 1 - 2 * param.padding[0]; - } - if (oshape[3] && param.strides[1] == 1) { - dshape[3] = oshape[3] + dilated_ksize_x - 1 - 2 * param.padding[1]; - } - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, WinogradConv2DParam::kData, - ConvertLayout(dshape, kNCHW, in_layout)); - // Check whether the kernel sizes are valid - if (dshape[2] != 0) { - CHECK_LE(dilated_ksize_y, dshape[2] + 2 * param.padding[0]) - << "kernel size exceed input"; - } - if (dshape[3] != 0) { - CHECK_LE(dilated_ksize_x, dshape[3] + 2 * param.padding[1]) - << "kernel size exceed input"; - } - return true; -} - -template -inline bool Conv2DInferType(const nnvm::NodeAttrs& attrs, - std::vector* in_type, - std::vector* out_type) { - const PARAM& param = nnvm::get(attrs.parsed); - if (param.use_bias) { - CHECK_EQ(in_type->size(), 3U) << "Input:[data, weight, bias]"; - } else { - CHECK_EQ(in_type->size(), 2U) << "Input:[data, weight]"; - } - CHECK_EQ(out_type->size(), 1U); - if (param.out_dtype != -1) { - CHECK(!type_is_none((*in_type)[0])); - for (size_t i = 1; i < in_type->size(); ++i) { - NNVM_ASSIGN_INPUT_TYPE(attrs, *in_type, i, (*in_type)[0]); - } - NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_type, 0, param.out_dtype); - } else { - ElemwiseType<-1, 1>(attrs, in_type, out_type); - } - return true; -} - - -template -inline bool Conv2DCorrectLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - const PARAM& param = nnvm::get(attrs.parsed); - - const Layout in_layout(param.layout); - Layout out_layout(param.out_layout); - if (!out_layout.defined()) out_layout = in_layout; - - const Layout kernel_layout(param.kernel_layout); - if (param.use_bias) { - CHECK_EQ(ilayouts->size(), 3U) << "Input:[data, weight, bias]"; - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, in_layout); - NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kernel_layout); - // automatically decide bias layout - Layout bias_layout("C"); - auto oc_block = out_layout.subsizeof('C'); - if (oc_block > 0) { - size_t split_axis = (out_layout.indexof('C') < out_layout.indexof('c')) ? 
1 : 0; - bias_layout = bias_layout.split('C', split_axis, oc_block); - } - NNVM_ASSIGN_LAYOUT(*ilayouts, 2, bias_layout); - } else { - CHECK_EQ(ilayouts->size(), 2U) << "Input:[data, weight]"; - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, in_layout); - NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kernel_layout); - } - - CHECK_EQ(olayouts->size(), 1U); - NNVM_ASSIGN_LAYOUT(*olayouts, 0, out_layout); - - return true; -} - -NNVM_REGISTER_OP(conv2d) -.describe(R"code(2D convolution layer (e.g. spatial convolution over images). - -This layer creates a convolution kernel that is convolved -with the layer input to produce a tensor of -outputs. If `use_bias` is True, -a bias vector is created and added to the outputs. - -- **data**: This depends on the `layout` parameter. Input is 4D array of shape - (batch_size, in_channels, height, width) if `layout` is `NCHW`. -- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1]) -- **bias**: (channels,) -- **out**: This depends on the `layout` parameter. Output is 4D array of shape - (batch_size, channels, out_height, out_width) if `layout` is `NCHW`. - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tensor", "Input data.") -.add_argument("weight", "4D Tensor", "Weight matrix.") -.add_argument("bias", "1D Tensor", "Bias parameter.") -.add_arguments(Conv2DParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FListInputNames", UseBiasListInputNames) -.set_attr("FInferShape", Conv2DInferShape) -.set_attr("FInferType", Conv2DInferType) -.set_attr("FCorrectLayout", Conv2DCorrectLayout) -.set_num_outputs(1) -.set_num_inputs(UseBiasNumInputs) -.set_support_level(2) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - return MakeGradNode("_conv2d_grad", n, - {ograds[0], n->inputs[Conv2DParam::kData], - n->inputs[Conv2DParam::kWeight]}, - n->attrs.dict); -}); - -NNVM_REGISTER_OP(_contrib_conv2d_NCHWc) -.describe(R"code(2D convolution layer (e.g. spatial convolution over images). -)code" NNVM_ADD_FILELINE) -.add_argument("data", "5D Tensor", "Packed input data.") -.add_argument("weight", "6D Tensor", "Packed weight matrix.") -.add_argument("bias", "1D Tensor", "Bias parameter.") -.add_arguments(Conv2DParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FListInputNames", UseBiasListInputNames) -.set_attr("FInferShape", Conv2DInferShape) -.set_attr("FInferType", Conv2DInferType) -.set_attr("FCorrectLayout", Conv2DCorrectLayout) -.set_num_outputs(1) -.set_num_inputs(UseBiasNumInputs) -.set_support_level(2); - -NNVM_REGISTER_OP(_contrib_conv2d_winograd_weight_transform) -.describe(R"code(Weight transformation of winograd fast convolution algorithm. -Separate this into another nnvm symbol in order to enable Precompute Pass to compute the -weight transformation in advance. 
- -- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1]) -)code" NNVM_ADD_FILELINE) -.add_argument("weight", "4D Tensor", "Weight tensor.") -.add_arguments(WinogradWeightTransformParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", [](const nnvm::NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { - const auto& param = nnvm::get(attrs.parsed); - const TShape &wshape = (*in_shape)[0]; - - CHECK_EQ(wshape.ndim(), 4) << "Weight should be a 4 dimensional tensor"; - - TShape oshape({param.tile_size + wshape[2] - 1, - param.tile_size + wshape[3] - 1, - wshape[0], - wshape[1]}); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - return true; - }) -.set_attr("FCorrectLayot", [](const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - Layout layout("OIHW"); - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, layout); - NNVM_ASSIGN_LAYOUT(*olayouts, 0, layout); - return true; -}) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_num_outputs(1) -.set_num_inputs(1) -.set_support_level(5); - -DMLC_REGISTER_PARAMETER(WinogradWeightTransformParam); - -NNVM_REGISTER_OP(_contrib_conv2d_winograd_without_weight_transform) -.describe(R"code(Compute conv2d with winograd algorithm. - -- **data**: Input is 4D array of shape (batch_size, in_channels, height, width) -- **weight**: Any shape - We do not check shape for this input tensor. - -- **bias**: (channels,) -- **out**: Output is 4D array of shape (batch_size, channels, out_height, out_width) -)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tensor", "Input data.") -.add_argument("weight", "Tensor", "Transformed weight tensor.") -.add_argument("bias", "1D Tensor", "Bias parameter.") -.add_arguments(WinogradConv2DParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FListInputNames", UseBiasListInputNames) -.set_attr("FInferShape", WinogradConv2DInferShape) -.set_attr("FInferType", Conv2DInferType) -.set_attr("FCorrectLayout", Conv2DCorrectLayout) -.set_num_outputs(1) -.set_num_inputs(UseBiasNumInputs) -.set_support_level(5); - -DMLC_REGISTER_PARAMETER(WinogradConv2DParam); - - -inline bool Conv2DWinogradNNPACKWTInferType(const nnvm::NodeAttrs& attrs, - std::vector* in_type, - std::vector* out_type) { - const WinogradNNPACKWeightTransformParam& param = - nnvm::get(attrs.parsed); - - CHECK_EQ(in_type->size(), 1U) << "Input:[weight]"; - CHECK_EQ(out_type->size(), 1U); - - if (param.out_dtype != -1) { - NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_type, 0, param.out_dtype); - } else { - ElemwiseType<1, 1>(attrs, in_type, out_type); - } - return true; -} - -NNVM_REGISTER_OP(_contrib_conv2d_winograd_nnpack_weight_transform) -.describe(R"code(Weight transformation of winograd fast convolution algorithm. -Separate this into another nnvm symbol in order to enable Precompute Pass to compute the -weight transformation in advance. 
-- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1]) -)code" NNVM_ADD_FILELINE) -.add_argument("weight", "4D Tensor", "Weight tensor.") -.add_arguments(WinogradNNPACKWeightTransformParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", [](const nnvm::NodeAttrs& attrs, - std::vector *in_shape, - std::vector *out_shape) { - const TShape &wshape = (*in_shape)[0]; - CHECK_EQ(wshape.ndim(), 4) << "Weight should be a 4 dimensional tensor"; - TShape oshape({wshape[0], wshape[1], 8, 8}); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - return true; -}) -.set_attr("FCorrectLayout", [](const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - Layout layout("OIHW"); - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, layout); - NNVM_ASSIGN_LAYOUT(*olayouts, 0, layout); - return true; -}) -.set_attr("FInferType", Conv2DWinogradNNPACKWTInferType) -.set_num_outputs(1) -.set_num_inputs(1) -.set_support_level(5); - -DMLC_REGISTER_PARAMETER(WinogradNNPACKWeightTransformParam); - -NNVM_REGISTER_OP(_contrib_conv2d_winograd_nnpack_without_weight_transform) -.describe(R"code(Compute conv2d with winograd nnpack. -- **data**: Input is 4D array of shape (batch_size, in_channels, height, width) -- **weight**: Any shape - We do not check shape for this input tensor. -- **bias**: (channels,) -- **out**: Output is 4D array of shape (batch_size, channels, out_height, out_width) -)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tensor", "Input data.") -.add_argument("weight", "4D Tensor", "Transformed weight tensor.") -.add_argument("bias", "1D Tensor", "Bias parameter.") -.add_arguments(Conv2DParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FListInputNames", UseBiasListInputNames) -.set_attr("FInferShape", WinogradConv2DInferShape) -.set_attr("FInferType", Conv2DInferType) -.set_attr("FCorrectLayout", Conv2DCorrectLayout) -.set_num_outputs(1) -.set_num_inputs(UseBiasNumInputs) -.set_support_level(5); - - -NNVM_REGISTER_OP(_conv2d_grad) - .describe(R"code(2D convolution grad. 
- -)code" NNVM_ADD_FILELINE) -.add_argument("ograd", "4D Tensor", "Output grad.") -.add_argument("data", "4D Tensor", "Input data of conv2d.") -.add_argument("weight", "4D Tensor", "Input weight.") -.set_num_inputs(3) -.set_num_outputs(UseBiasNumInputs) -.set_attr("FListOutputNames", UseBiasListInputNames) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr( - "FInferShape", [](const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - const Conv2DParam& param = nnvm::get(attrs.parsed); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kData, in_attrs->at(1)); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kWeight, in_attrs->at(2)); - if (param.use_bias) { - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, Conv2DParam::kBias, TShape({param.channels})); - } - return true; -}) -.set_attr("FInferType", ElemwiseType<3, -1>) -.set_attr("TIsBackward", true); - - -DMLC_REGISTER_PARAMETER(Conv2DTransposeParam); - -inline bool Conv2DTransposeInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - static const Layout kNCHW("NCHW"); - static const Layout kOIHW("OIHW"); - const Conv2DTransposeParam& param = nnvm::get(attrs.parsed); - const Layout layout(param.layout); - const Layout kernel_layout(param.kernel_layout); - if (param.use_bias) { - CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]"; - } else { - CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]"; - } - CHECK_EQ(out_shape->size(), 1U); - - const TShape& dshape = (*in_shape)[Conv2DTransposeParam::kData]; - if (dshape.ndim() == 0) return false; - TShape dshape_nchw = ConvertLayout(dshape, layout, kNCHW); - - CHECK_EQ(dshape_nchw[1] % param.groups, 0U) - << "input num_filter must divide group size"; - CHECK_EQ(param.channels % param.groups, 0U) - << "output num_filter must divide group size"; - CHECK_EQ(param.kernel_size.ndim(), 2U) - << "incorrect kernel size: " << param.kernel_size; - CHECK_EQ(param.strides.ndim(), 2U) - << "incorrect stride size: " << param.strides; - CHECK_EQ(param.dilation.ndim(), 2U) - << "incorrect dilate size: " << param.dilation; - - TShape wshape({dshape_nchw[1], - param.channels / param.groups, - param.kernel_size[0], - param.kernel_size[1]}); - wshape = ConvertLayout(wshape, kOIHW, kernel_layout); - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, Conv2DTransposeParam::kWeight, wshape); - - if (param.use_bias) { - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, - Conv2DTransposeParam::kBias, - TShape({param.channels})); - } - // dilation - dim_t dilated_ksize_y = 1 + (param.kernel_size[0] - 1) * param.dilation[0]; - dim_t dilated_ksize_x = 1 + (param.kernel_size[1] - 1) * param.dilation[1]; - // output shape. 
- TShape oshape({dshape_nchw[0], param.channels, 0, 0}); - oshape[2] = (param.strides[0] * (dshape_nchw[2] - 1) + dilated_ksize_y - - 2 * param.padding[0] + param.output_padding[0]); - - oshape[3] = (param.strides[1] * (dshape_nchw[3] - 1) + dilated_ksize_x - - 2 * param.padding[1] + param.output_padding[1]); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, - ConvertLayout(oshape, kNCHW, layout)); - return true; -} - -inline bool Conv2DTransposeCorrectLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - const Conv2DTransposeParam& param = nnvm::get(attrs.parsed); - - const Layout in_layout(param.layout); - - const Layout kernel_layout(param.kernel_layout); - if (param.use_bias) { - CHECK_EQ(ilayouts->size(), 3U) << "Input:[data, weight, bias]"; - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, in_layout); - NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kernel_layout); - NNVM_ASSIGN_LAYOUT(*ilayouts, 2, Layout("C")); - } else { - CHECK_EQ(ilayouts->size(), 2U) << "Input:[data, weight]"; - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, in_layout); - NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kernel_layout); - } - - CHECK_EQ(olayouts->size(), 1U); - NNVM_ASSIGN_LAYOUT(*olayouts, 0, in_layout); - - return true; -} - -NNVM_REGISTER_OP(conv2d_transpose) -.describe(R"code(Transposed 2D convolution layer (sometimes called Deconvolution). - -The need for transposed convolutions generally arises -from the desire to use a transformation going in the opposite direction -of a normal convolution, i.e., from something that has the shape of the -output of some convolution to something that has the shape of its input -while maintaining a connectivity pattern that is compatible with -said convolution. - -- **data**: This depends on the `layout` parameter. Input is 4D array of shape - (batch_size, in_channels, height, width) if `layout` is `NCHW`. -- **weight**: (in_channels, channels, kernel_size[0], kernel_size[1]) -- **bias**: (channels,) -- **out**: This depends on the `layout` parameter. Output is 4D array of shape -v (batch_size, channels, out_height, out_width) if `layout` is `NCHW`. - - out_height and out_width are calculated as:: - out_height = (height-1)*strides[0]-2*padding[0]+kernel_size[0]+output_padding[0] - out_width = (width-1)*strides[1]-2*padding[1]+kernel_size[1]+output_padding[1] - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tensor", "Input data.") -.add_argument("weight", "4D Tensor", "Weight matrix.") -.add_argument("bias", "1D Tensor", "Bias parameter.") -.add_arguments(Conv2DTransposeParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FListInputNames", UseBiasListInputNames) -.set_attr("FInferShape", Conv2DTransposeInferShape) -.set_attr("FInferType", Conv2DInferType) -.set_attr("FCorrectLayout", Conv2DTransposeCorrectLayout) -.set_num_outputs(1) -.set_num_inputs(UseBiasNumInputs) -.set_support_level(2); - -} // namespace top -} // namespace nnvm diff --git a/nnvm/src/top/nn/nn.cc b/nnvm/src/top/nn/nn.cc deleted file mode 100644 index 1864ccd3506f..000000000000 --- a/nnvm/src/top/nn/nn.cc +++ /dev/null @@ -1,748 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
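As a standalone check of the transposed-convolution output-size arithmetic implemented in Conv2DTransposeInferShape above (the helper name and sample numbers below are illustrative, not from this source):

```cpp
#include <cstdio>

// Output extent of conv2d_transpose along one spatial axis, as computed above:
// stride*(in - 1) + dilated_kernel - 2*pad + output_pad,
// with dilated_kernel = 1 + (kernel - 1)*dilation.
int TransposedConvOut(int in, int kernel, int stride, int pad,
                      int output_pad, int dilation = 1) {
  int dilated_kernel = 1 + (kernel - 1) * dilation;
  return stride * (in - 1) + dilated_kernel - 2 * pad + output_pad;
}

int main() {
  // A typical 2x upsampling deconvolution: 4x4 kernel, stride 2, pad 1.
  std::printf("H: %d -> %d\n", 16, TransposedConvOut(16, 4, 2, 1, 0));  // 16 -> 32
  std::printf("W: %d -> %d\n", 20, TransposedConvOut(20, 4, 2, 1, 0));  // 20 -> 40
  return 0;
}
```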
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file nn.cc - * \brief Property def of nn operators. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "nn_common.h" -#include "../op_common.h" -#include "../elemwise_op_common.h" -#include "topi/nn/dense.h" -#include "topi/nn.h" -#include "topi/nn/softmax.h" - -namespace nnvm { -namespace top { - -using tvm::Var; -using tvm::Expr; -using tvm::Tensor; -using tvm::Array; -using nnvm::compiler::FTVMCompute; - -// dense -DMLC_REGISTER_PARAMETER(DenseParam); - -inline bool DenseInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - const DenseParam& param = nnvm::get(attrs.parsed); - if (param.use_bias) { - CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]"; - } else { - CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]"; - } - CHECK_EQ(out_shape->size(), 1U); - // reverse infer - if ((*out_shape)[0].ndim() != 0) { - TShape dshape = (*out_shape)[0]; - dshape[dshape.ndim() - 1] = 0; - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, DenseParam::kData, dshape); - } - dim_t num_inputs = 0; - if ((*in_shape)[DenseParam::kData].ndim() != 0) { - TShape oshape = (*in_shape)[DenseParam::kData]; - num_inputs = oshape[oshape.ndim() - 1]; - oshape[oshape.ndim() - 1] = param.units; - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - } - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, DenseParam::kWeight, - TShape({param.units, num_inputs})); - if (param.use_bias) { - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, DenseParam::kBias, TShape({param.units})); - } - return true; -} - -NNVM_REGISTER_OP(dense) -.describe(R"code(Applies a linear transformation: :math:`Y = XW^T + b`. - -- **data**: `(x1, x2, ..., xn, input_dim)` -- **weight**: `(units, input_dim)` -- **bias**: `(units,)` -- **out**: `(x1, x2, ..., xn, units)` - -The learnable parameters include both ``weight`` and ``bias``. - -If ``use_bias`` is set to be false, then the ``bias`` term is ignored. 
- -)code" NNVM_ADD_FILELINE) -.add_argument("data", "nD Tensor", "Input data.") -.add_argument("weight", "2D Tensor", "Weight matrix.") -.add_argument("bias", "1D Tensor", "Bias parameter.") -.add_arguments(DenseParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_num_outputs(1) -.set_num_inputs(UseBiasNumInputs) -.set_attr("FListInputNames", UseBiasListInputNames) -.set_attr("FInferShape", DenseInferShape) -.set_attr("FInferType", ElemwiseType<-1, 1>) -// leave weight & bias layout undefined -.set_attr("FCorrectLayout", ElemwiseFixedLayoutCopyToOut<1, 1>) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - const DenseParam& param = nnvm::get(n->attrs.parsed); - - NodeEntry data_grad = MakeNode("matmul", - n->attrs.name + "_data_grad", - {ograds[0], n->inputs[DenseParam::kWeight]}); - NodeEntry w_grad_sub = MakeNode("matmul", - n->attrs.name + "_weight_grad_sub0", - {ograds[0], n->inputs[DenseParam::kData]}, - {{"transpose_a", "true"}}); - TShape w_reduce_axis = {0, -1}; - std::ostringstream w_oss; w_oss << w_reduce_axis; - NodeEntry w_grad = MakeNode("sum", n->attrs.name + "_weight_grad", - {w_grad_sub}, - {{"axis", w_oss.str()}, {"exclude", "true"}}); - std::vector grads = {data_grad, w_grad}; - - if (param.use_bias) { - TShape axis = {-1}; - std::ostringstream b_oss; b_oss << axis; - grads.push_back(MakeNode("sum", n->attrs.name + "_bias_grad", - {ograds[0]}, - {{"axis", b_oss.str()}, {"exclude", "true"}})); - } - return grads; -}) -.set_support_level(1); - -// relu -NNVM_REGISTER_ELEMWISE_UNARY_OP(relu) -.describe(R"code(Computes rectified linear. - -.. math:: - max(input, 0) - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::relu(inputs[0], 0.0f) }; - }) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - // y = relu(x) - // grad = indicator(x > 0) * ograd - NodeEntry sub0 = MakeNode("zeros_like", n->attrs.name + "_sub0", - {n->inputs[0]}); - NodeEntry sub1 = MakeNode("greater", n->attrs.name + "_sub1", - {n->inputs[0], sub0}, {{"exclude", "true"}}); - return std::vector{ - MakeNode("elemwise_mul", n->attrs.name + "_grad", - {ograds[0], sub1}) - }; -}) -.set_support_level(1); - -// dropout -DMLC_REGISTER_PARAMETER(DropoutParam); - -NNVM_REGISTER_OP(dropout) -.describe(R"(Applies dropout operation to input array. - -- During training, each element of the input is set to zero with probability p. - The whole array is rescaled by :math:`1/(1-p)` to keep the expected - sum of the input unchanged. 
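A small standalone illustration of the dense convention documented above: weight is stored as (units, input_dim), so the op computes Y = X * W^T plus an optional bias. The helper below and its sample values are illustrative only.

```cpp
#include <cstdio>
#include <vector>

// Y[i][u] = b[u] + sum_k X[i][k] * W[u][k], with W laid out as (units, input_dim)
// exactly as DenseInferShape assigns it above.
std::vector<std::vector<float>> Dense(const std::vector<std::vector<float>>& x,
                                      const std::vector<std::vector<float>>& w,
                                      const std::vector<float>& b) {
  size_t batch = x.size(), units = w.size(), in_dim = w[0].size();
  std::vector<std::vector<float>> y(batch, std::vector<float>(units, 0.f));
  for (size_t i = 0; i < batch; ++i)
    for (size_t u = 0; u < units; ++u) {
      float acc = b.empty() ? 0.f : b[u];  // use_bias=false corresponds to an empty b
      for (size_t k = 0; k < in_dim; ++k) acc += x[i][k] * w[u][k];
      y[i][u] = acc;
    }
  return y;
}

int main() {
  auto y = Dense({{1.f, 2.f, 3.f}},             // data:   (1, 3)
                 {{1.f, 0.f, 0.f},              // weight: (2, 3) == (units, input_dim)
                  {0.f, 1.f, 1.f}},
                 {0.5f, -0.5f});                // bias:   (2,)
  std::printf("y = [%.1f, %.1f]\n", y[0][0], y[0][1]);  // [1.5, 4.5]
  return 0;
}
```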
- -)" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input to which dropout will be applied") -.add_arguments(DropoutParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_num_inputs(1) -.set_num_outputs(2) -.set_attr("FInferShape", ElemwiseShape<1, 2>) -.set_attr("FInferType", ElemwiseType<1, 2>) -.set_attr("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>) -.set_attr("FNumVisibleOutputs", [](const NodeAttrs& attrs) { - return 1; - }) -.set_attr("FListOutputNames", [](const NodeAttrs& attrs) { - return std::vector{"output", "mask"}; - }) -.set_support_level(1); - -// batchnorm -DMLC_REGISTER_PARAMETER(BatchNormParam); - -inline bool BatchNormInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - const BatchNormParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_shape->size(), 5U) - << "Input:[data, gamma, beta, moving_mean, moving_var]"; - CHECK_EQ(out_shape->size(), 3U); - const TShape &dshape = in_shape->at(0); - if (dshape.ndim() == 0) return false; - CHECK((size_t)param.axis < dshape.Size()); - - TShape bshape({dshape[param.axis]}); - if (in_shape->at(1).ndim() == 0) in_shape->at(1) = bshape; - if (in_shape->at(2).ndim() == 0) in_shape->at(2) = bshape; - if (in_shape->at(3).ndim() == 0) in_shape->at(3) = bshape; - if (in_shape->at(4).ndim() == 0) in_shape->at(4) = bshape; - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, dshape); - out_shape->at(1) = in_shape->at(3); - out_shape->at(2) = in_shape->at(4); - return true; -} - -inline bool BatchNormCorrectLayout(const NodeAttrs& attrs, - std::vector *in_layouts, - const std::vector *last_in_layouts, - std::vector *out_layouts) { - const BatchNormParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_layouts->size(), 5U); - CHECK_EQ(last_in_layouts->size(), 5U); - CHECK_EQ(out_layouts->size(), 3U); - - Layout data_layout = in_layouts->at(0); - const Layout& origin_data_layout = last_in_layouts->at(0); - Layout param_layout("C"); - if (data_layout.defined()) { - if (data_layout.indexof('C') != param.axis) { - CHECK(origin_data_layout.defined()) - << "Channel in data layout " << data_layout - << " is not at index " << param.axis; - // convert it to the original one. - data_layout = origin_data_layout; - NNVM_ASSIGN_LAYOUT(*in_layouts, 0, origin_data_layout); - } else if (data_layout.indexof('c') >= 0 && - static_cast(data_layout.indexof('c')) != (data_layout.ndim()-1)) { - CHECK(origin_data_layout.defined()) - << "sub-channel c in data layout " << data_layout - << " does not at the final dimension"; - // convert it to the original one. - data_layout = origin_data_layout; - NNVM_ASSIGN_LAYOUT(*in_layouts, 0, origin_data_layout); - } else { - for (Layout::LayoutDim axis : data_layout) { - if (Layout::is_subdim(axis) && axis != 'c') { - CHECK(origin_data_layout.defined()) - << "sub-axis other than c appears in data layout " << data_layout; - // convert it to the original one. 
- data_layout = origin_data_layout; - NNVM_ASSIGN_LAYOUT(*in_layouts, 0, origin_data_layout); - break; - } - } - } - - // decide the param layout - if (data_layout.defined()) { - auto channel_block = data_layout.subsizeof('C'); - if (channel_block > 0) { - param_layout = param_layout.split('C', 1, channel_block); - } - } - } - - NNVM_ASSIGN_LAYOUT(*in_layouts, 0, data_layout); - NNVM_ASSIGN_LAYOUT(*in_layouts, 1, param_layout); - NNVM_ASSIGN_LAYOUT(*in_layouts, 2, param_layout); - NNVM_ASSIGN_LAYOUT(*in_layouts, 3, param_layout); - NNVM_ASSIGN_LAYOUT(*in_layouts, 4, param_layout); - - NNVM_ASSIGN_LAYOUT(*out_layouts, 0, data_layout); - NNVM_ASSIGN_LAYOUT(*out_layouts, 1, param_layout); - NNVM_ASSIGN_LAYOUT(*out_layouts, 2, param_layout); - return true; -} - -NNVM_REGISTER_OP(batch_norm) -.describe(R"(Batch normalization layer (Ioffe and Szegedy, 2014). -Normalizes the input at each batch, i.e. applies a transformation -that maintains the mean activation close to 0 and the activation -standard deviation close to 1. - -.. math:: - - data\_mean[i] = mean(data[:,i,:,...]) \\ - data\_var[i] = var(data[:,i,:,...]) - -Then compute the normalized output, which has the same shape as input, as following: - -.. math:: - - out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i] - -Both *mean* and *var* returns a scalar by treating the input as a vector. - -Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta`` have shape *(k,)*. - -Besides the inputs and the outputs, this operator accepts two auxiliary -states, ``moving_mean`` and ``moving_var``, which are *k*-length -vectors. They are global statistics for the whole dataset, which are updated -by:: - - moving_mean = moving_mean * momentum + data_mean * (1 - momentum) - moving_var = moving_var * momentum + data_var * (1 - momentum) - -The parameter ``axis`` specifies which axis of the input shape denotes -the 'channel' (separately normalized groups). The default is 1. Specifying -1 sets the channel -axis to be the last item in the input shape. - -.. note:: - This operator can be optimized away for inference. -)" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input to which dropout will be applied") -.add_argument("gamma", "Tensor", "The gamma scale factor") -.add_argument("beta", "Tensor", "The beta offset factor") -.add_argument("moving_mean", "Tensor", "running mean of input") -.add_argument("moving_var", "Tensor", "running variance of input") -.add_arguments(BatchNormParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FCorrectLayout", BatchNormCorrectLayout) -.set_num_inputs(5) -.set_num_outputs(3) -.set_attr("FInferShape", BatchNormInferShape) -.set_attr("FInferType", ElemwiseType<5, 3>) -.set_attr("FListInputNames", [](const NodeAttrs& attrs) { - return std::vector{"data", "gamma", "beta", "moving_mean", "moving_var"}; - }) -.set_attr("FListOutputNames", [](const NodeAttrs& attrs) { - return std::vector{"output", "mean", "var"}; - }) -.set_attr("FNumVisibleOutputs", [](const NodeAttrs& attrs) { - return 1; - }) -.set_attr("FMutateInputs", [](const NodeAttrs& attrs) { - return std::vector{3, 4}; - }) -.set_support_level(1); - -// softmax -DMLC_REGISTER_PARAMETER(SoftmaxParam); - -NNVM_REGISTER_OP(softmax) -.describe(R"code(Computes softmax. - -.. math:: \text{softmax}(x)_i = \frac{exp(x_i)}{\sum_j exp(x_j)} - -.. note:: - This operator can be optimized away for inference. 
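A standalone numerical sketch of the batch_norm equations in the description above, applied to a single channel: normalize with the batch statistics, scale and shift with gamma and beta, then update the moving statistics with the given momentum. Function and variable names are illustrative.

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// One channel of batch_norm in training mode, following the description above:
//   out = (x - mean) / sqrt(var + eps) * gamma + beta
//   moving_stat = moving_stat * momentum + batch_stat * (1 - momentum)
void BatchNormChannel(std::vector<float>* x, float gamma, float beta, float eps,
                      float momentum, float* moving_mean, float* moving_var) {
  float mean = 0.f, var = 0.f;
  for (float v : *x) mean += v;
  mean /= x->size();
  for (float v : *x) var += (v - mean) * (v - mean);
  var /= x->size();
  for (float& v : *x) v = (v - mean) / std::sqrt(var + eps) * gamma + beta;
  *moving_mean = *moving_mean * momentum + mean * (1.f - momentum);
  *moving_var  = *moving_var  * momentum + var  * (1.f - momentum);
}

int main() {
  std::vector<float> x = {1.f, 2.f, 3.f, 4.f};
  float moving_mean = 0.f, moving_var = 1.f;
  BatchNormChannel(&x, /*gamma=*/1.f, /*beta=*/0.f, /*eps=*/1e-5f,
                   /*momentum=*/0.9f, &moving_mean, &moving_var);
  std::printf("normalized: %.3f %.3f %.3f %.3f\n", x[0], x[1], x[2], x[3]);
  std::printf("moving_mean=%.3f moving_var=%.3f\n", moving_mean, moving_var);
  return 0;
}
```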
-)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input data.") -.add_arguments(SoftmaxParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr("FInferShape", ElemwiseShape<1, 1>) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ElemwiseFixedLayoutCopyToOut<1, 1>) -.set_support_level(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const SoftmaxParam& param = nnvm::get(attrs.parsed); - return Array{ topi::nn::softmax(inputs[0], param.axis) }; - }) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - // grad_x = grad_y dot jacobian of softmax - // - // jacobian of softmax - // [-y1y1 + y1, -y1y2, ... ] - // [ ... , -y2y2 + y2, ... ] - // [ ... ... ] - // [ ... ,-ynyn + yn] - // - // grad_x = - // [-y1*(ograd1*y1 - ograd1 + ograd2*y2 + ...), - // -y2*(ograd1*y1 - ograd2 + ograd2*y2 + ...), - // ... - // -yn*(ograd1*y1 - ogradn + ograd2*y2 + ...)] - - // grad_x = ograd elemwise_mul output - // grad_x = sum(grad_x, keepdim, axis) - // grad_x = grad_x broadcast_mul output - // grad_x = neg grad_x - // grad_x = grad_x + ograd elemwise_mul output - const SoftmaxParam& param = nnvm::get(n->attrs.parsed); - NodeEntry output = NodeEntry{n, 0, 0}; - NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub0", {ograds[0], output}); - NodeEntry sub1 = MakeNode("sum", n->attrs.name + "_grad_sub1", {sub0}, - {{"axis", std::to_string(param.axis)}, {"keepdims", "true"}}); - NodeEntry sub2 = MakeNode("broadcast_mul", n->attrs.name + "_grad_sub2", {sub1, output}); - return std::vector { - MakeNode("elemwise_sub", n->attrs.name + "_grad", {sub0, sub2}) - }; -}); - -// log_softmax -NNVM_REGISTER_OP(log_softmax) -.describe(R"code(Computes log softmax. - -.. math:: \text{log_softmax}(x)_i = \log \frac{exp(x_i)}{\sum_j exp(x_j)} - -.. note:: - This operator can be optimized away for inference. -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input data.") -.add_arguments(SoftmaxParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr("FInferShape", ElemwiseShape<1, 1>) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ElemwiseFixedLayoutCopyToOut<1, 1>) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const SoftmaxParam& param = nnvm::get(attrs.parsed); - CHECK(param.axis == -1 || param.axis == static_cast(inputs[0].ndim()) - 1) - << "log_softmax currently only works on last dimension"; - return Array{ topi::nn::log_softmax(inputs[0]) }; - }) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - // grad_x = grad_y dot jacobian of logsoftmax - // - // jacobian of logsoftmax - // [-y1 + 1, -y2, ... ] - // [ ... , -y2 + 1, ... ] - // [ ... ... ] - // [ ... ,-yn + 1] - // - // grad_x = - // [ograd1 - exp(y1)*(ograd1 + ... + ogradn), - // ograd2 - exp(y2)*(ograd1 + ... + ogradn), - // ... - // ogradn - exp(yn)*(ograd1 + ... 
+ ogradn)] - - // grad_x = sum(ograd, keepdim, axis) - // sigma = exp(output) - // grad_x = grad_x elemwise_mul sigma - // grad_x = neg grad_x - // grad_x = grad_x + ograd - const SoftmaxParam& param = nnvm::get(n->attrs.parsed); - NodeEntry output = NodeEntry{n, 0, 0}; - NodeEntry sub0 = MakeNode("sum", n->attrs.name + "_grad_sub0", {ograds[0]}, - {{"axis", std::to_string(param.axis)}, {"keepdims", "true"}}); - NodeEntry sub1 = MakeNode("exp", n->attrs.name + "_grad_sub1", {output}); - NodeEntry sub2 = MakeNode("broadcast_mul", n->attrs.name + "_grad_sub2", {sub0, sub1}); - return std::vector { - MakeNode("elemwise_sub", n->attrs.name + "_grad", {ograds[0], sub2}) - }; -}) -.set_support_level(1); - -// leaky_relu -DMLC_REGISTER_PARAMETER(LeakyReLUParam); - -NNVM_REGISTER_OP(leaky_relu) -.describe(R"code(Leaky version of a Rectified Linear Unit. - -`y = x > 0 ? x : alpha * x` - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input data.") -.add_arguments(LeakyReLUParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr("FInferShape", ElemwiseShape<1, 1>) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const LeakyReLUParam& param = nnvm::get(attrs.parsed); - return Array{ topi::leaky_relu(inputs[0], param.alpha) }; - }) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - // y = leak_relu(x) - // grad = indicator(x > 0) + alpha * indicator(x < 0) - const LeakyReLUParam& param = nnvm::get(n->attrs.parsed); - NodeEntry zero = MakeNode("zeros_like", n->attrs.name + "_grad_zero", - {n->inputs[0]}); - NodeEntry sub0 = MakeNode("greater", n->attrs.name + "_pos_grad", - {n->inputs[0], zero}); - NodeEntry sub1 = MakeNode("less", n->attrs.name + "_neg_grad", - {n->inputs[0], zero}); - NodeEntry sub2 = MakeNode("__mul_scalar__", n->attrs.name + "_neg_mul_2", - {sub1}, - {{"scalar", std::to_string(param.alpha)}}); - NodeEntry sub3 = MakeNode("elemwise_add", n->attrs.name + "_sub3", {sub0, sub2}); - return std::vector{ - MakeNode("elemwise_mul", n->attrs.name + "_grad", {ograds[0], sub3}) - }; -}) -.set_support_level(1); - -// prelu -DMLC_REGISTER_PARAMETER(PReLUParam); - -inline bool PReluInferShape(const nnvm::NodeAttrs &attrs, - std::vector *in_shape, - std::vector *out_shape) { - const PReLUParam ¶m = nnvm::get(attrs.parsed); - TShape dshape = in_shape->at(0); - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, 0, dshape); - - // The case of parametric relu - CHECK(size_t(param.axis) < dshape.Size()) - << "Wrong axis (" << param.axis << ")value."; - - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, 1, TShape({dshape[param.axis]})); - - TShape oshape(dshape); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - return true; -} - -inline bool PReluCorrectLayout(const NodeAttrs& attrs, - std::vector *in_layouts, - const std::vector *last_in_layouts, - std::vector *out_layouts) { - const PReLUParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_layouts->size(), 2U); - CHECK_EQ(last_in_layouts->size(), 2U); - CHECK_EQ(out_layouts->size(), 1U); - - const Layout& data_layout = last_in_layouts->at(0).defined() ? 
- last_in_layouts->at(0) : in_layouts->at(0); - if (data_layout.defined()) { - CHECK(data_layout.indexof('C') == param.axis && !data_layout.contains('c')) - << "Channel in data layout " << data_layout - << " is not at index " << param.axis; - } - - NNVM_ASSIGN_LAYOUT(*in_layouts, 0, data_layout); - NNVM_ASSIGN_LAYOUT(*in_layouts, 1, Layout("C")); - NNVM_ASSIGN_LAYOUT(*out_layouts, 0, data_layout); - - return true; -} - -NNVM_REGISTER_OP(prelu) -.describe(R"code(Parametric version of a Rectified Linear Unit. -It accepts two arguments: an input ``x`` and a channelwise slope ``alpha`` -and computes the output as :math:`PReLU(x) y = x > 0 ? x : alpha * x`, -where :math:`*` is an channelwise multiplication for each sample in the - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input data.") -.add_argument("alpha", "Tensor", "Input channelwise alpha.") -.add_arguments(PReLUParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_num_inputs(2) -.set_num_outputs(1) -.set_attr("FInferShape", PReluInferShape) -.set_attr("FCorrectLayout", PReluCorrectLayout) -.set_attr("FListInputNames", [](const NodeAttrs& attrs) { - return std::vector{"data", "alpha"}; - }) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const PReLUParam& param = nnvm::get(attrs.parsed); - return Array{ topi::prelu(inputs[0], inputs[1], param.axis)}; - }) -.set_support_level(4); - -DMLC_REGISTER_PARAMETER(PadParam); - -inline bool PadInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - const PadParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_shape->size(), 1U); - CHECK_EQ(out_shape->size(), 1U); - TShape dshape = (*in_shape)[0]; - if (dshape.ndim() == 0) return false; - CHECK_EQ(param.pad_width.ndim(), dshape.ndim()); - TShape oshape = dshape; - for (uint32_t i = 0; i < dshape.ndim(); i++) { - CHECK_EQ(param.pad_width[i].ndim(), 2U); - int pad_before = param.pad_width[i][0]; - int pad_after = param.pad_width[i][1]; - oshape[i] = dshape[i] + pad_before + pad_after; - } - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - return true; -} - -NNVM_REGISTER_OP(pad) -.describe(R"code(Pad for n-D tensor. 
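A standalone sketch of the padded-shape rule in PadInferShape above: each output extent is the input extent plus the (before, after) pair from pad_width. The helper name and sample shapes are illustrative.

```cpp
#include <cstdio>
#include <utility>
#include <vector>

// out[i] = in[i] + pad_before[i] + pad_after[i], mirroring PadInferShape above.
std::vector<int> PadShape(const std::vector<int>& dshape,
                          const std::vector<std::pair<int, int>>& pad_width) {
  std::vector<int> oshape = dshape;
  for (size_t i = 0; i < dshape.size(); ++i)
    oshape[i] += pad_width[i].first + pad_width[i].second;
  return oshape;
}

int main() {
  // An NCHW input padded by one pixel on each spatial border.
  auto out = PadShape({1, 3, 32, 32}, {{0, 0}, {0, 0}, {1, 1}, {1, 1}});
  std::printf("(%d, %d, %d, %d)\n", out[0], out[1], out[2], out[3]);  // (1, 3, 34, 34)
  return 0;
}
```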
- -)code" NNVM_ADD_FILELINE) -.add_argument("data", "n-D Tensor", "Input data.") -.add_arguments(PadParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_num_outputs(1) -.set_num_inputs(1) -.set_attr("FInferShape", PadInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ElemwiseFixedLayoutCopyToOut<1, 1>) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const PadParam& param = nnvm::get(attrs.parsed); - auto pad_width = param.pad_width; - CHECK(pad_width.ndim() == inputs[0]->shape.size() && - pad_width[0].ndim() == 2) - << "Illegal pad_width"; - Array pad_before; - for (size_t i = 0; i < pad_width.ndim(); ++i) { - pad_before.push_back(tvm::make_const(tvm::DataType::Int(32), pad_width[i][0])); - } - Array pad_after; - for (size_t i = 0; i < pad_width.ndim(); ++i) { - pad_after.push_back(tvm::make_const(tvm::DataType::Int(32), pad_width[i][1])); - } - return Array{ topi::pad(inputs[0], pad_before, pad_after, - tvm::make_const(inputs[0]->dtype, param.pad_value)) }; -}) -.set_support_level(1); - -// layout transformer -DMLC_REGISTER_PARAMETER(LayoutTransformParam); - -inline bool LayoutTransformInferShape(const NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; - CHECK_EQ(out_attrs->size(), 1U); - const LayoutTransformParam& param = nnvm::get(attrs.parsed); - const TShape &dshape = (*in_attrs)[0]; - if (dshape.ndim() == 0) return false; - const TShape &oshape = ConvertLayout(dshape, - Layout(param.src_layout), - Layout(param.dst_layout)); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape); - return true; -} - -NNVM_REGISTER_OP(__layout_transform__) -.describe(R"code(Transform the input data layout. 
- -For transforming from NCHW to N16cHWC, the `__layout_transform__` operator reshapes -the input array by output[n, c, h, w, C] = data[n, C*16+c, h, w] - -)code" NNVM_ADD_FILELINE) -.set_num_inputs(1) -.set_num_outputs(1) -.add_argument("data", "Tensor", "Input data.") -.add_arguments(LayoutTransformParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FInferShape", LayoutTransformInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr( - "FCorrectLayout", [](const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - const LayoutTransformParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(ilayouts->size(), 1U); - CHECK_EQ(olayouts->size(), 1U); - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, Layout(param.src_layout)); - NNVM_ASSIGN_LAYOUT(*olayouts, 0, Layout(param.dst_layout)); - return true; -}) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& outputs) { - const LayoutTransformParam& param = nnvm::get(attrs.parsed); - return Array{ - topi::layout_transform(inputs[0], param.src_layout, param.dst_layout) - }; -}) -.set_support_level(1); - -DMLC_REGISTER_PARAMETER(LRNParam); - -inline bool LRNInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - TShape dshape = (*in_shape)[0]; - TShape oshape = dshape; - - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - return true; -} - -NNVM_REGISTER_OP(lrn) -.describe(R"code(LRN layer)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tensor", "Input data.") -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr("FInferShape", LRNInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_support_level(1); - -DMLC_REGISTER_PARAMETER(L2NormalizeParam); - -inline bool L2NormalizeInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - TShape dshape = (*in_shape)[0]; - TShape oshape = dshape; - - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - return true; -} - -NNVM_REGISTER_OP(l2_normalize) -.describe(R"code(L2NORMALIZE layer)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tensor", "Input data.") -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr("FInferShape", L2NormalizeInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>) -.set_support_level(1); - -} // namespace top -} // namespace nnvm diff --git a/nnvm/src/top/nn/nn_common.h b/nnvm/src/top/nn/nn_common.h deleted file mode 100644 index d7ce420b6d94..000000000000 --- a/nnvm/src/top/nn/nn_common.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file nn_common.h - * \brief Common utilities for nn ops. - */ -#ifndef NNVM_TOP_NN_NN_COMMON_H_ -#define NNVM_TOP_NN_NN_COMMON_H_ - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace nnvm { -namespace top { - -template -inline uint32_t UseBiasNumInputs(const NodeAttrs& attrs) { - const ParamType& param = get(attrs.parsed); - return param.use_bias ? 3 : 2; -} - -template -inline std::vector UseBiasListInputNames(const NodeAttrs& attrs) { - const ParamType& param = nnvm::get(attrs.parsed); - if (param.use_bias) { - return {"data", "weight", "bias"}; - } else { - return {"data", "weight"}; - } -} - -/*! - * \brief Convert shape in src_layout to shape in dst_layout - * \param src original shape - * \param src_layout layout of original shape - * \param dst_layout target layout - * \return shape in target layout - */ -inline TShape ConvertLayout(TShape src, const Layout& src_layout, const Layout& dst_layout) { - if (src_layout == dst_layout) { - return src; - } else if (!src_layout.defined()) { - LOG(FATAL) << "cannot convert undefined layout to " << dst_layout; - } else if (!dst_layout.defined()) { - LOG(FATAL) << "cannot convert " << src_layout << " to undefined layout"; - } - - CHECK(src_layout.convertible(dst_layout)) << "cannot convert from " - << src_layout << " to " << dst_layout; - - TShape dst(dst_layout.ndim()); - for (size_t i = 0; i < src_layout.ndim(); ++i) { - Layout::LayoutDim src_dim = src_layout[i]; - if (Layout::is_superdim(src_dim)) { - int dst_major_pos = dst_layout.indexof(Layout::to_superdim(src_dim)); - int dst_minor_pos = dst_layout.indexof(Layout::to_subdim(src_dim)); - int src_minor_pos = src_layout.indexof(Layout::to_subdim(src_dim)); - int src_factor = src_layout.subsizeof(src_dim); - int dst_factor = dst_layout.subsizeof(src_dim); - - uint32_t src_dim_size = src[i]; - if (src_minor_pos >= 0) { - CHECK_EQ(src_factor, src[src_minor_pos]) << "src shape " << src - << " does not agree with layout " << src_layout; - src_dim_size *= src_factor; - } - - dst[dst_major_pos] = src_dim_size; - if (dst_minor_pos >= 0) { - CHECK_GT(dst_factor, 0); - CHECK_LE(dst_factor, src_dim_size) << "Converting " << src - << " from " << src_layout - << " to " << dst_layout - << ": cannot split dimension size of " - << src_dim_size << " by " << dst_factor; - dst[dst_major_pos] /= dst_factor; - dst[dst_minor_pos] = dst_factor; - } - } - } - return dst; -} - -} // namespace top -} // namespace nnvm - -#endif // NNVM_TOP_NN_NN_COMMON_H_ diff --git a/nnvm/src/top/nn/pooling.cc b/nnvm/src/top/nn/pooling.cc deleted file mode 100644 index 11ca637d3b06..000000000000 --- a/nnvm/src/top/nn/pooling.cc +++ /dev/null @@ -1,435 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
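To make the ConvertLayout helper above concrete, here is a standalone sketch of the one case these operators most commonly rely on: splitting the channel dimension when going from NCHW to a blocked NCHW{B}c layout. The general routine also handles merging sub-dimensions and arbitrary axis orders; this sketch does not, and it assumes the channel extent divides evenly by the block size.

```cpp
#include <cassert>
#include <cstdio>
#include <vector>

// NCHW (n, c, h, w) -> NCHW{block}c (n, c/block, h, w, block).
std::vector<int> NCHWToBlocked(const std::vector<int>& nchw, int block) {
  assert(nchw.size() == 4 && nchw[1] % block == 0);
  return {nchw[0], nchw[1] / block, nchw[2], nchw[3], block};
}

int main() {
  auto blocked = NCHWToBlocked({1, 32, 224, 224}, 8);  // NCHW -> NCHW8c
  std::printf("(%d, %d, %d, %d, %d)\n",
              blocked[0], blocked[1], blocked[2], blocked[3], blocked[4]);
  return 0;
}
```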
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - - -/*! - * \file pooling.cc - * \brief Property def of pooling operators. - */ -#include -#include -#include -#include -#include -#include -#include "nn_common.h" -#include "../op_common.h" -#include "../elemwise_op_common.h" -#include "topi/nn/pooling.h" - -namespace nnvm { -namespace top { -using namespace tvm; -using namespace nnvm::compiler; - -DMLC_REGISTER_PARAMETER(MaxPool2DParam); - -template -inline bool Pool2DInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - const T& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_shape->size(), 1U); - CHECK_EQ(out_shape->size(), 1U); - - TShape dshape = (*in_shape)[0]; - if (dshape.ndim() == 0) return false; - - CHECK_GE(dshape.ndim(), 2U) - << "Pool2D only support input >= 2-D: input must have height and width"; - - Layout layout(param.layout); - CHECK(layout.contains('H') && layout.contains('W') && - !layout.contains('h') && !layout.contains('w')) - << "Invalid layout " << layout - << ". Pool2D layout must have H and W, which cannot be split"; - - const auto hidx = layout.indexof('H'); - const auto widx = layout.indexof('W'); - - dim_t pad_h, pad_w; - if (param.padding.ndim() == 1) { - pad_h = param.padding[0] * 2; - pad_w = param.padding[0] * 2; - } else if (param.padding.ndim() == 2) { - // (top, left) - pad_h = param.padding[0] * 2; - pad_w = param.padding[1] * 2; - } else if (param.padding.ndim() == 4) { - // (top, left, bottom, right) - pad_h = param.padding[0] + param.padding[2]; - pad_w = param.padding[1] + param.padding[3]; - } else { - return false; - } - - TShape oshape = dshape; - CHECK(param.pool_size[0] <= dshape[hidx] + pad_h) - << "pool size (" << param.pool_size[0] << ") exceeds input (" << dshape[hidx] - << " padded to " << (dshape[hidx] + pad_h) << ")"; - CHECK(param.pool_size[1] <= dshape[widx] + pad_w) - << "pool size (" << param.pool_size[1] << ") exceeds input (" << dshape[widx] - << " padded to " << (dshape[widx] + pad_w) << ")"; - - if (!param.ceil_mode) { - oshape[hidx] = ((dshape[hidx] + pad_h - param.pool_size[0]) / - param.strides[0]) + 1; - oshape[widx] = ((dshape[widx] + pad_w - param.pool_size[1]) / - param.strides[1]) + 1; - } else { - oshape[hidx] = ((dshape[hidx] + pad_h - param.pool_size[0] + - param.strides[0] - 1) / param.strides[0]) + 1; - oshape[widx] = ((dshape[widx] + pad_w - param.pool_size[1] + - param.strides[1] - 1) / param.strides[1]) + 1; - } - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - return true; -} - -template -inline bool Pool2DCorrectLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - const T ¶m = nnvm::get(attrs.parsed); - CHECK_EQ(ilayouts->size(), 1); - CHECK_EQ(last_ilayouts->size(), 1); - CHECK_EQ(olayouts->size(), 1); - - Layout input = (*ilayouts)[0]; - const Layout layout(param.layout); - - if (input.defined()) { - CHECK(input.convertible(layout)) << "Invalid input layout " << input; - if (input.indexof('W') != layout.indexof('W') || - input.indexof('H') != layout.indexof('H') || - input.contains('w') || input.contains('h')) { - // as long as the index doesn't change for width and height - // pool2d can keep the input layout. 
- input = layout; - } - } else { - input = layout; - } - - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, input); - NNVM_ASSIGN_LAYOUT(*olayouts, 0, input); - - return true; -} - -NNVM_REGISTER_OP(max_pool2d) -.describe(R"code(Max pooling operation for one dimensional data. - -- **data**: This depends on the `layout` parameter. Input is 4D array of shape - (batch_size, channels, height, width) if `layout` is `NCHW`. -- **out**: This depends on the `layout` parameter. Output is 4D array of shape - (batch_size, channels, out_height, out_width) if `layout` is `NCHW`. - out_height and out_width are calculated as:: - - out_height = floor((height+padding[0]+padding[2]-pool_size[0])/strides[0])+1 - out_width = floor((width+padding[1]+padding[3]-pool_size[1])/strides[1])+1 - - where padding will be an expanded array based on number of values passed as:: - one int : all sides same padding used. - two int : bottom, right use same as top and left. - four int: padding width in the order of (top, left, bottom, right). - - When `ceil_mode` is `True`, ceil will be used instead of floor in this - equation. - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tensor", "Input data.") -.add_arguments(MaxPool2DParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_num_outputs(1) -.set_num_inputs(1) -.set_attr("FInferShape", Pool2DInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", Pool2DCorrectLayout) -.set_attr("FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const MaxPool2DParam& param = nnvm::get(attrs.parsed); - auto pool_size = ShapeToArray(param.pool_size); - auto strides = ShapeToArray(param.strides); - auto padding = ShapeToArray(param.padding); - auto ceil_mode = param.ceil_mode; - - Layout layout(param.layout); - CHECK(layout.convertible(Layout("NCHW"))) - << "max_pool2d currently only supports layouts that are convertible from NCHW"; - CHECK_EQ(layout.indexof('h'), -1) << "max_pool2d does not support input split on height"; - CHECK_EQ(layout.indexof('w'), -1) << "max_pool2d does not support input split on width"; - - CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U) - << "Pool2D only support 4-D input (e.g., NCHW)" - << " or 5-D input (last dimension is a split of channel)"; - - if (param.padding.ndim() == 1) { - padding.push_back(padding[0]); - padding.push_back(padding[0]); - padding.push_back(padding[0]); - } else if (param.padding.ndim() == 2) { - padding.push_back(padding[0]); - padding.push_back(padding[1]); - } - - return Array{ - topi::nn::pool(inputs[0], pool_size, strides, padding, - topi::nn::kMaxPool, ceil_mode, layout.name())}; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - return MakeGradNode("_max_pool2d_grad", n, - {ograds[0], n->inputs[0], NodeEntry{n, 0, 0}}, - n->attrs.dict); -}) -.set_support_level(2); - -NNVM_REGISTER_OP(_max_pool2d_grad) - .describe(R"code(Max pooling 2D grad. 
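A standalone check of the pooling output-size formula documented above, including the ceil_mode variant computed by Pool2DInferShape. Helper name and sample numbers are illustrative.

```cpp
#include <cstdio>

// out = floor((in + pad_before + pad_after - pool) / stride) + 1, or the
// ceiling version of the same quotient when ceil_mode is set.
int PoolOut(int in, int pool, int stride, int pad_before, int pad_after, bool ceil_mode) {
  int padded = in + pad_before + pad_after;
  if (!ceil_mode) return (padded - pool) / stride + 1;
  return (padded - pool + stride - 1) / stride + 1;
}

int main() {
  std::printf("floor mode: %d\n", PoolOut(8, 3, 2, 0, 0, false));  // 3
  std::printf("ceil  mode: %d\n", PoolOut(8, 3, 2, 0, 0, true));   // 4
  return 0;
}
```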
- -)code" NNVM_ADD_FILELINE) -.add_argument("ograd", "4D Tensor", "Output grad.") -.add_argument("input", "4D Tensor", "Input data of max_pool2d grad.") -.add_argument("output", "4D Tensor", "Output data of max_pool2d grad.") -.set_num_inputs(3) -.set_num_outputs(1) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", AssignOutputAttr) -.set_attr("FInferType", ElemwiseType<3, 1>) -.set_attr("TIsBackward", true); - -DMLC_REGISTER_PARAMETER(AvgPool2DParam); - -NNVM_REGISTER_OP(avg_pool2d) -.describe(R"code(Average pooling operation for one dimensional data. - -- **data**: This depends on the `layout` parameter. Input is 4D array of shape - (batch_size, channels, height, width) if `layout` is `NCHW`. -- **out**: This depends on the `layout` parameter. Output is 4D array of shape - (batch_size, channels, out_height, out_width) if `layout` is `NCHW`. - out_height and out_width are calculated as:: - - out_height = floor((height+padding[0]+padding[2]-pool_size[0])/strides[0])+1 - out_width = floor((width+padding[1]+padding[3]-pool_size[1])/strides[1])+1 - - where padding will be an expanded array based on number of values passed as:: - one int : all sides same padding used. - two int : bottom, right use same as top and left. - four int: padding width in the order of (top, left, bottom, right). - - When `ceil_mode` is `True`, ceil will be used instead of floor in this - equation. - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tensor", "Input data.") -.add_arguments(AvgPool2DParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", Pool2DInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", Pool2DCorrectLayout) -.set_attr("FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const AvgPool2DParam& param = nnvm::get(attrs.parsed); - auto pool_size = ShapeToArray(param.pool_size); - auto strides = ShapeToArray(param.strides); - auto padding = ShapeToArray(param.padding); - auto ceil_mode = param.ceil_mode; - auto count_include_pad = param.count_include_pad; - - Layout layout(param.layout); - CHECK(layout.convertible(Layout("NCHW"))) - << "avg_pool2d currently only supports layouts that are convertible from NCHW"; - CHECK_EQ(layout.indexof('h'), -1) << "avg_pool2d does not support input split on height"; - CHECK_EQ(layout.indexof('w'), -1) << "avg_pool2d does not support input split on width"; - - CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U) - << "Pool2D only support 4-D input (e.g., NCHW)" - << " or 5-D input (last dimension is a split of channel)"; - - if (param.padding.ndim() == 1) { - padding.push_back(padding[0]); - padding.push_back(padding[0]); - padding.push_back(padding[0]); - } else if (param.padding.ndim() == 2) { - padding.push_back(padding[0]); - padding.push_back(padding[1]); - } - - return Array{ - topi::nn::pool(inputs[0], pool_size, strides, padding, - topi::nn::kAvgPool, ceil_mode, layout.name(), count_include_pad)}; -}) -.set_num_outputs(1) -.set_num_inputs(1) -.set_support_level(2); - - -DMLC_REGISTER_PARAMETER(GlobalPool2DParam); - -inline bool GlobalPool2DInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - static const Layout kNCHW("NCHW"); - const GlobalPool2DParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_shape->size(), 1U); - CHECK_EQ(out_shape->size(), 1U); - - TShape dshape = (*in_shape)[0]; - if (dshape.ndim() == 0) return 
false; - - CHECK_GE(dshape.ndim(), 2U) - << "Pool2D only support input >= 2-D: input must have height and width"; - - Layout layout(param.layout); - CHECK(layout.contains('H') && layout.contains('W') && - !layout.contains('h') && !layout.contains('w')) - << "Invalid layout " << layout - << ". Pool2D layout must have H and W, which cannot be split"; - - const auto hidx = layout.indexof('H'); - const auto widx = layout.indexof('W'); - - TShape oshape = dshape; - oshape[hidx] = oshape[widx] = 1; - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - return true; -} - -inline bool GlobalPool2DCorrectLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - const GlobalPool2DParam ¶m = nnvm::get(attrs.parsed); - CHECK_EQ(ilayouts->size(), 1); - CHECK_EQ(last_ilayouts->size(), 1); - CHECK_EQ(olayouts->size(), 1); - - Layout input = (*ilayouts)[0]; - const Layout layout(param.layout); - - if (input.defined()) { - CHECK(input.convertible(layout)) << "Invalid input layout " << input; - if (input.indexof('W') != layout.indexof('W') || - input.indexof('H') != layout.indexof('H') || - input.contains('w') || input.contains('h')) { - // as long as the index doesn't change for width and height - // pool2d can keep the input layout. - input = layout; - } - } else { - input = layout; - } - - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, input); - NNVM_ASSIGN_LAYOUT(*olayouts, 0, input); - - return true; -} - -NNVM_REGISTER_OP(global_max_pool2d) -.describe(R"code(Global max pooling operation for 2D data. - -- **data**: This depends on the `layout` parameter. Input is 4D array of shape - (batch_size, channels, height, width) if `layout` is `NCHW`. -- **out**: This depends on the `layout` parameter. Output is 4D array of shape - (batch_size, channels, 1, 1) if `layout` is `NCHW`. - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tensor", "Input data.") -.add_arguments(GlobalPool2DParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", GlobalPool2DInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", GlobalPool2DCorrectLayout) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const GlobalPool2DParam& param = nnvm::get(attrs.parsed); - Layout layout(param.layout); - CHECK(layout.convertible(Layout("NCHW"))) - << "global_max_pool2d currently only supports layouts that are convertible from NCHW"; - CHECK_EQ(layout.indexof('h'), -1) - << "global_max_pool2d does not support input split on height"; - CHECK_EQ(layout.indexof('w'), -1) - << "global_max_pool2d does not support input split on width"; - - CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U) - << "Pool2D only support 4-D input (e.g., NCHW)" - << " or 5-D input (last dimension is a split of channel)"; - - return Array{ - topi::nn::global_pool(inputs[0], topi::nn::kMaxPool, layout.name()) }; -}) -.set_num_outputs(1) -.set_num_inputs(1) -.set_support_level(2); - - -NNVM_REGISTER_OP(global_avg_pool2d) -.describe(R"code(Global average pooling operation for 2D data. - -- **data**: This depends on the `layout` parameter. Input is 4D array of shape - (batch_size, channels, height, width) if `layout` is `NCHW`. -- **out**: This depends on the `layout` parameter. Output is 4D array of shape - (batch_size, channels, 1, 1) if `layout` is `NCHW`. 
- -)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tensor", "Input data.") -.add_arguments(GlobalPool2DParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", GlobalPool2DInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", GlobalPool2DCorrectLayout) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const GlobalPool2DParam& param = nnvm::get(attrs.parsed); - Layout layout(param.layout); - CHECK(layout.convertible(Layout("NCHW"))) - << "global_avg_pool2d currently only supports layouts that are convertible from NCHW"; - CHECK_EQ(layout.indexof('h'), -1) - << "global_avg_pool2d does not support input split on height"; - CHECK_EQ(layout.indexof('w'), -1) - << "global_avg_pool2d does not support input split on width"; - - CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U) - << "Pool2D only support 4-D input (e.g., NCHW)" - << " or 5-D input (last dimension is a split of channel)"; - - return Array{ - topi::nn::global_pool(inputs[0], topi::nn::kAvgPool, layout.name()) }; -}) -.set_num_outputs(1) -.set_num_inputs(1) -.set_support_level(2); - -} // namespace top -} // namespace nnvm diff --git a/nnvm/src/top/nn/upsampling.cc b/nnvm/src/top/nn/upsampling.cc deleted file mode 100644 index 68583ae616f2..000000000000 --- a/nnvm/src/top/nn/upsampling.cc +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file upsampling.cc - * \brief Property def of upsampling operators. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "nn_common.h" -#include "../op_common.h" -#include "../elemwise_op_common.h" -#include "topi/elemwise.h" -#include "topi/transform.h" -#include "topi/nn/upsampling.h" - -namespace nnvm { -namespace top { -using tvm::Expr; -using tvm::Array; -using tvm::Tensor; -using nnvm::compiler::FTVMCompute; - -DMLC_REGISTER_PARAMETER(UpSamplingParam); - -inline bool UpSamplingInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - static const Layout kNCHW("NCHW"); - const UpSamplingParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_shape->size(), 1U); - CHECK_EQ(out_shape->size(), 1U); - TShape dshape = (*in_shape)[0]; - if (dshape.ndim() == 0) return false; - - dshape = ConvertLayout(dshape, param.layout, kNCHW); - TShape oshape = dshape; - oshape[2] = oshape[2] * param.scale; - oshape[3] = oshape[3] * param.scale; - oshape = ConvertLayout(oshape, kNCHW, param.layout); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - - return true; -} - -inline bool UpsamplingLayout(const NodeAttrs& attrs, - std::vector *in_layouts, - const std::vector *last_in_layouts, - std::vector *out_layouts) { - const UpSamplingParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_layouts->size(), 1U); - CHECK_EQ(out_layouts->size(), 1U); - const Layout layout(param.layout); - NNVM_ASSIGN_LAYOUT(*in_layouts, 0, layout); - NNVM_ASSIGN_LAYOUT(*out_layouts, 0, layout); - return true; -} - -NNVM_REGISTER_OP(upsampling) -.describe(R"(Perform upsampling to input array with nearest neighbour or bilinear interpolation. - -- **data**: data is 4D array of shape - (batch_size, channels, in_height, in_width) for NCHW - (batch_size, in_height, in_width, channels) for NHWC - -- **out**: Output is 4D array of shape - for layout NCHW - (batch_size, channels, in_height*scale, in_width*scale) - - for layout NHWC - (batch_size, in_height*scale, in_width*scale, channels) - -)" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tensor", "Input data.") -.add_arguments(UpSamplingParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", UpSamplingInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", UpsamplingLayout) -.set_num_outputs(1) -.set_num_inputs(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const UpSamplingParam& param = nnvm::get(attrs.parsed); - Array oshape; - if (param.layout == "NCHW") { - oshape.push_back(out_info[0]->shape[2]); - oshape.push_back(out_info[0]->shape[3]); - } else { - oshape.push_back(out_info[0]->shape[1]); - oshape.push_back(out_info[0]->shape[2]); - } - - return Array{ topi::nn::upsampling(inputs[0], oshape, param.layout, param.method)}; -}) -.set_support_level(2); - -} // namespace top -} // namespace nnvm diff --git a/nnvm/src/top/op_common.h b/nnvm/src/top/op_common.h deleted file mode 100644 index 7213e1c9c116..000000000000 --- a/nnvm/src/top/op_common.h +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file op_common.h - * \brief Common operator utilities - */ -#ifndef NNVM_TOP_OP_COMMON_H_ -#define NNVM_TOP_OP_COMMON_H_ - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace nnvm { -namespace top { -/*! - * \brief Parse keyword arguments as PType arguments and save to parsed - * \tparam PType the parameter type. - * \param attrs The attributes. - */ -template -inline void ParamParser(nnvm::NodeAttrs* attrs) { - PType param; - try { - param.Init(attrs->dict); - } catch (const dmlc::ParamError& e) { - std::ostringstream os; - os << e.what(); - os << ", in operator " << attrs->op->name << "(" - << "name=\"" << attrs->name << "\""; - for (const auto& k : attrs->dict) { - os << ", " << k.first << "=\"" << k.second << "\""; - } - os << ")"; - throw dmlc::ParamError(os.str()); - } - attrs->parsed = std::move(param); -} - -/*! - * \brief Parse keyword arguments as PType arguments and save to parsed - * \tparam PType the arameter type. - * \param attrs The attributes. - */ -template -inline std::unordered_map -ParamGetAttrDict(const nnvm::NodeAttrs& attrs) { - std::unordered_map dict = attrs.dict; - nnvm::get(attrs.parsed).UpdateDict(&dict); - return dict; -} - -/*! \brief check if shape is empty or contains unkown (0) dim. */ -inline bool shape_is_none(const TShape& x) { - return x.ndim() == 0 || x.Size() == 0; -} - -/*! \brief check if type is none (-1) */ -inline bool type_is_none(const int& x) { - return x == -1; -} - -/*! \brief check if shape is scalar({1}). */ -inline bool shape_is_scalar(const TShape& x) { - return x.ndim() == 1 && x.Size() == 1; -} - -/*! \brief get string representation of shape */ -inline std::string shape_string(const TShape& x) { - std::ostringstream os; - os << x; - return os.str(); -} - -/*! \brief get string representation of shape */ -inline std::string type_string(const int& x) { - return std::to_string(x); -} - -/*! - * \brief Assign x to y. Checks for compatiblity when y is not empty. - * Allow missing dim in both x and y (as 0). - * \param y target shape. - * \param x source shape. - * \return whether x and y are compatible. - */ -inline bool shape_assign(TShape *y, const TShape& x) { - if (y->ndim() == 0) { - *y = x; - return true; - } else if (y->ndim() != x.ndim()) { - return x.ndim() == 0; - } else { - for (size_t i = 0; i < y->ndim(); ++i) { - if ((*y)[i] == 0) { - (*y)[i] = x[i]; - } else if ((*y)[i] != x[i] && x[i] != 0) { - return false; - } - } - return true; - } -} - -/*! - * \brief Assign x to y. Checks for compatiblity when y is not -1. - * \param y target type. - * \param x source type. - * \return whether x and y are compatible. 
- */ -inline bool type_assign(int *y, const int& x) { - if (*y == -1) { - *y = x; - return true; - } else if (*y != x && x != -1) { - return false; - } - return true; -} - -template -inline std::string attr_assign_error_msg(const NodeAttrs& attrs, - int index, bool is_input, - const AttrType& expected, - const AttrType& actual, - const char* attr_name) { - static const auto& flist_inputs = Op::GetAttr("FListInputNames"); - static const auto& flist_outputs = Op::GetAttr("FListOutputNames"); - const auto& flist = is_input ? flist_inputs : flist_outputs; - std::string name; - if (flist.count(attrs.op)) { - name = flist[attrs.op](attrs)[index]; - } else { - name = (is_input ? "data" : "output") + std::to_string(index); - } - std::ostringstream msg; - msg << "Operator " << attrs.op->name << "("; - for (const auto& kv : attrs.dict) msg << kv.first << "=" << kv.second << ", "; - msg << "name=" << attrs.name << ") expects " << name << "\'s " << attr_name - << " to be " << expected << ", but got " << actual << "."; - return msg.str(); -} - -/*! - * \brief macro assign shape to input if out is unknown otherwise check consistency - * Use macro so we can see the error file more clearly - * \param inputs the shape array to store the result - * \param index the index of in the array - * \param shape the inferred shape - */ -#define NNVM_ASSIGN_INPUT_SHAPE(attrs, inputs, index, shape) \ - { \ - if (!shape_assign(&(inputs)[index], TShape(shape))) { \ - LOG(FATAL) << attr_assign_error_msg(attrs, index, true, shape, \ - (inputs)[index], "shape"); \ - } \ - } - -/*! - * \brief macro assign shape to out if out is unknown otherwise check consistency - * Use macro so we can see the error file more clearly - * \param inputs the shape array to store the result - * \param index the index of in the array - * \param shape the inferred shape - */ -#define NNVM_ASSIGN_OUTPUT_SHAPE(attrs, outputs, index, shape) \ - { \ - if (!shape_assign(&(outputs)[index], TShape(shape))) { \ - LOG(FATAL) << attr_assign_error_msg(attrs, index, false, shape, \ - (outputs)[index], "shape"); \ - } \ - } - -/*! - * \brief macro assign type to out if out is unknown (-1) otherwise check consistency - * Use macro so we can see the error file more clearly - * \param inputs the type array to store the result - * \param index the index of in the array - * \param type the inferred type - */ -#define NNVM_ASSIGN_INPUT_TYPE(attrs, inputs, index, type) \ - { \ - if (!type_assign(&(inputs)[index], type)) { \ - LOG(FATAL) << attr_assign_error_msg(attrs, index, true, type, \ - (inputs)[index], "type"); \ - } \ - } - -/*! - * \brief macro assign type to out if out is unknown (-1) otherwise check consistency - * Use macro so we can see the error file more clearly - * \param inputs the type array to store the result - * \param index the index of in the array - * \param type the inferred type - */ -#define NNVM_ASSIGN_OUTPUT_TYPE(attrs, outputs, index, type) \ - { \ - if (!type_assign(&(outputs)[index], type)) { \ - LOG(FATAL) << attr_assign_error_msg(attrs, index, false, type, \ - (outputs)[index], "type"); \ - } \ - } - -#define NNVM_ASSIGN_LAYOUT(outputs, index, layout) \ - { \ - if (layout.defined()) { \ - (outputs)[index] = layout; \ - } \ - } - -/*! - * \brief macro assign rhs shape to lhs - * Use macro so we can see the error file more clearly - * \param lhs lhs shape - * \param rhs rhs shape - */ -#define SHAPE_ASSIGN(lhs, rhs) \ - if ((lhs).ndim() == 0) (lhs) = (rhs); \ - else \ - CHECK_EQ(lhs, rhs) << "shape inference inconsistent"; \ - -/*! 
- * \brief macro assign rhs type to lhs - * Use macro so we can see the error file more clearly - * \param lhs lhs type - * \param rhs rhs type - */ -#define DTYPE_ASSIGN(lhs, rhs) \ - if ((lhs) == -1) (lhs) = (rhs); \ - else \ - CHECK_EQ(lhs, rhs) << "type inference inconsistent"; \ - -// simply return the shape as same -inline bool SameShape(const NodeAttrs& attrs, - std::vector *ishape, - std::vector *oshape) { - if (ishape->size() == 0 || (*ishape)[0].ndim() == 0) return false; - for (TShape& pshape : *oshape) { - pshape = (*ishape)[0]; - } - for (TShape& pshape : *ishape) { - pshape = (*ishape)[0]; - } - return true; -} - -// return shape from node attrs -template -inline bool ZeroShape(const NodeAttrs& attrs, - std::vector *ishape, - std::vector *oshape) { - const TShape& ts = dmlc::get(attrs.parsed).shape; - if (ts.ndim() != 0) { - SHAPE_ASSIGN(oshape->at(0), ts); - return true; - } else { - return false; - } -} - -// do not infer layout -inline bool ZeroLayout(const NodeAttrs& attrs, - std::vector *in_layouts, - const std::vector *last_in_layouts, - std::vector *out_layouts) { - return true; -} - -// simply assign output shape or type from input -template -inline bool AssignOutputAttr(const NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - CHECK_LT(in_index, in_attrs->size()); - CHECK_LT(out_index, out_attrs->size()); - const TShape &dshape = in_attrs->at(in_index); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, out_index, dshape); - return true; -} - -// return type from node attrs -template -inline bool ZeroType(const NodeAttrs& attrs, - std::vector *iattr, - std::vector *oattr) { - int dtype = dmlc::get(attrs.parsed).dtype; - DTYPE_ASSIGN(oattr->at(0), dtype); - return true; -} - -// Make zero grad node -inline std::vector MakeZeroGradNodes( - const NodePtr& n, - const std::vector& ograds) { - std::vector ret; - for (uint32_t i = 0; i < n->num_inputs(); ++i) { - std::ostringstream os; - ret.push_back(MakeNode("zeros_like", n->attrs.name + "_zero_grad", - {n->inputs[i]})); - } - return ret; -} - -// Helper to make gradient node -inline std::vector MakeGradNode( - const char* op_name, - const NodePtr& n, - std::vector inputs, - std::unordered_map attr = {{}}) { - NodePtr p = Node::Create(); - p->attrs.op = nnvm::Op::Get(op_name); - p->attrs.name = n->attrs.name + "_grad"; - p->inputs = std::move(inputs); - p->attrs.dict = std::move(attr); - if (p->attrs.op->attr_parser) { - p->attrs.op->attr_parser(&p->attrs); - } - std::vector ret; - for (uint32_t i = 0; i < p->num_outputs(); ++i) { - ret.emplace_back(NodeEntry{p, i, 0}); - } - return ret; -} - - -} // namespace top -} // namespace nnvm - -#endif // NNVM_TOP_OP_COMMON_H_ diff --git a/nnvm/src/top/tensor/broadcast.cc b/nnvm/src/top/tensor/broadcast.cc deleted file mode 100644 index f30fab9f74b5..000000000000 --- a/nnvm/src/top/tensor/broadcast.cc +++ /dev/null @@ -1,614 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file broadcast.cc - * \brief broadcast operator. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "../op_common.h" -#include "../elemwise_op_common.h" -#include "topi/broadcast.h" -#include "topi/elemwise.h" - -namespace nnvm { -namespace top { -using namespace tvm; -using namespace nnvm::compiler; - -// broadcast_to -DMLC_REGISTER_PARAMETER(BroadcastToParam); - -inline bool BroadcastToInferShape(const NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 1U); - CHECK_EQ(out_attrs->size(), 1U); - const TShape& ishape = (*in_attrs)[0]; - if (ishape.ndim() == 0) return false; - - const BroadcastToParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(ishape.ndim(), param.shape.ndim()) - << "Operand of shape " << ishape - << " cannot be broadcasted to " << param.shape; - TShape oshape = param.shape; - for (dim_t i = 0; i < ishape.ndim(); ++i) { - if (oshape[i] != 0) { - CHECK(ishape[i] == oshape[i] || ishape[i] == 1) - << "Array cannot be broadcasted from " << - ishape << " to " << param.shape; - } else { - oshape[i] = ishape[i]; - } - } - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape); - return true; -} - -NNVM_REGISTER_OP(broadcast_to) -.describe(R"code(Broadcasts the input array to a new shape. - -Broadcasting is a mechanism that allows NDArrays to perform arithmetic operations -with arrays of different shapes efficiently without creating multiple copies of arrays. -Also see, `Broadcasting `_ for more explanation. - -Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to -`(2,8,3,9)`. Elements will be duplicated on the broadcasted axes. - -For example:: - - broadcast_to([[1,2,3]], shape=(2,3)) = [[ 1., 2., 3.], - [ 1., 2., 3.]]) - -The dimension which you do not want to change can also be kept as `0` which means copy the original value. -So with `shape=(2,0)`, we will obtain the same result as in the above example. - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input data.") -.add_arguments(BroadcastToParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", BroadcastToInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const BroadcastToParam& param = nnvm::get(attrs.parsed); - auto shape = ShapeToArray(param.shape); - return Array{ topi::broadcast_to(inputs[0], shape) }; - }) -.set_num_inputs(1) -.set_num_outputs(1) -.set_support_level(4); - -// binary broadcast op -inline bool BinaryBroadcastShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 2U); - CHECK_EQ(out_attrs->size(), 1U); - const TShape& lhs = (*in_attrs)[0]; - const TShape& rhs = (*in_attrs)[1]; - - // avoid pre-mature shape inference. 
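The shape rule that BroadcastToInferShape and BinaryBroadcastShape implement is the usual right-aligned broadcasting rule. A minimal stand-alone sketch of that rule, assuming plain vectors instead of TShape and ignoring the 0 = unknown-dimension convention handled above:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <vector>

using Shape = std::vector<int64_t>;

// Align the two shapes on the right; each output dim is the max of the
// aligned dims, and a mismatch is only allowed when one side is 1.
Shape BroadcastShape(const Shape& lhs, const Shape& rhs) {
  Shape out(std::max(lhs.size(), rhs.size()), 1);
  size_t bl = out.size() - lhs.size(), br = out.size() - rhs.size();
  for (size_t i = 0; i < out.size(); ++i) {
    int64_t l = i >= bl ? lhs[i - bl] : 1;
    int64_t r = i >= br ? rhs[i - br] : 1;
    if (l != r && l != 1 && r != 1) throw std::runtime_error("not broadcastable");
    out[i] = std::max(l, r);
  }
  return out;
}

int main() {
  for (int64_t d : BroadcastShape({2, 1, 3}, {8, 1})) std::cout << d << ' ';  // prints "2 8 3"
}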
- if (lhs.ndim() == 0 || rhs.ndim() == 0) return false; - - if (lhs == rhs) { - NNVM_ASSIGN_INPUT_SHAPE(attrs, *out_attrs, 0, lhs); - return true; - } - TShape out(std::max(lhs.ndim(), rhs.ndim())); - dim_t bl = out.ndim() - lhs.ndim(); - dim_t br = out.ndim() - rhs.ndim(); - for (dim_t i = 0; i < out.ndim(); ++i) { - dim_t l = 1, r = 1; - if (i >= bl) l = lhs[i - bl]; - if (i >= br) r = rhs[i - br]; - if (l != r) { - if (l == 0 || r == 0) { - out[i] = 0; - } else { - CHECK(l == 1 || r == 1) - << "operands could not be broadcast together with shapes " - << lhs << " " << rhs << ", l=" << l << ", r=" << r; - out[i] = std::max(l, r); - } - } else { - out[i] = l; - } - } - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, out); - return true; -} - -inline bool BinaryBroadcastCorrectLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - CHECK_EQ(ilayouts->size(), 2U); - CHECK_EQ(olayouts->size(), 1U); - Layout lhs = (*ilayouts)[0]; - Layout rhs = (*ilayouts)[1]; - Layout out(Layout::Undef()); - - if (lhs.defined() && rhs.defined()) { - if (lhs == rhs) { - NNVM_ASSIGN_LAYOUT(*olayouts, 0, lhs); - return true; - } - // For example, NCHW <-> CHW, N16nCH16cW <-> HCW16c, etc, are broadcast-convertible - // because as the definition, CHW can broadcast with NCHW. - // For the second case, we can convert HCW16c to CH16cW then it can broadcast with N16nCH16cW. - // But CNHW <-> CHW, NCHW16n <-> CHW are not, - // because not matter how we adjust the layout of 'CHW', - // we can never have an 'N' between 'C' and "HW". - size_t l_start = 0, r_start = 0; - size_t l = 0, r = 0; - bool find_first_match = false; - while (l < lhs.ndim() && r < rhs.ndim()) { - if (!rhs.contains(Layout::to_superdim(lhs[l]))) { - CHECK(!find_first_match) << lhs << " and " << rhs << " are not broadcast-convertible"; - l_start = ++l; - } else if (!lhs.contains(Layout::to_superdim(rhs[r]))) { - CHECK(!find_first_match) << lhs << " and " << rhs << " are not broadcast-convertible"; - r_start = ++r; - } else { - find_first_match = true; - ++l; ++r; - } - } - if (l_start > 0 && r_start > 0) { - LOG(FATAL) << lhs << " and " << rhs << " are not broadcast-convertible"; - } else if (l_start > 0) { - rhs = lhs.sublayout(l_start, lhs.ndim()-l_start); - out = lhs; - } else if (r_start > 0) { - lhs = rhs.sublayout(r_start, rhs.ndim()-r_start); - out = rhs; - } else { - // prior to keep left layout - rhs = lhs; - out = lhs; - } - } else if (lhs.defined()) { - const Layout& last_lhs = last_ilayouts->at(0); - if (last_lhs.defined()) { - CHECK(lhs.convertible(last_lhs)) << "current lhs layout " << lhs - << " cannot be converted to the original one " << last_lhs; - lhs = last_lhs; - // cannot decide output layout - } - } else if (rhs.defined()) { - const Layout& last_rhs = last_ilayouts->at(1); - if (last_rhs.defined()) { - CHECK(rhs.convertible(last_rhs)) << "current rhs layout " << rhs - << " cannot be converted to the original one " << last_rhs; - rhs = last_rhs; - // cannot decide output layout - } - } - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, lhs); - NNVM_ASSIGN_LAYOUT(*ilayouts, 1, rhs); - NNVM_ASSIGN_LAYOUT(*olayouts, 0, out); - return true; -} - -#define NNVM_REGISTER_BINARY_BROADCAST_OP(name, TOPIOp) \ - NNVM_REGISTER_OP(name) \ - .set_num_inputs(2) \ - .set_num_outputs(1) \ - .set_attr("FInferShape", BinaryBroadcastShape) \ - .set_attr("FInferType", ElemwiseType<2, 1>) \ - .set_attr("FCorrectLayout", \ - BinaryBroadcastCorrectLayout) \ - .set_attr("FInplaceOption", \ - [](const 
NodeAttrs& attrs) { \ - return std::vector >{{0, 0}, {1, 0}}; \ - }) \ - .set_attr( \ - "FTVMCompute", [](const NodeAttrs& attrs, \ - const Array& inputs, \ - const Array& out_info) { \ - return Array{ \ - topi::TOPIOp(inputs[0], inputs[1]) }; \ - }) \ - .add_argument("lhs", "Tensor", "first input") \ - .add_argument("rhs", "Tensor", "second input") - - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_add, add) -.add_alias("__add_symbol__") -.describe(R"code(Returns element-wise sum of the input arrays with broadcasting. - -Example:: - - x = [[ 1., 1., 1.], - [ 1., 1., 1.]] - - y = [[ 0.], - [ 1.]] - - broadcast_add(x, y) = [[ 1., 1., 1.], - [ 2., 2., 2.]] - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - return std::vector{ - MakeNode("collapse_sum", n->attrs.name + "_dlhs", { ograds[0], n->inputs[0] }), - MakeNode("collapse_sum", n->attrs.name + "_drhs", { ograds[0], n->inputs[1] }) - }; -}); - - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_sub, subtract) -.add_alias("__sub_symbol__") -.describe(R"code(Returns element-wise difference of the input arrays with broadcasting. - -Example:: - - x = [[ 1., 1., 1.], - [ 1., 1., 1.]] - - y = [[ 0.], - [ 1.]] - - broadcast_sub(x, y) = [[ 1., 1., 1.], - [ 0., 0., 0.]] - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - return std::vector{ - MakeNode("collapse_sum", n->attrs.name + "_dlhs", { ograds[0], n->inputs[0] }), - MakeNode("collapse_sum", n->attrs.name + "_drhs", { - MakeNode("negative", n->attrs.name + "_drhs_neg", {ograds[0]}), - n->inputs[1] - }) - }; -}); - - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_mul, multiply) -.add_alias("__mul_symbol__") -.describe(R"code(Returns element-wise product of the input arrays with broadcasting. - -Example:: - - x = [[ 1., 1., 1.], - [ 1., 1., 1.]] - - y = [[ 0.], - [ 1.]] - - broadcast_mul(x, y) = [[ 0., 0., 0.], - [ 1., 1., 1.]] -)code" NNVM_ADD_FILELINE) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - NodeEntry dlhs = MakeNode("collapse_sum", n->attrs.name + "_dlhs_sum", { - MakeNode("broadcast_mul", n->attrs.name + "_dlhs_mul", - { n->inputs[1], ograds[0] }), - n->inputs[0] - }); - NodeEntry drhs = MakeNode("collapse_sum", n->attrs.name + "_drhs_sum", { - MakeNode("broadcast_mul", n->attrs.name + "_drhs_mul", - { n->inputs[0], ograds[0] }), - n->inputs[1] - }); - return std::vector{ dlhs, drhs }; -}); - - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_div, divide) -.add_alias("__div_symbol__") -.describe(R"code(Returns element-wise division of the input arrays with broadcasting. 
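One point worth spelling out about the broadcast arithmetic gradients here: the incoming gradient has the broadcast output shape, and collapse_sum is what folds it back down to each input's shape. A hedged numeric sketch of that folding, using plain loops rather than topi, for a (2,3)-shaped output gradient against a (1,3)-shaped input:

#include <iostream>

int main() {
  float dz[2][3] = {{1, 2, 3}, {4, 5, 6}};   // gradient w.r.t. the broadcast output
  float dy[3] = {0, 0, 0};                   // gradient w.r.t. the (1,3)-shaped input
  for (int i = 0; i < 2; ++i)
    for (int j = 0; j < 3; ++j) dy[j] += dz[i][j];   // sum over the broadcast axis
  for (float v : dy) std::cout << v << ' ';          // prints "5 7 9"
}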
- -Example:: - - x = [[ 6., 6., 6.], - [ 6., 6., 6.]] - - y = [[ 2.], - [ 3.]] - - broadcast_div(x, y) = [[ 3., 3., 3.], - [ 2., 2., 2.]] - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - NodeEntry dlhs = MakeNode("collapse_sum", n->attrs.name + "_dlhs_sum", { - MakeNode("broadcast_div", n->attrs.name + "_dlhs_div", - { ograds[0], n->inputs[1] }), - n->inputs[0] - }); - NodeEntry dy = MakeNode("broadcast_div", n->attrs.name + "_drhs_div", { - NodeEntry{n, 0, 0}, - MakeNode("negative", n->attrs.name + "_rhs_neg", {n->inputs[1]}) - }); - NodeEntry drhs = MakeNode("collapse_sum", n->attrs.name + "_drhs_sum", { - MakeNode("broadcast_mul", n->attrs.name + "_drhs_mul", { dy, ograds[0] }), - n->inputs[1] - }); - return std::vector{ dlhs, drhs }; -}); - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_mod, mod) -.add_alias("__mod_symbol__") -.describe(R"code(Returns element-wise mod of the input arrays with broadcasting. - -Example:: - - x = [[ 1., 2., 3.], - [ 4., 5., 6.]] - - y = [[ 2.], - [ 3.]] - - broadcast_mod(x, y) = [[ 1., 0., 1.], - [ 1., 2., 0.]] - -)code" NNVM_ADD_FILELINE); - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_max, maximum) -.add_alias("__max_symbol__") -.describe(R"code(Returns element-wise max of the input arrays with broadcasting. - -Example:: - - x = [[ 1., 2., 3.], - [ 4., 5., 6.]] - - y = [[ 2.], - [ 3.]] - - broadcast_max(x, y) = [[ 2., 2., 3.], - [ 4., 5., 6.]] - -)code" NNVM_ADD_FILELINE); - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_min, minimum) -.add_alias("__min_symbol__") -.describe(R"code(Returns element-wise minimum of the input arrays with broadcasting. - -Example:: - - x = [[ 1., 2., 3.], - [ 4., 5., 6.]] - - y = [[ 2.], - [ 3.]] - - broadcast_min(x, y) = [[ 1., 2., 2.], - [ 3., 3., 3.]] - -)code" NNVM_ADD_FILELINE); - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_pow, power) -.add_alias("__pow_symbol__") -.describe(R"code(Returns element-wise x^y of the input arrays with broadcasting. - -Example:: - - x = [[ 1., 2., 3.], - [ 4., 5., 6.]] - - y = [[ 1.], - [ 2.]] - - broadcast_pow(x, y) = [[ 1., 2., 3. ], - [ 16., 25., 36.]] - -)code" NNVM_ADD_FILELINE); - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_left_shift, left_shift) -.add_alias("__left_shift_symbol__") -.describe(R"code(Returns element-wise x << y of the input arrays with broadcasting. - -Example:: - - x = [[ 1., 2., 3.], - [ 4., 5., 6.]] - - y = [[ 2.], - [ 1.]] - - broadcast_left_shift(x, y) = [[ 4., 8., 12.], - [ 8., 10., 12.]] - -)code" NNVM_ADD_FILELINE); - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_right_shift, right_shift) -.add_alias("__right_shift_symbol__") -.describe(R"code(Returns element-wise x >> y of the input arrays with broadcasting. - -Example:: - - x = [[ 4., 8., 12.], - [ 8., 10., 12.]] - - y = [[ 2.], - [ 1.]] - - broadcast_right_shift(x, y) = [[ 1., 2., 3.], - [ 4., 5., 6.]] - -)code" NNVM_ADD_FILELINE); - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_greater, greater) -.add_alias("__greater_symbol__") -.describe(R"code(Returns element-wise x > y of the input arrays with broadcasting. 
- -Example:: - - x = [[ 1., 2., 3.], - [ 4., 5., 6.]] - - y = [[ 2.], - [ 3.]] - - broadcast_greater(x, y) = [[ 0., 0., 1.], - [ 1., 1., 1.]] - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::cast(topi::greater(inputs[0], inputs[1]), out_info[0]->dtype) }; -}, 11); - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_less, less) -.add_alias("__less_symbol__") -.describe(R"code(Returns element-wise x < y of the input arrays with broadcasting. - -Example:: - - x = [[ 1., 2., 3.], - [ 4., 5., 6.]] - - y = [[ 2.], - [ 3.]] - - broadcast_less(x, y) = [[ 1., 0., 0.], - [ 0., 0., 0.]] - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::cast(topi::less(inputs[0], inputs[1]), out_info[0]->dtype) }; -}, 11); - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_equal, equal) -.add_alias("__equal_symbol__") -.describe(R"code(Returns element-wise x == y of the input arrays with broadcasting. - -Example:: - - x = [[ 1., 2., 3.], - [ 4., 5., 6.]] - - y = [[ 2.], - [ 5.]] - - broadcast_equal(x, y) = [[ 0., 1., 0.], - [ 0., 1., 0.]] - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::cast(topi::equal(inputs[0], inputs[1]), out_info[0]->dtype) }; -}, 11); - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_not_equal, not_equal) -.add_alias("__not_equal_symbol__") -.describe(R"code(Returns element-wise x != y of the input arrays with broadcasting. - -Example:: - - x = [[ 1., 2., 3.], - [ 4., 5., 6.]] - - y = [[ 2.], - [ 4.]] - - broadcast_not_equal(x, y) = [[ 1., 0., 1.], - [ 0., 1., 1.]] - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::cast(topi::not_equal(inputs[0], - inputs[1]), - out_info[0]->dtype) }; -}, 11); - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_greater_equal, greater_equal) -.add_alias("__greater_equal_symbol__") -.describe(R"code(Returns element-wise x >= y of the input arrays with broadcasting. - -Example:: - - x = [[ 1., 2., 3.], - [ 4., 5., 6.]] - - y = [[ 2.], - [ 6.]] - - broadcast_greater_equal(x, y) = [[ 0., 1., 1.], - [ 0., 0., 1.]] - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::cast(topi::greater_equal(inputs[0], - inputs[1]), - out_info[0]->dtype) }; -}, 11); - -NNVM_REGISTER_BINARY_BROADCAST_OP(broadcast_less_equal, less_equal) -.add_alias("__less_equal_symbol__") -.describe(R"code(Returns element-wise x <= y of the input arrays with broadcasting. 
- -Example:: - - x = [[ 1., 2., 3.], - [ 4., 5., 6.]] - - y = [[ 1.], - [ 5.]] - - broadcast_less_equal(x, y) = [[ 1., 0., 0.], - [ 1., 1., 0.]] - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::cast(topi::less_equal(inputs[0], - inputs[1]), - out_info[0]->dtype) }; -}, 11); - -} // namespace top -} // namespace nnvm diff --git a/nnvm/src/top/tensor/elemwise.cc b/nnvm/src/top/tensor/elemwise.cc deleted file mode 100644 index 5ac6d91dc141..000000000000 --- a/nnvm/src/top/tensor/elemwise.cc +++ /dev/null @@ -1,998 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file elemwise.cc - * \brief Elemenwise operators - */ -#include -#include -#include -#include -#include -#include -#include -#include "../op_common.h" -#include "../elemwise_op_common.h" -#include "topi/broadcast.h" -#include "topi/elemwise.h" -#include "topi/tags.h" -#include "../../compiler/compile_engine.h" - -namespace nnvm { -namespace top { - -using namespace tvm; -using namespace nnvm::compiler; - -// undefined op -NNVM_REGISTER_ELEMWISE_UNARY_OP(__undef__) -.describe(R"code(undefined op. - -Used to produce invalide node during optimization. - -)code" NNVM_ADD_FILELINE) -.set_num_outputs(1) -.set_num_inputs(0); - -// floor -NNVM_REGISTER_ELEMWISE_UNARY_OP(floor) -.describe(R"code(Take floor input array, computed element-wise. -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::floor(inputs[0]) }; -}); - -// ceil -NNVM_REGISTER_ELEMWISE_UNARY_OP(ceil) -.describe(R"code(Take ceil input array, computed element-wise. -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::ceil(inputs[0]) }; -}); - -// trunc -NNVM_REGISTER_ELEMWISE_UNARY_OP(trunc) -.describe(R"code(Take truncated value of the input, element-wise. -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::trunc(inputs[0]) }; -}); - -// round -NNVM_REGISTER_ELEMWISE_UNARY_OP(round) -.describe(R"code(Round elements of the input to nearest integer. -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::round(inputs[0]) }; -}); - -// abs -NNVM_REGISTER_ELEMWISE_UNARY_OP(abs) -.describe(R"code(Take absolute value of elements of the input. 
-)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::abs(inputs[0]) }; -}); - -// sigmoid -NNVM_REGISTER_ELEMWISE_UNARY_OP(sigmoid) -.describe(R"code(Computes sigmoid. - -.. math:: - Y = 1 / (1 + exp(-X)) - -)code" NNVM_ADD_FILELINE) -.set_support_level(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::sigmoid(inputs[0]) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - // y = 1 / (1 + exp(-n0)) - // grad_0 = grad_y * y * (1 - y) - NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_0", - {ograds[0], NodeEntry{n, 0, 0}}); - NodeEntry sub1 = MakeNode("__rsub_scalar__", n->attrs.name + "_grad_sub_1", - {NodeEntry{n, 0, 0}}, {{"scalar", "1"}}); - return std::vector{ - MakeNode("elemwise_mul", n->attrs.name + "_grad_0", - {sub0, sub1}) - }; -}); - -// tanh -NNVM_REGISTER_ELEMWISE_UNARY_OP(tanh) -.describe(R"code(Computes hyperbolic tangent. - -.. math:: - Y = sinh(X) / cosh(X) - -)code" NNVM_ADD_FILELINE) -.set_support_level(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::tanh(inputs[0]) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - // y = sinh(n0) / cosh(n0) - // grad_0 = grad_y * (1 - y^2) - NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_0", - {NodeEntry{n, 0, 0}, NodeEntry{n, 0, 0}}); - NodeEntry sub1 = MakeNode("__rsub_scalar__", n->attrs.name + "_grad_sub_1", - {sub0}, {{"scalar", "1"}}); - return std::vector{ - MakeNode("elemwise_mul", n->attrs.name + "_grad_0", - {ograds[0], sub1}) - }; -}); - -// exp -NNVM_REGISTER_ELEMWISE_UNARY_OP(exp) -.describe(R"code(Returns the exp input array, computed element-wise. - -.. math:: - exp(x) - -)code" NNVM_ADD_FILELINE) -.set_support_level(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::exp(inputs[0]) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - // y = exp(n0) - // grad_0 = grad_y * y - return std::vector{ - MakeNode("elemwise_mul", n->attrs.name + "_grad_0", - {ograds[0], NodeEntry{n, 0, 0}}) - }; -}); - -// log -NNVM_REGISTER_ELEMWISE_UNARY_OP(log) -.describe(R"code(Returns the log input array, computed element-wise. - -.. math:: - log(x) - -)code" NNVM_ADD_FILELINE) -.set_support_level(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::log(inputs[0]) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - // y = log(n0) - // grad_0 = grad_y / n0 - return std::vector{ - MakeNode("elemwise_div", n->attrs.name + "_grad_0", - {ograds[0], n->inputs[0]}) - }; -}); - -// sqrt -NNVM_REGISTER_ELEMWISE_UNARY_OP(sqrt) -.describe(R"code(Returns the sqrt input array, computed element-wise. - -.. 
math:: - \sqrt(x) - -)code" NNVM_ADD_FILELINE) -.set_support_level(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::sqrt(inputs[0]) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - // y = sqrt(n0) - // grad_0 = grad_y / (2 * y) - NodeEntry sub0 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_0", - {NodeEntry{n, 0, 0}}, {{"scalar", "2"}}); - return std::vector{ - MakeNode("elemwise_div", n->attrs.name + "_grad_0", - {ograds[0], sub0}) - }; -}); - -// binary ops - -NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_add) -.describe(R"code(Element-wise add - -)code") -.set_support_level(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::add(inputs[0], inputs[1]) }; - }) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - // y = n0 + n1 - // grad_0 = grad_y - // grad_1 = grad_y - return std::vector{ MakeNode("copy", n->attrs.name + "_grad_0", - {ograds[0]}), - MakeNode("copy", n->attrs.name + "_grad_0", - {ograds[0]}) }; -}); - -NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_sub) -.describe(R"code(Element-wise substraction - -)code" NNVM_ADD_FILELINE) -.set_support_level(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::subtract(inputs[0], inputs[1]) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - // y = n0 - n1 - // grad_0 = grad_y - // grad_1 = - grad_y - return std::vector{ - ograds[0], - MakeNode("negative", n->attrs.name + "_grad_1", {ograds[0]}), - }; -}); - -NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_mul) -.describe(R"code(Element-wise multiplication - -)code" NNVM_ADD_FILELINE) -.set_support_level(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::multiply(inputs[0], inputs[1]) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - // y = n0 * n1 - // grad_0 = grad_y * n1 - // grad_1 = grad_y * n0 - return std::vector{ - MakeNode("elemwise_mul", n->attrs.name + "_grad_0", - {ograds[0], n->inputs[1]}), - MakeNode("elemwise_mul", n->attrs.name + "_grad_1", - {ograds[0], n->inputs[0]}) - }; -}); - -NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_div) -.describe(R"code(Element-wise division - -)code" NNVM_ADD_FILELINE) -.set_support_level(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::divide(inputs[0], inputs[1]) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - // y = n0 / n1 - // grad_0 = grad_y / n1 - // grad_1 = - grad_y * n0 / n1^2 - NodeEntry sub0 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_0", - {ograds[0], n->inputs[0]}); - NodeEntry sub1 = MakeNode("negative", n->attrs.name + "_grad_sub_1", - {sub0}); - NodeEntry sub2 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_2", - {n->inputs[1], n->inputs[1]}); - return std::vector{ - MakeNode("elemwise_div", n->attrs.name + "_grad_0", - {ograds[0], n->inputs[1]}), - MakeNode("elemwise_div", n->attrs.name + "_grad_1", - {sub1, sub2}) - }; -}); - -NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_mod) - .describe(R"code(Element-wise modulo - -)code" NNVM_ADD_FILELINE) -.set_support_level(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const 
Array& out_info) { - return Array{ topi::mod(inputs[0], inputs[1]) }; -}); - -NNVM_REGISTER_ELEMWISE_BINARY_OP(elemwise_pow) - .describe(R"code(Element-wise power - -)code" NNVM_ADD_FILELINE) -.set_support_level(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::power(inputs[0], inputs[1]) }; -}); - -// logical -NNVM_REGISTER_ELEMWISE_BINARY_OP(logical_and) -.describe(R"code(Elementwise compute the logical AND - -)code") -.set_support_level(4) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::logical_and(inputs[0], inputs[1]) }; -}); - -NNVM_REGISTER_ELEMWISE_BINARY_OP(logical_or) -.describe(R"code(Elementwise compute the logical OR - -)code") -.set_support_level(4) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::logical_or(inputs[0], inputs[1]) }; -}); - -// negative -NNVM_REGISTER_ELEMWISE_UNARY_OP(negative) -.describe(R"code(Elemenwise numeric negative - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::negative(inputs[0]) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - // y = - n0 - // grad_0 = - grad_y - return std::vector{ - MakeNode("negative", n->attrs.name + "_grad_0", {ograds[0]}), - }; -}); - -// logical NOT -NNVM_REGISTER_ELEMWISE_UNARY_OP(logical_not) -.describe(R"code(Elementwise compute the logical NOT - -)code" NNVM_ADD_FILELINE) -.set_support_level(4) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::logical_not(inputs[0]) }; -}); - -// copy -NNVM_REGISTER_ELEMWISE_UNARY_OP(copy) -.describe(R"code(Copy tensor to another one. 
- -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::identity(inputs[0]) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - // y = copy(n0) - // grad_0 = grad_y - return std::vector{ MakeNode("copy", n->attrs.name + "_grad_0", - {ograds[0]}) }; -}); - -DMLC_REGISTER_PARAMETER(InitOpParam); -DMLC_REGISTER_PARAMETER(InitOpWithScalarParam); -DMLC_REGISTER_PARAMETER(FillValueParam); - -// full -NNVM_REGISTER_INIT_OP(full) -.describe(R"code(Fill array with scalar value - -)code" NNVM_ADD_FILELINE) -.set_attr_parser(ParamParser) -.set_attr( - "FGetAttrDict", ParamGetAttrDict) -.add_arguments(InitOpWithScalarParam::__FIELDS__()) -.set_attr("FInferShape", ZeroShape) -.set_attr("FInferType", ZeroType) -.set_attr("FCorrectLayout", ZeroLayout) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const InitOpWithScalarParam& param = nnvm::get(attrs.parsed); - Array shape = ShapeToArray(param.shape); - DataType dtype = GetTVMType(param.dtype); - Expr fill_value = tvm::make_const(dtype, param.fill_value); - return Array{ topi::full(shape, dtype, fill_value) }; -}) -.set_support_level(4); - -NNVM_REGISTER_INIT_OP(zeros) -.describe(R"code(Fill target with zeros - -)code" NNVM_ADD_FILELINE) -.set_attr_parser(ParamParser) -.set_attr( - "FGetAttrDict", ParamGetAttrDict) -.add_arguments(InitOpParam::__FIELDS__()) -.set_attr("FInferShape", ZeroShape) -.set_attr("FInferType", ZeroType) -.set_attr("FCorrectLayout", ZeroLayout) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const InitOpParam& param = nnvm::get(attrs.parsed); - Array shape = ShapeToArray(param.shape); - DataType dtype = GetTVMType(param.dtype); - Expr fill_value = tvm::make_const(dtype, 0); - return Array{ topi::full(shape, dtype, fill_value) }; -}) -.set_support_level(4); - -NNVM_REGISTER_INIT_OP(ones) -.describe(R"code(Fill target with ones - -)code" NNVM_ADD_FILELINE) -.set_attr_parser(ParamParser) -.set_attr( - "FGetAttrDict", ParamGetAttrDict) -.add_arguments(InitOpParam::__FIELDS__()) -.set_attr("FInferShape", ZeroShape) -.set_attr("FInferType", ZeroType) -.set_attr("FCorrectLayout", ZeroLayout) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const InitOpParam& param = nnvm::get(attrs.parsed); - Array shape = ShapeToArray(param.shape); - DataType dtype = GetTVMType(param.dtype); - Expr fill_value = tvm::make_const(dtype, 1); - return Array{ topi::full(shape, dtype, fill_value) }; -}) -.set_support_level(4); - -// full_like -NNVM_REGISTER_INIT_LIKE_OP(full_like) -.describe(R"code(Return an scalar value array with the same shape and type -as the input array - -)code" NNVM_ADD_FILELINE) -.add_arguments(FillValueParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const FillValueParam& param = nnvm::get(attrs.parsed); - const Expr fill_value = tvm::make_const(out_info[0]->dtype, param.fill_value); - return Array { topi::full_like(inputs[0], fill_value) }; -}) -.set_support_level(4); - -NNVM_REGISTER_INIT_LIKE_OP(zeros_like) -.describe(R"code(Return an array of zeros with the same shape and type -as the input array. 
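The binary_scalar_op helper above lifts the scalar into a constant of the input's dtype and applies the supplied functor element by element through topi's compute(). A rough stand-alone equivalent, with std::vector<float> standing in for a TVM tensor (an assumption of this sketch, not the real API):

#include <functional>
#include <iostream>
#include <vector>

// f is the binary functor the operator supplies, e.g. addition for __add_scalar__.
std::vector<float> ApplyScalar(const std::vector<float>& x, float scalar,
                               const std::function<float(float, float)>& f) {
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) out[i] = f(x[i], scalar);  // element-wise, like compute()
  return out;
}

int main() {
  auto y = ApplyScalar({1.f, 2.f, 3.f}, 10.f,
                       [](float a, float b) { return a + b; });    // __add_scalar__ with scalar=10
  for (float v : y) std::cout << v << ' ';                         // prints "11 12 13"
}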
- -)code") -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array { topi::full_like(inputs[0], - tvm::make_const(out_info[0]->dtype, 0)) }; -}) -.set_support_level(4); - -NNVM_REGISTER_INIT_LIKE_OP(ones_like) -.describe(R"code(Return an array of ones with the same shape and type -as the input array. - -)code") -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array { topi::full_like(inputs[0], - tvm::make_const(out_info[0]->dtype, 1)) }; -}) -.set_support_level(4); - -// unary scalar op -DMLC_REGISTER_PARAMETER(ScalarParam); - -#define NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(op) \ - NNVM_REGISTER_ELEMWISE_UNARY_OP(op) \ - .add_arguments(ScalarParam::__FIELDS__()) \ - .set_attr_parser(ParamParser) \ - .set_attr("FGetAttrDict", ParamGetAttrDict) - -inline Tensor binary_scalar_op(const NodeAttrs& attrs, - const Tensor& x, - std::function f) { - const ScalarParam& param = nnvm::get(attrs.parsed); - auto scalar_val = static_cast(param.scalar); - return compute(x->shape, [&](const Array& i) { - auto scalar_const = make_const(x->dtype, scalar_val); - return f(x(i), scalar_const); - }, "tensor", topi::kElementWise); -} - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__add_scalar__) -.describe(R"code(Tensor add scalar - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ binary_scalar_op(attrs, inputs[0], - [](Expr x, Expr y) { return x + y; }) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - return std::vector{ MakeNode("copy", n->attrs.name + "_grad_0", - {ograds[0]}) }; -}); - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__sub_scalar__) -.describe(R"code(Tensor substract scalar - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ binary_scalar_op(attrs, inputs[0], - [](Expr x, Expr y) { return x - y; }) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - return std::vector{ograds[0]}; -}); - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rsub_scalar__) -.describe(R"code(scalar substract Tensor - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ binary_scalar_op(attrs, inputs[0], - [](Expr x, Expr y) { return y - x; }) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - return std::vector{ - MakeNode("negative", n->attrs.name + "_grad_0", {ograds[0]}) - }; -}); - - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__lshift_scalar__) -.describe(R"code(Tensor left shift by scalar - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ScalarParam& param = nnvm::get(attrs.parsed); - int scalar_val = static_cast(param.scalar); - return Array{ - topi::left_shift(inputs[0], - make_const(inputs[0]->dtype, scalar_val))}; - }); - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rshift_scalar__) -.describe(R"code(Tensor right shift by scalar - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ScalarParam& param = nnvm::get(attrs.parsed); - 
int scalar_val = static_cast(param.scalar); - return Array{ - topi::right_shift(inputs[0], - make_const(inputs[0]->dtype, scalar_val))}; - }); - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__mul_scalar__) -.describe(R"code(Tensor multiplies scalar - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ binary_scalar_op(attrs, inputs[0], - [](Expr x, Expr y) { return x * y; }) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - // y = n0 * scalar - // grad_0 = grad_y * scalar - return std::vector{ - MakeNode("__mul_scalar__", n->attrs.name + "_grad_0", - {ograds[0]}, {{"scalar", n->attrs.dict["scalar"]}}) - }; -}); - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__div_scalar__) -.describe(R"code(Tensor divides scalar - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ binary_scalar_op(attrs, inputs[0], - [](Expr x, Expr y) { return x / y; }) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - // y = n0 / scalar - // grad_0 = grad_y / scalar - return std::vector{ - MakeNode("__div_scalar__", n->attrs.name + "_grad_0", - {ograds[0]}, {{"scalar", n->attrs.dict["scalar"]}}) - }; -}); - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rdiv_scalar__) -.describe(R"code(scalar divides Tensor - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ binary_scalar_op(attrs, inputs[0], - [](Expr x, Expr y) { return y / x; }) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - // y = scalar / n0 - // grad_0 = - grad_y * scalar / n0^2 - NodeEntry sub0 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_0", - {ograds[0]}, - {{"scalar", n->attrs.dict["scalar"]}}); - NodeEntry sub1 = MakeNode("negative", n->attrs.name + "_grad_sub_1", - {sub0}); - NodeEntry sub2 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_2", - {n->inputs[0], n->inputs[0]}); - return std::vector{ - MakeNode("elemwise_div", n->attrs.name + "_grad_0", - {sub1, sub2}) - }; -}); - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__pow_scalar__) -.describe(R"code(Tensor power scalar - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ binary_scalar_op(attrs, inputs[0], - [](Expr x, Expr y) { return tvm::pow(x, y); }) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - // y = n0^scalar - // grad_0 = grad_y * scalar * n0^(scalar - 1) - double scalar = std::stod(n->attrs.dict["scalar"]); - NodeEntry sub0 = MakeNode("__pow_scalar__", n->attrs.name + "_grad_sub_0", - {n->inputs[0]}, - {{"scalar", std::to_string(scalar - 1)}}); - NodeEntry sub1 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_1", - {ograds[0]}, - {{"scalar", std::to_string(scalar)}}); - return std::vector{ - MakeNode("elemwise_mul", n->attrs.name + "_grad_0", - {sub0, sub1}) - }; -}); - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rpow_scalar__) -.describe(R"code(scalar power Tensor - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ binary_scalar_op(attrs, inputs[0], - 
[](Expr x, Expr y) { return tvm::pow(y, x); }) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - // y = scalar^n0 - // grad_0 = grad_y * scalar^n0 * log(scalar) - double num = std::stod(n->attrs.dict["scalar"]); - NodeEntry sub0 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_0", - {NodeEntry{n, 0, 0}}, - {{"scalar", std::to_string(std::log(num))}}); - return std::vector{ - MakeNode("__mul_symbol__", n->attrs.name + "_grad_0", - {ograds[0], sub0}) - }; -}); - -DMLC_REGISTER_PARAMETER(ElementWiseReduceParam); - -NNVM_REGISTER_ELEMWISE_REDUCE_OP(elemwise_sum) -.describe(R"code(Adds all input arguments element-wise. - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ElementWiseReduceParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(param.num_args, inputs.size()) << """Compute definition of elemwise sum"""; - return Array{ topi::elemwise_sum(inputs) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - CHECK_EQ(ograds.size(), 1); - std::vector ret; - for (size_t i = 0; i < n->inputs.size(); i++) { - ret.push_back(MakeNode("copy", n->attrs.name + "_grad_0", {ograds[0]})); - } - return ret; - }) -.set_support_level(4); - -NNVM_REGISTER_ELEMWISE_UNARY_OP(block_grad) -.describe(R"code(Blocks gradient computation for input. - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FInplaceIdentity", [](const NodeAttrs& attrs){ - return std::vector{true}; -}) -.set_attr("FGradient", MakeZeroGradNodes) -.set_support_level(4); - -DMLC_REGISTER_PARAMETER(IndicatorParam); - -// indicator function -NNVM_REGISTER_INDICATOR_OP(greater) -.describe(R"code(Greater function that returns a mask tensor -with 1.0 if (left > right), otherwise 0.0 element-wise. - -)code" NNVM_ADD_FILELINE) -.add_argument("lhs", "Tensor", "First input") -.add_argument("rhs", "Tensor", "Second input") -.set_num_inputs(2) -.set_attr("FInferShape", ElemwiseShape<2, 1>) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::cast(topi::greater(inputs[0], inputs[1]), out_info[0]->dtype) }; -}) -.set_support_level(4); - - -NNVM_REGISTER_INDICATOR_OP(less) - .describe(R"code(Less function that returns a mask tensor -with 1.0 if (left < right), otherwise 0.0 element-wise. - -)code" NNVM_ADD_FILELINE) -.add_argument("lhs", "Tensor", "First input") -.add_argument("rhs", "Tensor", "Second input") -.set_num_inputs(2) -.set_attr("FInferShape", ElemwiseShape<2, 1>) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::cast(topi::less(inputs[0], inputs[1]), out_info[0]->dtype) }; -}) -.set_support_level(4); - -NNVM_REGISTER_INDICATOR_OP(_max_mask) - .describe(R"code(Function that returns a mask tensor -with 1.0 if the value is maximum over given axes, otherwise 0.0 element-wise. - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input") -.set_num_inputs(1) -.add_arguments(IndicatorParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", ElemwiseShape<1, 1>) -.set_support_level(1); - -NNVM_REGISTER_INDICATOR_OP(_min_mask) - .describe(R"code(Function that returns a mask tensor -with 1.0 if the value is minimum over given axes, otherwise 0.0 element-wise. 
- -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input") -.set_num_inputs(1) -.add_arguments(IndicatorParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", ElemwiseShape<1, 1>) -.set_support_level(1); - - -DMLC_REGISTER_PARAMETER(ClipParam); - -NNVM_REGISTER_OP(clip) -.describe(R"doc(Clips (limits) the values in an array. -Given an interval, values outside the interval are clipped to the interval edges. -Clipping ``x`` between `a_min` and `a_x` would be:: - clip(x, a_min, a_max) = max(min(x, a_max), a_min)) -Example:: - x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - clip(x,1,8) = [ 1., 1., 2., 3., 4., 5., 6., 7., 8., 8.] -)doc" NNVM_ADD_FILELINE) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", ElemwiseShape<1, 1>) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ClipParam params = get(attrs.parsed); - return Array{ - topi::clip(inputs[0], tvm::make_const(tvm::DataType::Float(32), params.a_min), - tvm::make_const(tvm::DataType::Float(32), params.a_max)) }; - }) -.add_argument("data", "NDArray-or-Symbol", "Input array.") -.add_arguments(ClipParam::__FIELDS__()) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - // y = clip(x, a_min, a_max) - // min_mask = greater_equal(x, a_min*ones_like(x)) - // => ones_like(x) - less(x, a_min) - // max_mask = less_equal(x, a_max*ones_like(x)) - // => ones_like(x) - greater(x, a_max) - // grad_x = min_mask * max_mask * grad_y - CHECK_EQ(ograds.size(), 1); - - NodeEntry sub0 = MakeNode("ones_like", n->attrs.name + "_grad_sub_0", - {n->inputs[0]}); - // min_mask - NodeEntry sub1 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_1", - {sub0}, {{"scalar", n->attrs.dict["a_min"]}}); - NodeEntry sub2 = MakeNode("less", n->attrs.name + "_grad_sub_2", - {n->inputs[0], sub1}); - NodeEntry sub3 = MakeNode("elemwise_sub", n->attrs.name + "_grad_sub_3", - {sub0, sub2}); - - // max_mask - NodeEntry sub4 = MakeNode("__mul_scalar__", n->attrs.name + "_grad_sub_4", - {sub0}, {{"scalar", n->attrs.dict["a_max"]}}); - NodeEntry sub5 = MakeNode("greater", n->attrs.name + "_grad_sub_5", - {n->inputs[0], sub4}); - NodeEntry sub6 = MakeNode("elemwise_sub", n->attrs.name + "_grad_sub_6", - {sub0, sub5}); - - // min_mask * max_mask - NodeEntry sub7 = MakeNode("elemwise_mul", n->attrs.name + "_grad_sub_7", - {sub3, sub6}); - return std::vector{ - MakeNode("elemwise_mul", n->attrs.name + "_grad", - {sub7, ograds[0]}) - }; - }) -.set_support_level(4); - -} // namespace top -} // namespace nnvm diff --git a/nnvm/src/top/tensor/matrix_op.cc b/nnvm/src/top/tensor/matrix_op.cc deleted file mode 100644 index b1810f40de20..000000000000 --- a/nnvm/src/top/tensor/matrix_op.cc +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
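A quick element-wise check of the clip FGradient above: the product of the two masks passes the upstream gradient only where a_min <= x <= a_max. Plain scalar arithmetic is assumed here in place of the mask nodes built with MakeNode.

#include <iostream>
#include <vector>

int main() {
  std::vector<float> x = {0.f, 3.f, 9.f}, grad_y = {1.f, 1.f, 1.f};
  float a_min = 1.f, a_max = 8.f;
  for (size_t i = 0; i < x.size(); ++i) {
    float min_mask = x[i] < a_min ? 0.f : 1.f;            // ones_like(x) - less(x, a_min)
    float max_mask = x[i] > a_max ? 0.f : 1.f;            // ones_like(x) - greater(x, a_max)
    std::cout << min_mask * max_mask * grad_y[i] << ' ';  // prints "0 1 0"
  }
}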
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file matrix_op.cc - * \brief Matrix operators - */ -#include -#include -#include -#include -#include -#include -#include "../op_common.h" -#include "../elemwise_op_common.h" - -namespace nnvm { -namespace top { - -using namespace nnvm::compiler; - -DMLC_REGISTER_PARAMETER(MatMulParam); - -inline bool DotShape(const nnvm::NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - const MatMulParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_attrs->size(), 2U); - CHECK_EQ(out_attrs->size(), 1U); - TShape lshape = (*in_attrs)[0]; - TShape rshape = (*in_attrs)[1]; - - if (lshape.ndim() == 1) lshape = TShape{1, lshape[0]}; - if (rshape.ndim() == 1) rshape = TShape{1, rshape[0]}; - - if (param.transpose_a) std::reverse(lshape.begin(), lshape.end()); - if (param.transpose_b) std::reverse(rshape.begin(), rshape.end()); - - CHECK_EQ(lshape[lshape.ndim() - 1], rshape[0]) - << "dot shape inconsistent: " << lshape << " X " << rshape; - - TShape oshape(lshape.ndim() + rshape.ndim() - 2); - for (uint32_t i = 0; i < lshape.ndim() - 1; i++) oshape[i] = lshape[i]; - for (uint32_t i = 1; i < rshape.ndim(); i++) oshape[i + lshape.ndim() - 2] = rshape[i]; - - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape); - return true; -} - -inline bool DotCorrectLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - const MatMulParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(ilayouts->size(), 2U); - CHECK_EQ(olayouts->size(), 1U); - const Layout& lhs = last_ilayouts->at(0).defined() ? last_ilayouts->at(0) - : ilayouts->at(0); - const Layout& rhs = last_ilayouts->at(1).defined() ? last_ilayouts->at(1) - : ilayouts->at(1); - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, lhs); - NNVM_ASSIGN_LAYOUT(*ilayouts, 1, rhs); - - if (lhs.ndim() > 1 && rhs.ndim() > 1) { - // concat lhs and rhs layout - const Layout& lhs_out = param.transpose_a ? lhs.reverse() : lhs; - const Layout& rhs_out = param.transpose_b ? rhs.reverse() : rhs; - Layout out = lhs_out.sublayout(0, lhs_out.ndim()-1) + - rhs_out.sublayout(1, rhs_out.ndim()-1); - NNVM_ASSIGN_LAYOUT(*olayouts, 0, out); - } - return true; -} - -NNVM_REGISTER_OP(matmul) -.describe(R"doc(Matrix multiplication of two arrays. - -``dot``'s behavior depends on the input array dimensions: - -- 1-D arrays: inner product of vectors -- 2-D arrays: matrix multiplication -- N-D arrays: a sum product over the last axis of the first input and the first - axis of the second input - - For example, given 3-D ``x`` with shape `(n,m,k)` and ``y`` with shape `(k,r,s)`, the - result array will have shape `(n,m,r,s)`. 
It is computed by:: - - dot(x,y) = sum(x[i,j,:]*y[:,a,b]) - -)doc" NNVM_ADD_FILELINE) -.set_support_level(1) -.set_num_inputs(2) -.set_num_outputs(1) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.add_arguments(MatMulParam::__FIELDS__()) -.add_argument("lhs", "NDArray-or-Symbol", "The first input") -.add_argument("rhs", "NDArray-or-Symbol", "The second input") -.set_attr("FInferShape", DotShape) -.set_attr("FInferType", ElemwiseType<2, 1>) -.set_attr("FCorrectLayout", DotCorrectLayout) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const MatMulParam& param = nnvm::get(attrs.parsed); - return Array{ - topi::matmul(inputs[0], inputs[1], param.transpose_a, param.transpose_b) - }; - }) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - // z = x dot y - // xshape (n,m,k), yshape (k,r,s) - const MatMulParam& param = nnvm::get(n->attrs.parsed); - bool Ta = param.transpose_a; - bool Tb = param.transpose_b; - // Ta = false, Tb = false - // grad_x = grad_z dot y.T - // grad_y = x.T dot grad_z - if (!Ta && !Tb) { - return std::vector{ - MakeNode("matmul", n->attrs.name + "_grad_0", - {ograds[0], n->inputs[1]}, - {{"transpose_a", "false"}, - {"transpose_b", "true"}}), - MakeNode("matmul", n->attrs.name + "_grad_1", - {n->inputs[0], ograds[0]}, - {{"transpose_a", "true"}, - {"transpose_b", "false"}}) - }; - } else if (Ta && !Tb) { - // Ta = true, Tb = false - // grad_x = y dot grad_z.T - // grad_y = x dot grad_z - return std::vector{ - MakeNode("matmul", n->attrs.name + "_grad_0", - {n->inputs[1], ograds[0]}, - {{"transpose_a", "false"}, - {"transpose_b", "true"}}), - MakeNode("matmul", n->attrs.name + "_grad_1", - {n->inputs[0], ograds[0]}, - {{"transpose_a", "false"}, - {"transpose_b", "false"}}) - }; - } else if (!Ta && Tb) { - // Ta = false, Tb = true - // grad_x = grad_z dot y - // grad_y = grad_z.T dot x - return std::vector{ - MakeNode("matmul", n->attrs.name + "_grad_0", - {ograds[0], n->inputs[1]}, - {{"transpose_a", "false"}, - {"transpose_b", "false"}}), - MakeNode("matmul", n->attrs.name + "_grad_1", - {ograds[0], n->inputs[0]}, - {{"transpose_a", "true"}, - {"transpose_b", "false"}}) - }; - } else { - // Ta = true, Tb = true - // grad_x = y.T dot grad_z.T - // grad_y = grad_z.T dot x.T - return std::vector{ - MakeNode("matmul", n->attrs.name + "_grad_0", - {n->inputs[1], ograds[0]}, - {{"transpose_a", "true"}, - {"transpose_b", "true"}}), - MakeNode("matmul", n->attrs.name + "_grad_1", - {ograds[0], n->inputs[0]}, - {{"transpose_a", "true"}, - {"transpose_b", "true"}}) - }; - } -}); - -} // namespace top -} // namespace nnvm diff --git a/nnvm/src/top/tensor/reduce.cc b/nnvm/src/top/tensor/reduce.cc deleted file mode 100644 index dd8e23cf6fe9..000000000000 --- a/nnvm/src/top/tensor/reduce.cc +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
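For readability, the four FGradient branches above amount to the standard matrix-calculus rules. Writing Z for the forward product of the (possibly transposed) operands A and B, and G = dL/dZ for the incoming gradient, a condensed summary (assuming 2-D operands) is:

\begin{aligned}
Z &= AB: & \partial A &= G B^{\top}, & \partial B &= A^{\top} G,\\
Z &= A^{\top}B: & \partial A &= B G^{\top}, & \partial B &= A G,\\
Z &= AB^{\top}: & \partial A &= G B, & \partial B &= G^{\top} A,\\
Z &= A^{\top}B^{\top}: & \partial A &= B^{\top} G^{\top}, & \partial B &= G^{\top} A^{\top}.
\end{aligned}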
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file reduce.cc - * \brief reduce operator. - */ -#include -#include -#include -#include -#include -#include -#include -#include "../op_common.h" -#include "../elemwise_op_common.h" -#include "topi/detail/constant_utils.h" -#include "topi/elemwise.h" -#include "topi/reduction.h" -#include "topi/transform.h" - -namespace nnvm { -namespace top { -using namespace tvm; -using namespace nnvm::compiler; - - -// reduce -DMLC_REGISTER_PARAMETER(ReduceParam); - -inline TShape GetReduceAxes(const uint32_t indim, - const TShape& axis, - bool exclude) { - if (axis.ndim() == 0) { - TShape r_axes(indim); - std::iota(r_axes.begin(), r_axes.end(), 0); - return r_axes; - } - - CHECK_LT(axis[axis.ndim() - 1], indim) - << "Reduction axis " << axis[axis.ndim() - 1] - << " exceeds input dimensions " << indim; - - TShape in_axis = axis; - for (auto& i : in_axis) { - i = i < 0 ? i + indim : i; - CHECK_GE(i, 0) << "axis out of bounds in reduce operator"; - CHECK_LT(i, indim) << "axis out of bounds in reduce operator"; - } - std::sort(in_axis.begin(), in_axis.end()); - if (!exclude) return in_axis; - TShape r_axis(indim - in_axis.ndim()); - for (unsigned i = 0, j = 0, k = 0; i < indim; ++i) { - if (j < in_axis.ndim() && i == in_axis[j]) { - ++j; - continue; - } - r_axis[k++] = i; - } - return r_axis; -} - -inline TShape ReduceShapeImpl(const TShape& ishape, - const TShape& axis, - bool keepdims, - bool exclude) { - uint32_t indim = ishape.ndim(); - TShape r_axes = GetReduceAxes(indim, axis, exclude); - if (!r_axes.ndim()) return ishape; - if (r_axes.ndim() == indim) - return TShape(keepdims ? 
indim : 1); - - CHECK(r_axes.ndim() < indim); - if (keepdims) { - TShape oshape(ishape); - for (unsigned i = 0, j = 0; i < indim; ++i) { - if (j >= r_axes.ndim() || i != r_axes[j]) continue; - oshape[i] = 1; - ++j; - } - return oshape; - } - - TShape oshape(indim - r_axes.ndim()); - for (unsigned i = 0, j = 0, k = 0; i < indim; ++i) { - if (j < r_axes.ndim() && i == r_axes[j]) { - ++j; - continue; - } - oshape[k++] = ishape[i]; - } - return oshape; -} - -inline bool ReduceShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 1U); - CHECK_EQ(out_attrs->size(), 1U); - if ((*in_attrs)[0].ndim() == 0) return false; - const ReduceParam& param = nnvm::get(attrs.parsed); - NNVM_ASSIGN_OUTPUT_SHAPE( - attrs, *out_attrs, 0, - ReduceShapeImpl((*in_attrs)[0], param.axis, - param.keepdims, param.exclude)); - return true; -} - -inline bool CollapseShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 2U); - CHECK_EQ(out_attrs->size(), 1U); - if ((*in_attrs)[0].ndim() == 1) return false; - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, (*in_attrs)[1]); - return true; -} - -template -inline void AxesParamParser(nnvm::NodeAttrs* attrs) { - PType param; - param.Init(attrs->dict); - std::sort(¶m.axis[0], ¶m.axis[param.axis.ndim()]); - attrs->parsed = std::move(param); -} - -#define NNVM_REGISTER_BASE_REDUCE_OP(op) \ - NNVM_REGISTER_OP(op) \ - .add_arguments(ReduceParam::__FIELDS__()) \ - .set_attr_parser(AxesParamParser) \ - .set_attr("FGetAttrDict", ParamGetAttrDict) \ - .set_num_outputs(1) - -#define NNVM_REGISTER_REDUCE_OP(op) \ - NNVM_REGISTER_BASE_REDUCE_OP(op) \ - .add_argument("data", "Tensor", "The input") \ - .set_attr("FInferShape", ReduceShape) \ - .set_attr("FInferType", ElemwiseType<1, 1>) \ - .set_attr("FCorrectLayout", \ - ElemwiseFixedLayoutUnknownOut<1, 1>) \ - .set_num_inputs(1) - -NNVM_REGISTER_REDUCE_OP(sum) -.describe(R"code(Computes the sum of array elements over given axes. - -Example:: - - data = [[[1,2],[2,3],[1,3]], - [[1,4],[4,3],[5,2]], - [[7,1],[7,2],[7,3]]] - - sum(data, axis=1) - [[ 4. 8.] - [ 10. 9.] - [ 21. 6.]] - - sum(data, axis=[1,2]) - [ 12. 19. 27.] - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ReduceParam& param = nnvm::get(attrs.parsed); - TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), - param.axis, param.exclude); - if (!r_axes.ndim()) return Array { topi::identity(inputs[0]) }; - auto axis = ShapeToIntArray(r_axes); - return Array{ - topi::sum(inputs[0], axis, param.keepdims, true) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - const ReduceParam& param = nnvm::get(n->attrs.parsed); - bool exclude = param.exclude; - TShape p_axis = param.axis; - if (!param.exclude && param.axis.ndim() == 0) { - exclude = true; - p_axis = TShape(); - } - std::ostringstream axis; axis << p_axis; - return std::vector{ - MakeNode("expand_like", n->attrs.name + "_grad", - {ograds[0], n->inputs[0]}, - {{"axis", axis.str()}, - {"exclude", std::to_string(exclude)}}) - }; -}); - -NNVM_REGISTER_REDUCE_OP(max) -.describe(R"code(Computes the max of array elements over given axes. 
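ReduceShapeImpl above keeps reduced axes as extent 1 when keepdims is set and drops them otherwise. A minimal sketch of that rule, assuming the axes are already normalized and sorted (as GetReduceAxes guarantees) and skipping the reduce-everything-to-a-scalar special case; vectors stand in for TShape:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

using Shape = std::vector<int64_t>;

Shape ReduceShape(const Shape& ishape, const std::vector<size_t>& axes, bool keepdims) {
  Shape out;
  for (size_t i = 0; i < ishape.size(); ++i) {
    bool reduced = std::binary_search(axes.begin(), axes.end(), i);
    if (keepdims) out.push_back(reduced ? 1 : ishape[i]);   // reduced axes collapse to 1
    else if (!reduced) out.push_back(ishape[i]);            // reduced axes are dropped
  }
  return out;
}

int main() {
  for (int64_t d : ReduceShape({3, 3, 2}, {1, 2}, false)) std::cout << d << ' ';  // prints "3"
  std::cout << '\n';
  for (int64_t d : ReduceShape({3, 3, 2}, {1, 2}, true)) std::cout << d << ' ';   // prints "3 1 1"
}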
- -)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ReduceParam& param = nnvm::get(attrs.parsed); - TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), - param.axis, param.exclude); - auto axis = ShapeToIntArray(r_axes); - return Array{ - topi::max(inputs[0], axis, param.keepdims, true) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - const ReduceParam& param = nnvm::get(n->attrs.parsed); - std::ostringstream axis; axis << param.axis; - NodeEntry sub0 = MakeNode("expand_like", n->attrs.name + "_grad_sub0", - {ograds[0], n->inputs[0]}, - {{"axis", axis.str()}, - {"exclude", std::to_string(param.exclude)}}); - NodeEntry sub1 = MakeNode("_max_mask", n->attrs.name + "_grad_sub1", - {ograds[0]}, - {{"axis", axis.str()}, - {"exclude", std::to_string(param.exclude)}}); - return std::vector{ - MakeNode("elemwise_mul", n->attrs.name + "_grad", {sub0, sub1}) - }; -}); - -NNVM_REGISTER_REDUCE_OP(min) -.describe(R"code(Computes the min of array elements over given axes. - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ReduceParam& param = nnvm::get(attrs.parsed); - TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), - param.axis, param.exclude); - auto axis = ShapeToIntArray(r_axes); - return Array{ - topi::min(inputs[0], axis, param.keepdims, true) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - const ReduceParam& param = nnvm::get(n->attrs.parsed); - std::ostringstream axis; axis << param.axis; - NodeEntry sub0 = MakeNode("expand_like", n->attrs.name + "_grad_sub0", - {ograds[0], n->inputs[0]}, - {{"axis", axis.str()}, - {"exclude", std::to_string(param.exclude)}}); - NodeEntry sub1 = MakeNode("_min_mask", n->attrs.name + "_grad_sub1", - {ograds[0]}, - {{"axis", axis.str()}, - {"exclude", std::to_string(param.exclude)}}); - return std::vector{ - MakeNode("elemwise_mul", n->attrs.name + "_grad", {sub0, sub1}) - }; -}); - -NNVM_REGISTER_BASE_REDUCE_OP(collapse_sum) -.add_argument("data", "Tensor", "The input") -.add_argument("as", "Tensor", "The reference") -.set_attr("FInferShape", CollapseShape) -.set_attr("FInferType", ElemwiseType<2, 1>) -.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<2, 1>) -.set_num_inputs(2) -.describe(R"code(Reduces lhs to the shape of rhs via sum)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::collapse_sum(inputs[0], inputs[1]->shape) }; -}); - -inline bool InferFixedType(const NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 1U); - CHECK_EQ(out_attrs->size(), 1U); - const ReduceParam& param = nnvm::get(attrs.parsed); - NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, param.dtype); - return true; -} - -NNVM_REGISTER_BASE_REDUCE_OP(argmax) -.describe(R"code(Creates an operation that finds the indices of the maximum -values over a given axis. 
- -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "The input") -.set_attr("FInferShape", ReduceShape) -.set_attr("FInferType", InferFixedType) -.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>) -.set_num_inputs(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ReduceParam& param = nnvm::get(attrs.parsed); - TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), - param.axis, param.exclude); - auto axis = ShapeToIntArray(r_axes); - Tensor out = topi::argmax(inputs[0], axis, param.keepdims, true); - if (param.dtype == kFloat32) out = topi::cast(out, out_info[0]->dtype); - return Array{out}; -}); - -NNVM_REGISTER_BASE_REDUCE_OP(argmin) -.describe(R"code(Creates an operation that finds the indices of the minimum -values over a given axis. - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "The input") -.set_attr("FInferShape", ReduceShape) -.set_attr("FInferType", InferFixedType) -.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>) -.set_num_inputs(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ReduceParam& param = nnvm::get(attrs.parsed); - TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), - param.axis, param.exclude); - auto axis = ShapeToIntArray(r_axes); - Tensor out = topi::argmin(inputs[0], axis, param.keepdims, true); - if (param.dtype == kFloat32) out = topi::cast(out, out_info[0]->dtype); - return Array{out}; -}); - -NNVM_REGISTER_REDUCE_OP(mean) - .describe(R"code(Computes the mean of array elements over given axes. - -Example:: - - data = [[[1,2],[2,3],[1,3]], - [[1,4],[4,3],[5,2]], - [[7,1],[7,2],[7,3]]] - - mean(data) - [3.22] - - mean(data, axis=[1,2]) - [ 2. 3.16666667 4.5] - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ReduceParam& param = nnvm::get(attrs.parsed); - TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), - param.axis, param.exclude); - if (!r_axes.ndim()) return Array { topi::identity(inputs[0]) }; - auto axis = ShapeToIntArray(r_axes); - - Expr count = make_const(inputs[0]->dtype, 1); - for (auto& i : r_axes) { - count *= cast(inputs[0]->dtype, inputs[0]->shape[i]); - } - - return Array{ - topi::divide(topi::sum(inputs[0], axis, param.keepdims, true), count) }; -}); - -NNVM_REGISTER_REDUCE_OP(prod) - .describe(R"code(Computes the products of array elements over given axes. - -Example:: - - data = [[[1,2],[2,3],[1,3]], - [[1,4],[4,3],[5,2]], - [[7,1],[7,2],[7,3]]] - - mean(data, axis=1) - [35562240] - - mean(data, axis=[1,2]) - [ 36 480 2058] - -)code" NNVM_ADD_FILELINE) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ReduceParam& param = nnvm::get(attrs.parsed); - TShape r_axes = GetReduceAxes(inputs[0]->shape.size(), - param.axis, param.exclude); - if (!r_axes.ndim()) return Array { topi::identity(inputs[0]) }; - auto axis = ShapeToIntArray(r_axes); - return Array{ - topi::prod(inputs[0], axis, param.keepdims, true) }; -}); - - -} // namespace top -} // namespace nnvm diff --git a/nnvm/src/top/tensor/state_op.cc b/nnvm/src/top/tensor/state_op.cc deleted file mode 100644 index 23c7158aecd3..000000000000 --- a/nnvm/src/top/tensor/state_op.cc +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file state_op.cc - * \brief Experimental operators - * Currently we only support assign - */ -#include -#include -#include -#include -#include -#include -#include "../op_common.h" -#include "../elemwise_op_common.h" - -namespace nnvm { -namespace top { - -using namespace tvm; -using namespace nnvm::compiler; - -NNVM_REGISTER_OP(_assign) -.describe(R"doc(Assign rhs to the lhs. - -lhs must be a Variable. -This is an experimental operator. - -)doc" NNVM_ADD_FILELINE) -.set_num_inputs(2) -.set_num_outputs(1) -.set_attr( - "FMutateInputs", [](const NodeAttrs& attrs) { - return std::vector{0}; -}) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - // This implementation is needed for the special - // logic handling assign in the compiler - // It simply copies the result of rhs the output - // The later decoration in compiler will change - // the memory assignment of assign to tie - // the lhs to the output. - return Array{ topi::identity(inputs[1]) }; -}) -.set_attr("FInferShape", SameShape) -.set_attr( - "FCorrectLayout", [](const NodeAttrs& attrs, - std::vector *in_layouts, - const std::vector *last_in_layouts, - std::vector *out_layouts) { - NNVM_ASSIGN_LAYOUT(*in_layouts, 1, (*in_layouts)[0]); - NNVM_ASSIGN_LAYOUT(*out_layouts, 0, (*in_layouts)[0]); - return true; -}) -.set_attr( - "FInplaceOption", [](const NodeAttrs& attrs) { - return std::vector >{{1, 0}}; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - return std::vector{ - MakeNode("zeros_like", n->attrs.name + "_zero_grad", - {n->inputs[0]}), - ograds[0] - }; -}); - -} // namespace top -} // namespace nnvm diff --git a/nnvm/src/top/tensor/transform.cc b/nnvm/src/top/tensor/transform.cc deleted file mode 100644 index a83f447a60f4..000000000000 --- a/nnvm/src/top/tensor/transform.cc +++ /dev/null @@ -1,1500 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file transform.cc - * \brief Injective transformation of shape or type. 
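 * (Editor's annotation, not part of the original header.) The operators
 * registered in this file, all removed by this change, are: flatten,
 * concatenate, expand_dims, expand_like, split, cast, reshape, reshape_like,
 * squeeze, transpose, strided_slice, flip, take, slice_like, where and
 * gather_nd.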
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "../op_common.h" -#include "../elemwise_op_common.h" -#include "topi/nn/flatten.h" -#include "topi/transform.h" -#include "topi/elemwise.h" -#include "topi/detail/constant_utils.h" -#include "../../compiler/compile_engine.h" - -namespace nnvm { -namespace top { -using namespace tvm; -using namespace nnvm::compiler; - -// flatten -inline bool FlattenInferShape(const NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; - CHECK_EQ(out_attrs->size(), 1U); - const TShape &dshape = (*in_attrs)[0]; - if (dshape.ndim() == 0) return false; - uint32_t target_dim = 1; - for (uint32_t i = 1; i < dshape.ndim(); ++i) { - target_dim *= dshape[i]; - } - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, - TShape({dshape[0], target_dim})); - return true; -} - -NNVM_REGISTER_OP(flatten) -.describe(R"code(Flattens the input into a 2-D array. - -For an input array with shape ``(d1, d2, ..., dk)``, `flatten` operation reshapes -the input array into an output array of shape ``(d1, d2*...*dk)``. - -Example:: - - x = [[ - [1,2,3], - [4,5,6], - [7,8,9] - ], - [ [1,2,3], - [4,5,6], - [7,8,9] - ]], - - flatten(x) = [[ 1., 2., 3., 4., 5., 6., 7., 8., 9.], - [ 1., 2., 3., 4., 5., 6., 7., 8., 9.]] - -)code" NNVM_ADD_FILELINE) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr("FInferShape", FlattenInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>) -.add_argument("data", "Tensor", "Input data.") -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::nn::flatten(inputs[0]) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - return MakeGradNode("reshape_like", n, - {ograds[0], n->inputs[0]}); -}) -.set_support_level(1); - -// concatenate -DMLC_REGISTER_PARAMETER(ConcatenateParam); - -inline bool ConcatenateInferShape(const NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - const ConcatenateParam& param = nnvm::get(attrs.parsed); - TShape dshape; - dim_t size = 0; - bool has_zero = false; - int axis = param.axis >= 0 ? 
param.axis : in_shape->at(0).ndim() + param.axis; - for (size_t i = 0; i < in_shape->size(); ++i) { - TShape tmp = (*in_shape)[i]; - if (tmp.ndim()) { - CHECK_LT(static_cast(axis), tmp.ndim()) - << "concat dim " << axis << " out of range of input shape " << tmp; - has_zero = tmp[axis] == 0 || has_zero; - size += tmp[axis]; - tmp[axis] = 0; - shape_assign(&dshape, tmp); - } - } - - TShape tmp = (*out_shape)[0]; - if (tmp.ndim()) { - CHECK_LT(static_cast(axis), tmp.ndim()) - << "concat dim " << axis << " out of range of input shape " << tmp; - tmp[axis] = 0; - shape_assign(&dshape, tmp); - } - - if (dshape.ndim() == 0) return false; - - for (size_t i = 0; i < in_shape->size(); ++i) { - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, i, dshape); - } - - if (!has_zero) dshape[axis] = size; - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, dshape); - return dshape.Size() != 0; -} - -inline bool ConcatenateCorrectLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - const ConcatenateParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(ilayouts->size(), last_ilayouts->size()); - CHECK_EQ(olayouts->size(), 1U); - - Layout layout; - if (!ilayouts->at(0).defined()) { - layout = last_ilayouts->at(0); - } else if (param.axis >= static_cast(ilayouts->at(0).ndim())) { - CHECK(last_ilayouts->at(0).defined()) - << "Current input layout " << ilayouts->at(0) - << " is invalid but last input layout is not " - "defined for the first input."; - layout = last_ilayouts->at(0); - } else if (last_ilayouts->at(0).defined() - && ilayouts->at(0)[param.axis] - != last_ilayouts->at(0)[param.axis]) { - layout = last_ilayouts->at(0); - } else { - layout = ilayouts->at(0); - } - - for (size_t i = 0; i < ilayouts->size(); ++i) { - NNVM_ASSIGN_LAYOUT(*ilayouts, i, layout); - } - NNVM_ASSIGN_LAYOUT(*olayouts, 0, layout); - return true; -} - -NNVM_REGISTER_OP(concatenate) -.describe(R"code(Joins input arrays along a given axis. - -The dimensions of the input arrays should be the same except the axis along -which they will be concatenated. -The dimension of the output array along the concatenated axis will be equal -to the sum of the corresponding dimensions of the input arrays. - -Example:: - - x = [[1,1],[2,2]] - y = [[3,3],[4,4],[5,5]] - z = [[6,6], [7,7],[8,8]] - - concatenate(x,y,z,axis=0) = [[ 1., 1.], - [ 2., 2.], - [ 3., 3.], - [ 4., 4.], - [ 5., 5.], - [ 6., 6.], - [ 7., 7.], - [ 8., 8.]] - - Note that you cannot concat x,y,z along dimension 1 since dimension - 0 is not the same for all the input arrays. 
- - concatenate(y,z,axis=1) = [[ 3., 3., 6., 6.], - [ 4., 4., 7., 7.], - [ 5., 5., 8., 8.]] - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor-or-Tensor[]", "List of arrays to concatenate") -.add_arguments(ConcatenateParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", ConcatenateInferShape) -.set_attr("FInferType", ElemwiseType<-1, 1>) -.set_attr("FCorrectLayout", ConcatenateCorrectLayout) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ConcatenateParam& param = nnvm::get(attrs.parsed); - return Array{ topi::concatenate(inputs, param.axis) }; -}) -.set_num_outputs(1) -.set_num_inputs(kVarg) -.set_support_level(1); - -// expand_dims -DMLC_REGISTER_PARAMETER(ExpandDimsParam); - -inline bool ExpandDimsInferShape(const NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - const ExpandDimsParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_shape->size(), 1U); - const TShape& dshape = in_shape->at(0); - int ndim = static_cast(dshape.ndim()); - CHECK(param.axis >= -ndim - 1 && param.axis <= ndim) - << "with axis = " << param.axis << " ndim = " << ndim; - int axis = param.axis < 0 ? ndim + param.axis + 1 : param.axis; - std::vector oshape; - for (int i = 0; i < axis; ++i) { - oshape.push_back(dshape[i]); - } - for (int i = 0; i < param.num_newaxis; ++i) { - oshape.push_back(1); - } - for (int i = axis; i < ndim; ++i) { - oshape.push_back(dshape[i]); - } - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, - TShape(oshape.begin(), oshape.end())); - return true; -} - -NNVM_REGISTER_OP(expand_dims) -.describe(R"code(Inserts a new axis of size 1 into the array shape - -For example, given ``x`` with shape ``(2,3,4)``, then ``expand_dims(x, axis=1, num_newaxis=5)`` -will return a new array with shape ``(2,1,1,1,1,1,3,4)``. - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input tensor") -.add_arguments(ExpandDimsParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", ExpandDimsInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const ExpandDimsParam& param = nnvm::get(attrs.parsed); - return Array{ topi::expand_dims(inputs[0], param.axis, param.num_newaxis) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds){ - return std::vector { - MakeNode("collapse_sum", n->attrs.name + "_grad", {ograds[0], n->inputs[0]}) - }; -}) -.set_support_level(1); - -NNVM_REGISTER_OP(expand_like) - .describe(R"code(Expand an input array with the shape of second array. -This operation can be thought of as a composition of expand_dims and broadcast_to. -If the dimensions are already expanded then it just broadcasts. -Examples:: - input = [ 12. 19. 27.] 
- input.shape = (3,) - new_shape_array = [[[1,2],[2,3],[1,3]], - [[1,4],[4,3],[5,2]], - [[7,1],[7,2],[7,3]]] - new_shape_array.shape = (3, 3, 2) - expand_like(input, [1,2], new_shape_array) = - [[[12,12],[12,12],[12,12]], - [[19,19],[19,19],[19,19]], - [[27,27],[27,27],[27,27]]] -)code" NNVM_ADD_FILELINE) -.add_argument("input", "Tensor", "Source input") -.add_argument("shape_like", "Tensor", "Input with new shape") -.add_arguments(IndicatorParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", AssignOutputAttr) -.set_attr("FInferType", ElemwiseType<2, 1>) -// never transform layout of the second input array. -.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>) -.set_num_inputs(2) -.set_num_outputs(1) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - const IndicatorParam& param = nnvm::get(n->attrs.parsed); - std::ostringstream axis; - axis << param.axis; - - if (param.axis.ndim() == 0 && !param.exclude) { - // Special case needed because sum interprets axis=[] differently - return std::vector{ - ograds[0], - MakeNode("zeros_like", n->attrs.name + "_zero_grad", {n->inputs[1]}) - }; - } - - auto sum_node = - MakeNode("sum", n->attrs.name + "_sum_grad", - {ograds[0]}, - {{"axis", axis.str()}, - {"exclude", std::to_string(param.exclude)}}); - - return std::vector{ - MakeNode("reshape_like", n->attrs.name + "_grad", - {sum_node, n->inputs[0]}), - MakeNode("zeros_like", n->attrs.name + "_zero_grad", {n->inputs[1]}) - }; - }) - .set_support_level(4); - -// split -DMLC_REGISTER_PARAMETER(SplitParam); - -inline void SplitParamParser(nnvm::NodeAttrs* attrs) { - SplitParam param; - param.Init(attrs->dict); - if (!std::isdigit(attrs->dict.at("indices_or_sections")[0])) { - param.equal_split = false; - } else { - CHECK_EQ(param.indices_or_sections.ndim(), 1); - param.equal_split = true; - } - attrs->parsed = std::move(param); -} - -inline bool SplitInferShape(const NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - const SplitParam& param = nnvm::get(attrs.parsed); - const TShape& dshape = (*in_shape)[0]; - if (dshape.ndim() == 0) return false; - - auto axis = param.axis; - if (axis < 0) { - axis += dshape.ndim(); - } - CHECK_LT(axis, dshape.ndim()) - << "axis should be within input dimension range but got " << axis; - CHECK_GT(axis, -1) - << "axis should be within input dimension range but got " << axis; - - if (param.equal_split) { - int num_outputs = param.indices_or_sections[0]; - CHECK_EQ(out_shape->size(), static_cast(num_outputs)); - TShape oshape = dshape; - CHECK_EQ(oshape[axis] % num_outputs, 0) - << "indices_or_sections need to be able to divide input.shape[axis] got sections " - << num_outputs << " and dimension " << oshape[axis]; - oshape[axis] /= num_outputs; - - for (size_t i = 0; i < out_shape->size(); ++i) { - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, i, oshape); - } - } else { - dim_t num_outputs = param.indices_or_sections.ndim() + 1; - CHECK_EQ(out_shape->size(), static_cast(num_outputs)); - TShape oshape = dshape; - dim_t begin = 0; - for (dim_t i = 0; i < num_outputs - 1; ++i) { - CHECK_GT(param.indices_or_sections[i], begin) - << "indices_or_sections need to be a sorted ascending list got " - << param.indices_or_sections; - oshape[axis] = param.indices_or_sections[i] - begin; - begin = param.indices_or_sections[i]; - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, i, oshape); - } - CHECK_LT(begin, dshape[axis]) - << "The sum of sections must match 
the input.shape[axis]"; - oshape[axis] = dshape[axis] - begin; - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, num_outputs - 1, oshape); - } - return true; -} - -inline uint32_t SplitNumOutputs(const NodeAttrs& attrs) { - const SplitParam& param = nnvm::get(attrs.parsed); - if (param.equal_split) { - return static_cast(param.indices_or_sections[0]); - } else { - return static_cast(param.indices_or_sections.ndim()) + 1; - } -} - -// Intentionally not add ParamGetAttrDict for indices_or_sections. -NNVM_REGISTER_OP(split) -.describe(R"code(Splits an array along a particular axis into multiple sub-arrays. - -**Note** that `indices_or_sections` should evenly divide the length of the axis -along which to split the array. - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Array to be splitted") -.add_arguments(SplitParam::__FIELDS__()) -.set_attr_parser(SplitParamParser) -.set_attr("FInferShape", SplitInferShape) -.set_attr("FInferType", ElemwiseType<1, -1>) -.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, -1>) -.set_num_inputs(1) -.set_num_outputs(SplitNumOutputs) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const SplitParam& param = nnvm::get(attrs.parsed); - if (param.equal_split) { - return Array{ - topi::split_sections(inputs[0], param.indices_or_sections[0], param.axis) }; - } else { - Array indices; - for (auto i : param.indices_or_sections) { - indices.push_back(static_cast(i)); - } - return Array{ topi::split(inputs[0], indices, param.axis) }; - } -}) -.set_support_level(3); - -// cast -DMLC_REGISTER_PARAMETER(CastParam); - -inline bool CastInferType(const NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - const CastParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(out_attrs->size(), 1U); - NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, param.dtype); - return true; -} - -NNVM_REGISTER_OP(cast) -.describe(R"code(Cast the content of input to dtype. - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input data array") -.add_arguments(CastParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", ElemwiseShape<1, 1>) -.set_attr("FInferType", CastInferType) -.set_attr("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const CastParam& param = nnvm::get(attrs.parsed); - DataType dtype = GetTVMType(param.dtype); - return Array{ topi::cast(inputs[0], dtype) }; -}) -.set_support_level(1); - - -// reshape -DMLC_REGISTER_PARAMETER(ReshapeParam); - -inline bool ReshapeInferShape(const NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - const ReshapeParam& param = nnvm::get(attrs.parsed); - CHECK_GT(param.shape.ndim(), 0); - CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]"; - CHECK_EQ(out_attrs->size(), 1U); - - const TShape &dshape = (*in_attrs)[0]; - if (dshape.ndim() == 0) return false; - - const Tuple& target_shape = param.shape; - std::vector oshape; - dim_t src_idx = 0; - int infer_idx = -1; - - for (dim_t i = 0; i < target_shape.ndim(); ++i) { - int svalue = target_shape[i]; - // special flag handling for shape inference. 
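    // (Editor's annotation, not in the original source.) The branches below
    // implement the special shape values documented in the reshape docstring:
    //    0  copy the corresponding dimension from the input shape
    //   -1  infer this dimension so the element count matches the input
    //   -2  copy all remaining input dimensions to the output shape
    //   -3  merge two consecutive input dimensions into one
    //   -4  split one input dimension into the next two listed values
    //       (at most one of the two may itself be -1)
    // For example, input shape (2,3,4) with shape=(-4,1,2,-2) yields (1,2,3,4).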
- if (svalue > 0) { - oshape.push_back(svalue); - ++src_idx; - } else if (svalue == 0) { - // keep same - CHECK_LT(src_idx, dshape.ndim()); - oshape.push_back(dshape[src_idx++]); - } else if (svalue == -1) { - // inference based on rest - CHECK_LT(infer_idx, 0) - << "One and only one dim can be inferred"; - infer_idx = i; - oshape.push_back(1); - ++src_idx; - } else if (svalue == -2) { - // copy all remaining dims from source - while (src_idx < dshape.ndim()) { - oshape.push_back(dshape[src_idx++]); - } - } else if (svalue == -3) { - // merge two dims from source - CHECK_LT(src_idx + 1, dshape.ndim()); - dim_t d1 = dshape[src_idx++]; - dim_t d2 = dshape[src_idx++]; - oshape.push_back(d1 * d2); - } else if (svalue == -4) { - // split the source dim s into two dims - // read the left dim and then the right dim (either can be -1) - CHECK_LT(i + 2, target_shape.ndim()); - CHECK_LT(src_idx, dshape.ndim()); - dim_t d0 = dshape[src_idx++]; - int d1 = target_shape[++i]; - int d2 = target_shape[++i]; - CHECK(d1 != -1 || d2 != -1) << "Split dims cannot both be -1."; - if (d1 == -1) d1 = d0 / d2; - if (d2 == -1) d2 = d0 / d1; - CHECK_EQ(d1 * d2, static_cast(d0)) << - "Split dims " << d1 << ", " << d2 << " do not divide original dim " << d0; - oshape.push_back(d1); - oshape.push_back(d2); - } - } - - if (infer_idx >= 0) { - if (dshape.Size() > 0) { - int new_size = 1; - for (int x : oshape) { - new_size *= x; - } - oshape[infer_idx] = dshape.Size() / new_size; - } else { - oshape[infer_idx] = 0; - } - } - TShape out_shape(oshape.begin(), oshape.end()); - CHECK_EQ(out_shape.Size(), dshape.Size()) - << "Target shape size is different to source. " - << "Target: " << out_shape - << "\nSource: " << dshape; - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, out_shape); - return true; -} - -NNVM_REGISTER_OP(reshape) -.describe(R"code(Reshapes the input array. - -Given an array and a shape, this function returns a copy of the array in the new shape. -The shape is a tuple of integers such as (2,3,4). The size of the new shape should be same as the size of the input array. - -Example:: - - reshape([1,2,3,4], shape=(2,2)) = [[1,2], [3,4]] - -To give user more convenience in without doing manual shape inference, -some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}. -The significance of each is explained below: - -- ``0`` copy this dimension from the input to the output shape. - - Example:: - - - input shape = (2,3,4), shape = (4,0,2), output shape = (4,3,2) - - input shape = (2,3,4), shape = (2,0,0), output shape = (2,3,4) - -- ``-1`` infers the dimension of the output shape by using the remainder of the input dimensions - keeping the size of the new array same as that of the input array. - At most one dimension of shape can be -1. - - Example:: - - - input shape = (2,3,4), shape = (6,1,-1), output shape = (6,1,4) - - input shape = (2,3,4), shape = (3,-1,8), output shape = (3,1,8) - - input shape = (2,3,4), shape=(-1,), output shape = (24,) - -- ``-2`` copy all/remainder of the input dimensions to the output shape. - - Example:: - - - input shape = (2,3,4), shape = (-2,), output shape = (2,3,4) - - input shape = (2,3,4), shape = (2,-2), output shape = (2,3,4) - - input shape = (2,3,4), shape = (-2,1,1), output shape = (2,3,4,1,1) - -- ``-3`` use the product of two consecutive dimensions of the input shape as the output dimension. 
- - Example:: - - - input shape = (2,3,4), shape = (-3,4), output shape = (6,4) - - input shape = (2,3,4,5), shape = (-3,-3), output shape = (6,20) - - input shape = (2,3,4), shape = (0,-3), output shape = (2,12) - - input shape = (2,3,4), shape = (-3,-2), output shape = (6,4) - -- ``-4`` split one dimension of the input into two dimensions passed subsequent to -4 in shape (can contain -1). - - Example:: - - - input shape = (2,3,4), shape = (-4,1,2,-2), output shape =(1,2,3,4) - - input shape = (2,3,4), shape = (2,-4,-1,3,-2), output shape = (2,1,3,4) - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input data.") -.add_arguments(ReshapeParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", ReshapeInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ topi::reshape(inputs[0], out_info[0]->shape) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - return std::vector{ - MakeNode("reshape_like", n->attrs.name + "_grad", - {ograds[0], n->inputs[0]}) - }; -}) -.set_support_level(3); - -inline bool ReshapeLikeInferType(const NodeAttrs &attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - CHECK_EQ(in_attrs->size(), 2U); - CHECK_EQ(out_attrs->size(), 1U); - NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, (*in_attrs)[0]); - return true; -} - -NNVM_REGISTER_OP(reshape_like) - .describe(R"code(Reshapes the input array by the size of another array. -For an input array with shape ``(d1, d2, ..., dk)``, `reshape_like` operation reshapes -the input array into an output array with the same shape as the second input array. -.. note:: - Sizes for both array should be compatible. -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input data.") -.add_argument("shape_like", "Tensor", "Input data.") -.set_num_inputs(2) -.set_num_outputs(1) -.set_attr( - "FInferShape", [](const NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->at(0).Size(), in_attrs->at(1).Size()) - << "Reshape inputs size should be compatible"; - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, in_attrs->at(1)); - return true; -}) -.set_attr("FInferType", ReshapeLikeInferType) -// never transform layout of the second input array. 
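// (Editor's annotation, not in the original source.) reshape_like consumes only
// the shape of its second input, so the layout attribute below treats that
// input as fixed and the gradient below returns zeros_like for it.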
-.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - return std::vector{ - MakeNode("reshape_like", n->attrs.name + "_grad", {ograds[0], n->inputs[0]}), - MakeNode("zeros_like", n->attrs.name + "_zero_grad", { n->inputs[1]}) - }; -}) -.set_support_level(4); - -// squeeze -DMLC_REGISTER_PARAMETER(SqueezeParam); - -inline bool SqueezeShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - const SqueezeParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_attrs->size(), 1U); - CHECK_EQ(out_attrs->size(), 1U); - const TShape& shp = (*in_attrs)[0]; - if (shp.ndim() == 0) return false; - - std::vector oshape; - if (param.axis.ndim() == 0) { - for (dim_t i = 0; i < shp.ndim(); ++i) { - if (shp[i] != 1) { - oshape.emplace_back(shp[i]); - } - } - } else { - std::unordered_set axis_checker; - for (size_t i = 0; i < param.axis.ndim(); ++i) { - int real_axis; - if (param.axis[i] < 0) { - real_axis = param.axis[i] + static_cast(shp.ndim()); - } else { - real_axis = param.axis[i]; - } - CHECK(real_axis < static_cast(shp.ndim()) && real_axis >= 0); - axis_checker.insert(real_axis); - } - for (size_t i = 0; i < shp.ndim(); ++i) { - if (axis_checker.find(i) == axis_checker.end()) { - oshape.emplace_back(shp[i]); - } else { - CHECK_EQ(shp[i], 1) << "The squeezed axis must have shape 1!" - << "Want to squeeze " << i - << ", which has shape" << shp[i]; - } - } - } - if (oshape.size() == 0) { - // Handles the case where all axes are squeezed. - oshape.push_back(1); - } - TShape out_shape(oshape.begin(), oshape.end()); - CHECK_EQ(out_shape.Size(), shp.Size()) - << "Target shape size is different to source. " - << "Target: " << out_shape - << "\nSource: " << shp; - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, out_shape); - return true; -} - -NNVM_REGISTER_OP(squeeze) -.describe(R"code(Squeeze axises in the array. 
- -Examples:: - - x = [[[0], [1], [2]]] - x.shape = (1, 3, 1) - - squeeze(x) = [0, 1, 2] - - squeeze(x, 0) = [[0], [1], [2]] - - squeeze(x, (0, 2)) = [0, 1, 2] - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Source input") -.add_arguments(SqueezeParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", SqueezeShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ElemwiseFixedLayoutUnknownOut<1, 1>) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const SqueezeParam& param = nnvm::get(attrs.parsed); - auto axis = ShapeToIntArray(param.axis); - return Array{ topi::squeeze(inputs[0], axis, true) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - return std::vector{ - MakeNode("reshape_like", n->attrs.name + "_grad", - {ograds[0], n->inputs[0]}) - }; -}) -.set_support_level(1); - -// transpose -DMLC_REGISTER_PARAMETER(TransposeParam); - -inline bool TransposeShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - const TransposeParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_attrs->size(), 1U); - CHECK_EQ(out_attrs->size(), 1U); - const TShape& shp = (*in_attrs)[0]; - if (shp.ndim() == 0) return false; - - TShape ret(shp.ndim()); - if (param.axes.ndim() == 0) { - for (dim_t i = 0; i < shp.ndim(); ++i) { - ret[i] = shp[shp.ndim() - 1 - i]; - } - } else { - CHECK_EQ(shp.ndim(), param.axes.ndim()); - for (size_t i = 0; i < shp.ndim(); ++i) { - CHECK(param.axes[i] < shp.ndim()); - ret[i] = shp[param.axes[i]]; - } - } - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, ret); - return true; -} - -inline bool TransposeCorrectLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - const TransposeParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(ilayouts->size(), 1U); - CHECK_EQ(olayouts->size(), 1U); - - const Layout& input = last_ilayouts->at(0).defined() - ? last_ilayouts->at(0) - : ilayouts->at(0); - - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, input); - - if (input.defined()) { - std::ostringstream new_layout; - if (param.axes.ndim() == 0) { - for (size_t i = 0; i < input.ndim(); ++i) { - new_layout << input.at(input.ndim() - 1 - i); - } - } else { - CHECK_EQ(input.ndim(), param.axes.ndim()); - for (size_t i = 0; i < input.ndim(); ++i) { - CHECK(param.axes[i] < static_cast(input.ndim())); - new_layout << input.at(param.axes[i]); - } - } - NNVM_ASSIGN_LAYOUT(*olayouts, 0, Layout(new_layout.str())); - } - - return true; -} - -NNVM_REGISTER_OP(transpose) -.describe(R"code(Permutes the dimensions of an array. 
- -Examples:: - - x = [[ 1, 2], - [ 3, 4]] - - transpose(x) = [[ 1., 3.], - [ 2., 4.]] - - x = [[[ 1., 2.], - [ 3., 4.]], - - [[ 5., 6.], - [ 7., 8.]]] - - transpose(x) = [[[ 1., 5.], - [ 3., 7.]], - - [[ 2., 6.], - [ 4., 8.]]] - - transpose(x, axes=(1,0,2)) = [[[ 1., 2.], - [ 5., 6.]], - - [[ 3., 4.], - [ 7., 8.]]] -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Source input") -.add_arguments(TransposeParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", TransposeShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", TransposeCorrectLayout) -.set_num_inputs(1) -.set_num_outputs(1) -.set_support_level(4) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const TransposeParam& param = nnvm::get(attrs.parsed); - auto axes = ShapeToIntArray(param.axes); - return Array{ topi::transpose(inputs[0], axes) }; -}) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - const TransposeParam& param = nnvm::get(n->attrs.parsed); - std::ostringstream oss; oss << param.axes; - return std::vector{ - MakeNode("transpose", n->attrs.name + "_t", {ograds[0]}, {{"axes", oss.str()}}) - }; -}); - -// strided_slice -DMLC_REGISTER_PARAMETER(StridedSliceParam); - -inline void StridedSliceParamParser(nnvm::NodeAttrs* attrs) { - StridedSliceParam param; - param.Init(attrs->dict); - attrs->parsed = std::move(param); -} - -inline bool StridedSliceInferShape(const NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - const StridedSliceParam& param = nnvm::get(attrs.parsed); - const TShape& dshape = (*in_shape)[0]; - if (dshape.ndim() == 0) return false; - TShape oshape = dshape; - dim_t num_axis = dshape.ndim(); - - std::vector begin_vec; - std::copy(param.begin.begin(), param.begin.end(), std::back_inserter(begin_vec)); - for (dim_t i = begin_vec.size(); i < num_axis; ++i) { - begin_vec.push_back(0); - } - - std::vector end_vec; - std::copy(param.end.begin(), param.end.end(), std::back_inserter(end_vec)); - for (dim_t i = end_vec.size(); i < num_axis; ++i) { - end_vec.push_back(dshape[i]); - } - - std::vector stride_vec; - std::copy(param.stride.begin(), param.stride.end(), std::back_inserter(stride_vec)); - for (dim_t i = stride_vec.size(); i < num_axis; ++i) { - stride_vec.push_back(1); - } - - for (dim_t i = 0; i < num_axis; ++i) { - int64_t begin_range = stride_vec[i] < 0 ? -1 : 0; - int64_t end_range = stride_vec[i] < 0 ? dshape[i] - 1 : dshape[i]; - int64_t begin = begin_vec[i] < 0 ? dshape[i] + begin_vec[i] : begin_vec[i]; - int64_t end = end_vec[i] < 0 ? dshape[i] + end_vec[i] : end_vec[i]; - begin = std::min(std::max(begin, begin_range), end_range); - end = std::min(std::max(end, begin_range), end_range); - - int interval = std::abs(end - begin); - int slice_size = static_cast((interval - + std::abs(stride_vec[i]) - 1) / std::abs(stride_vec[i])); - CHECK(stride_vec[i] < 0 ? (end < begin) : (begin < end)) - << ": Input [Begin=" << begin_vec[i] << ", End=" << end_vec[i] - << "] is invalid for axis=" << i; - oshape[i] = slice_size; - } - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - return true; -} - -NNVM_REGISTER_OP(strided_slice) -.describe(R"code(Strided slice of an array. 
- -Examples:: - - x = [[ 1., 4., 7., 10.], - [ 2., 5., 8., 11.], - [ 3., 6., 9., 12.]] - - strided_slice(x, begin=[0, 1], end=[2, 4], stride=[1, 1]) = [[ 4., 7., 10.], - [ 5., 8., 11.]] - - x = [[[ 1., 2.], - [ 3., 4.]], - - [[ 5., 6.], - [ 7., 8.]]] - - strided_slice(x, begin=[0, 0], end=[2, 2]) = [[[ 1., 2.], - [ 3., 4.]], - - [[ 5., 6.], - [ 7., 8.]]] -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Array to be sliced") -.add_arguments(StridedSliceParam::__FIELDS__()) -.set_attr_parser(StridedSliceParamParser) -.set_attr("FInferShape", StridedSliceInferShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const StridedSliceParam& param = nnvm::get(attrs.parsed); - Array begin; - Array end; - Array stride; - - for (int64_t i : param.begin) { - begin.push_back(static_cast(i)); - } - - for (int64_t i : param.end) { - end.push_back(static_cast(i)); - } - - for (int64_t i : param.stride) { - stride.push_back(static_cast(i)); - } - - return Array{ - topi::strided_slice(inputs[0], begin, end, stride) - }; -}) -.set_support_level(1); - -// Flip -DMLC_REGISTER_PARAMETER(FlipParam); - -NNVM_REGISTER_OP(flip) -.describe(R"code(Reverse the elements of an array. - -Examples:: - - x = [[ 1, 2], - [ 3, 4]] - - flip(x) = [[ 3., 4.], - [ 1., 2.]] - - x = [[[ 1., 2.], - [ 3., 4.]], - - [[ 5., 6.], - [ 7., 8.]]] - - flip(x) = [[[ 5., 6.], - [ 7., 8.]], - - [[ 1., 2.], - [ 3., 4.]]] - - flip(x, axis=1) = [[[ 3., 4.], - [ 1., 2.]], - - [[ 7., 8.], - [ 5., 6.]]] -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Source input") -.add_arguments(FlipParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", ElemwiseShape<1, 1>) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_num_inputs(1) -.set_num_outputs(1) -.set_support_level(4) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const FlipParam& param = nnvm::get(attrs.parsed); - return Array{ topi::flip(inputs[0], param.axis) }; -}); - - -// take -DMLC_REGISTER_PARAMETER(TakeParam); - -inline bool TakeInferShape(const NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { - CHECK_EQ(in_shape->size(), 2U); - CHECK_EQ(out_shape->size(), 1U); - const TShape& dshape = (*in_shape)[0]; - const TShape& indicesshape = (*in_shape)[1]; - if (dshape.ndim() == 0) return false; - if (indicesshape.ndim() == 0) return false; - - const TakeParam& param = nnvm::get(attrs.parsed); - TShape oshape((!param.axis ? 
0: dshape.ndim() - 1) + indicesshape.ndim()); - if (!param.axis) { - for (size_t j = 0; j < indicesshape.ndim(); ++j) { - oshape[j] = indicesshape[j]; - } - } else { - int axis = param.axis.value(); - if (axis < 0) { - axis += dshape.ndim(); - } - CHECK_LT(axis, dshape.ndim()); - - size_t posi = 0; - for (size_t i = 0; i < dshape.ndim(); ++i) { - if (static_cast(i) == axis) { - for (size_t j = 0; j < indicesshape.ndim(); ++j) { - oshape[posi++] = indicesshape[j]; - } - } else { - oshape[posi++] = dshape[i]; - } - } - } - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, 0, dshape); - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, 1, indicesshape); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - return dshape.Size() != 0; -} - -inline bool TakeInferType(const NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 2U); - CHECK_EQ(out_attrs->size(), 1U); - CHECK_EQ((*in_attrs)[1], kInt32); - NNVM_ASSIGN_INPUT_TYPE(attrs, *in_attrs, 0, (*in_attrs)[0]); - NNVM_ASSIGN_INPUT_TYPE(attrs, *in_attrs, 1, static_cast(kInt32)); - NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, (*in_attrs)[0]); - return true; -} - -inline bool TakeCorrectLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - CHECK_EQ(ilayouts->size(), last_ilayouts->size()); - CHECK_EQ(olayouts->size(), 1U); - - for (size_t i = 0; i < ilayouts->size(); ++i) { - const Layout& input = last_ilayouts->at(i).defined() ? - last_ilayouts->at(i) : ilayouts->at(i); - NNVM_ASSIGN_LAYOUT(*ilayouts, i, input); - } - - return true; -} - -NNVM_REGISTER_OP(take) -.describe(R"code(Take elements from an array along an axis. - -When axis is not None, this function does the same thing as 'fancy' indexing -(indexing arrays using arrays); however, it can be easier to use if you need -elements along a given axis. - -**Note** that when axis is none the flattened input array is used. 
- -Examples:: - - a = [[ 1, 2], - [ 3, 4]] - indices = [3, 0, 2] - take(a, indices) = [ 4, 1, 3] - - a = [[ 1., 2.], - [ 3., 4.]] - indices = [1, 0] - take(a, indices, axis=1) = [[ 2., 1.], - [ 4., 3.]] - - )code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Array to be indexed") -.add_argument("indices", "Tensor", "The indices of the values to extract") -.add_arguments(TakeParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FInferShape", TakeInferShape) -.set_attr("FInferType", TakeInferType) -.set_attr("FCorrectLayout", TakeCorrectLayout) -.set_num_inputs(2) -.set_num_outputs(1) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const TakeParam& param = nnvm::get(attrs.parsed); - if (!param.axis) { - return Array{ - topi::take(inputs[0], inputs[1]) }; - } else { - return Array{ - topi::take(inputs[0], inputs[1], param.axis.value()) }; - } - }); - - -// SliceLike -DMLC_REGISTER_PARAMETER(SliceLikeParam); - -inline bool SliceLikeShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 2U); - CHECK_EQ(out_attrs->size(), 1U); - const SliceLikeParam& param = nnvm::get(attrs.parsed); - const TShape& src_shape = in_attrs->at(0); - const TShape& target_shape = in_attrs->at(1); - Tuple end_idx; - end_idx = Tuple(src_shape); - if (param.axis.ndim() == 0) { - for (size_t i = 0; i < src_shape.ndim(); ++i) { - if (i < target_shape.ndim()) { - end_idx[i] = target_shape[i]; - CHECK_LE(end_idx[i], src_shape[i]) - << "End index of axis " << i << " exceeds input shape: " - << end_idx[i] << " vs " << src_shape[i]; - } - } - } else { - for (auto i : param.axis) { - if (i < 0) { - i = src_shape.ndim() + i; - } - CHECK_LT(i, target_shape.ndim()) - << "Axis " << i << " exceeds dimension " - << target_shape.ndim()<< " of target_shape."; - end_idx[i] = target_shape[i]; - CHECK_LE(end_idx[i], src_shape[i]) - << "End index of axis " << i << " exceeds input shape: " - << end_idx[i] << " vs " << src_shape[i]; - } - } - TShape out_shape = TShape(std::move(end_idx)); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, out_shape); - return true; -} - -// Adapter function to make int array. -Array GetIntArray(Array arr) { - for (size_t i = 0; i < arr.size(); ++i) { - CHECK(!arr[i].defined() || arr[i].as()) - << "Expect an int array"; - } - return Downcast >(arr); -} - -NNVM_REGISTER_OP(slice_like) -.describe(R"code(Slice the first input respect to the second input. 
-)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input data to be sliced.") -.add_argument("slice_like", "Tensor", "Tensor with target shape") -.set_num_inputs(2) -.set_num_outputs(1) -.add_arguments(SliceLikeParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", SliceLikeShape) -.set_attr("FInferType", ElemwiseType<2, 1>) -.set_attr("FCorrectLayout", ElemwiseBinaryKeepLeftLayout) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const auto& param = nnvm::get(attrs.parsed); - Array src_shape = inputs[0]->shape; - Array target_shape = inputs[1]->shape; - Array begin_idx, end_idx, strides; - for (size_t i = 0; i < src_shape.size(); ++i) { - begin_idx.push_back(make_const(tvm::DataType::Int(32), 0)); - strides.push_back(make_const(tvm::DataType::Int(32), 1)); - } - end_idx = Array(src_shape); - if (param.axis.ndim() == 0) { - for (size_t i = 0; i < src_shape.size(); ++i) { - if (i < target_shape.size()) { - end_idx.Set(i, target_shape[i]); - CHECK_LE(topi::GetConstInt(end_idx[i]), - topi::GetConstInt(src_shape[i])) - << "End index of axis " << i << " exceeds input shape: " - << topi::GetConstInt(end_idx[i]) << " vs " - << topi::GetConstInt(src_shape[i]); - } - } - } else { - for (int axis : param.axis) { - if (axis < 0) { - axis = static_cast(src_shape.size()) + axis; - } - end_idx.Set(static_cast(axis), target_shape[axis]); - CHECK_LE(topi::GetConstInt(end_idx[axis]), - topi::GetConstInt(src_shape[axis])) - << "End index of axis " << axis << " exceeds input shape: " - << topi::GetConstInt(end_idx[axis]) << " vs " - << topi::GetConstInt(src_shape[axis]); - } - } - return Array{ - topi::strided_slice(inputs[0], - GetIntArray(begin_idx), - GetIntArray(end_idx), - GetIntArray(strides)) - }; -}) -.set_attr("FListInputNames", [](const NodeAttrs& attrs) { - return std::vector{"data", "slice_like"}; -}) -.set_support_level(4); - -// where -inline bool WhereShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 3U); - CHECK_EQ(out_attrs->size(), 1U); - const TShape& cond_shape = in_attrs->at(0); - const TShape& x_shape = in_attrs->at(1); - const TShape& y_shape = in_attrs->at(2); - CHECK_EQ(x_shape, y_shape) << "x and y must have the same shape: " - << x_shape << " vs " << y_shape; - if (cond_shape != x_shape) { - CHECK_EQ(cond_shape.ndim(), 1) - << "Shape of condition " << cond_shape - << " must be either equal to x or has dimension of 1."; - } - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, x_shape); - return true; -} - -inline bool WhereInferType(const NodeAttrs &attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - DTYPE_ASSIGN(out_attrs->at(0), in_attrs->at(1)); - return true; -} - -inline bool WhereCorrectLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - CHECK_EQ(ilayouts->size(), last_ilayouts->size()); - CHECK_EQ(olayouts->size(), 1U); - - for (size_t i = 0; i < ilayouts->size(); ++i) { - const Layout& input = last_ilayouts->at(i).defined() ? - last_ilayouts->at(i) : ilayouts->at(i); - NNVM_ASSIGN_LAYOUT(*ilayouts, i, input); - } - - return true; -} - -NNVM_REGISTER_OP(where) -.describe(R"code( -Return the elements, either from x or y, depending on the condition. - -Given three ndarrays, condition, x, and y, return an ndarray with the elements -from x or y, depending on the elements from condition are true or false. 
-x and y must have the same shape. If condition has the same shape as x, -each element in the output array is from x if the corresponding element -in the condition is true, and from y if false. - -If condition does not have the same shape as x, it must be a 1D array whose -size is the same as x’s first dimension size. Each row of the output array -is from x’s row if the corresponding element from condition is true, and -from y’s row if false. - -Note that all non-zero values are interpreted as True in condition. - -Examples:: - - x = [[1, 2], [3, 4]] - y = [[5, 6], [7, 8]] - cond = [[0, 1], [-1, 0]] - where(cond, x, y) = [[5, 2], [3, 8]] - - - cond = [1, 0] - where(cond, x, y) = [[1, 2], [7, 8]] - -)code" NNVM_ADD_FILELINE) -.add_argument("condition", "Tensor", "Condition array") -.add_argument("x", "Tensor", "First array to be selected") -.add_argument("y", "Tensor", "Second array to be selected") -.set_num_inputs(3) -.set_num_outputs(1) -.set_attr("FInferShape", WhereShape) -.set_attr("FInferType", WhereInferType) -.set_attr("FCorrectLayout", WhereCorrectLayout) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ - topi::where(inputs[0], inputs[1], inputs[2]) - }; - }) -.set_attr("FListInputNames", [](const NodeAttrs& attrs) { - return std::vector{"condition", "x", "y"}; -}) -.set_support_level(4); - -// gather_nd -inline bool GatherNDInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_EQ(in_attrs->size(), 2U); - CHECK_EQ(out_attrs->size(), 1U); - const TShape& data_shape = in_attrs->at(0); - const TShape& indices_shape = in_attrs->at(1); - CHECK_GT(indices_shape.ndim(), 1) << "indices must have at least 2 dimensions"; - CHECK_LE(indices_shape[0], data_shape.ndim()) << - "dim 0 of indices must be no more than rank of data"; - std::vector oshape; - for (size_t i = 1; i < indices_shape.ndim(); ++i) { - oshape.push_back(indices_shape[i]); - } - for (size_t i = indices_shape[0]; i < data_shape.ndim(); ++i) { - oshape.push_back(data_shape[i]); - } - if (oshape.size() == 0) { - oshape.push_back(1); - } - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, - TShape(oshape.begin(), oshape.end())); - return true; -} - -inline bool GatherNDInferType(const NodeAttrs &attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - CHECK_EQ(in_attrs->size(), 2U); - CHECK_EQ(out_attrs->size(), 1U); - NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, (*in_attrs)[0]); - return true; -} - -inline bool GatherNDCorrectLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - CHECK_EQ(ilayouts->size(), last_ilayouts->size()); - CHECK_EQ(olayouts->size(), 1U); - - for (size_t i = 0; i < ilayouts->size(); ++i) { - const Layout& input = last_ilayouts->at(i).defined() ? - last_ilayouts->at(i) : ilayouts->at(i); - NNVM_ASSIGN_LAYOUT(*ilayouts, i, input); - } - - return true; -} - -NNVM_REGISTER_OP(gather_nd) -.describe(R"code( -Gather elements or slices from ``data`` into a tensor specified by ``indices``. - -The shape of output tensor is inferred from ``indices``. Given ``data`` with -shape ``(X0, X1, ..., X_{N-1})`` and ``indices`` with shape ``(Y_0, ..., -Y_{M-1})``, the output will have shape ``(Y_1, ..., Y_{M-1}, X_{Y_0}, ..., -X_{N-1})`` when ``Y_0 < N``, or ``(Y_1, ..., Y_{M-1})`` when ``Y_0 == N``. The -operator is invalid when ``Y_0 > N``. 
- -The element in output is defined as follows:: - - output[y_1, ..., y_{M-1}, x_{Y_0}, ..., x_{N-1}] = data[indices[0, y_1, ..., y_{M-1}], - ..., - indices[Y_0-1, y_1, ..., y_{M-1}], - x_{Y_0}, ..., x_{N-1}] - -Examples:: - - data = [[0, 1], [2, 3]] - indices = [[1], [0]] - gather_nd(data, indices) = [2] - - data = [[0, 1], [2, 3]] - indices = [[1, 1, 0], [0, 1, 0]] - gather_nd(data, indices) = [2, 3, 0] - - data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] - indices = [[0, 1], [1, 0]] - gather_nd(data, indices) = [[3, 4], [5, 6]] - -)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input data.") -.add_argument("indices", "Tensor", "Indices of data") -.set_num_inputs(2) -.set_num_outputs(1) -.set_attr("FInferShape", GatherNDInferShape) -.set_attr("FInferType", GatherNDInferType) -.set_attr("FCorrectLayout", GatherNDCorrectLayout) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - return Array{ - topi::gather_nd(inputs[0], inputs[1]) }; - }) -.set_attr("FListInputNames", [](const NodeAttrs& attrs) { - return std::vector{"data", "indices"}; -}) -.set_support_level(3); - -} // namespace top -} // namespace nnvm diff --git a/nnvm/src/top/vision/nms.cc b/nnvm/src/top/vision/nms.cc deleted file mode 100644 index ec97408284e5..000000000000 --- a/nnvm/src/top/vision/nms.cc +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file nms.cc - * \brief Property def of SSD non-maximum suppression operator. 
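 * (Editor's annotation, not part of the original header.) Only the
 * non_max_suppression operator is registered here, with shape, type and
 * layout inference attributes but no FTVMCompute attribute.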
- */ - -#include -#include -#include -#include -#include -#include -#include "../op_common.h" -#include "../elemwise_op_common.h" - -namespace nnvm { -namespace top { -using compiler::FTVMCompute; -using tvm::Tensor; -using tvm::Array; - -DMLC_REGISTER_PARAMETER(NonMaximumSuppressionParam); - -bool NMSShape(const NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - const NonMaximumSuppressionParam& param = - nnvm::get(attrs.parsed); - CHECK_EQ(in_attrs->size(), 2U) << "Inputs: [data, valid_count]"; - TShape dshape = in_attrs->at(0); - TShape vshape = in_attrs->at(1); - CHECK_EQ(dshape.ndim(), 3U) << "Input data should be 3-D."; - CHECK_EQ(vshape.ndim(), 1U) << "Input valid count should be 1-D."; - CHECK_EQ(dshape[2], 6U) << "Data input should have shape " - "(batch_size, num_anchors, 6)."; - CHECK_EQ(dshape[0], vshape[0]) << "batch_size mismatch."; - out_attrs->clear(); - if (param.return_indices) { - TShape oshape = TShape(2); - oshape[0] = dshape[0]; - oshape[1] = dshape[1]; - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape); - } else { - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, dshape); - } - return true; -} - -inline bool NMSInferType(const NodeAttrs &attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - DTYPE_ASSIGN(out_attrs->at(0), in_attrs->at(0)); - return true; -} - -inline bool NMSInferLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - static const Layout kNCHW("NCHW"); - CHECK_EQ(ilayouts->size(), 2U); - CHECK_EQ(olayouts->size(), 1U); - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, kNCHW); - NNVM_ASSIGN_LAYOUT(*ilayouts, 1, kNCHW); - return true; -} - -NNVM_REGISTER_OP(non_max_suppression) - .describe(R"doc("Non-maximum suppression." -)doc" NNVM_ADD_FILELINE) -.set_num_inputs(2) -.set_num_outputs(1) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", - ParamGetAttrDict) -.add_arguments(NonMaximumSuppressionParam::__FIELDS__()) -.add_argument("data", "Tensor", "Input data.") -.add_argument("valid_count", "Tensor", "Number of valid anchor boxes.") -.set_attr("FListInputNames", [](const NodeAttrs& attrs) { - return std::vector{"data", "valid_count"}; -}) -.set_attr("FInferShape", NMSShape) -.set_attr("FInferType", NMSInferType) -.set_attr("FCorrectLayout", NMSInferLayout) -.set_support_level(4); - -} // namespace top -} // namespace nnvm - diff --git a/nnvm/src/top/vision/ssd/mutibox_op.cc b/nnvm/src/top/vision/ssd/mutibox_op.cc deleted file mode 100644 index 47f2f82a8664..000000000000 --- a/nnvm/src/top/vision/ssd/mutibox_op.cc +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file multibox_op.cc - * \brief Property def of SSD multibox related operators. 
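 * (Editor's annotation, not part of the original header.) The operators
 * registered below are multibox_prior and multibox_transform_loc.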
- */ - -#include -#include -#include -#include -#include -#include -#include "../../op_common.h" -#include "../../elemwise_op_common.h" - -namespace nnvm { -namespace top { -using compiler::FTVMCompute; -using tvm::Tensor; -using tvm::Array; - -DMLC_REGISTER_PARAMETER(MultiBoxPriorParam); - -bool MultiBoxPriorShape(const NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - const MultiBoxPriorParam& param = nnvm::get(attrs.parsed); - CHECK_EQ(in_attrs->size(), 1U) << "Inputs: [data]" << in_attrs->size(); - TShape dshape = in_attrs->at(0); - CHECK_GE(dshape.ndim(), 4U) << "Input data should be 4D: " - "[batch, channel, height, width]"; - int in_height = dshape[2]; - CHECK_GT(in_height, 0) << "Input height should > 0"; - int in_width = dshape[3]; - CHECK_GT(in_width, 0) << "Input width should > 0"; - // since input sizes are same in each batch, we could share MultiBoxPrior - TShape oshape = TShape(3); - int num_sizes = param.sizes.ndim(); - int num_ratios = param.ratios.ndim(); - oshape[0] = 1; - oshape[1] = in_height * in_width * (num_sizes + num_ratios - 1); - oshape[2] = 4; - CHECK_EQ(param.steps.ndim(), 2) << "Step ndim must be 2: (step_y, step_x)"; - CHECK_GE(param.steps[0] * param.steps[1], 0) << "Must specify both " - "step_y and step_x"; - out_attrs->clear(); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape); - return true; -} - -inline bool MultiBoxPriorLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - static const Layout kNCHW("NCHW"); - CHECK_EQ(ilayouts->size(), 1U); - CHECK_EQ(olayouts->size(), 1U); - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, kNCHW); - return true; -} - -NNVM_REGISTER_OP(multibox_prior) - .describe(R"doc("Generate prior(anchor) boxes from data, sizes and ratios." 
-)doc" NNVM_ADD_FILELINE) -.set_num_inputs(1) -.set_num_outputs(1) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.add_arguments(MultiBoxPriorParam::__FIELDS__()) -.add_argument("data", "Tensor", "Input data") -.set_attr("FInferShape", MultiBoxPriorShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", MultiBoxPriorLayout) -.set_attr( - "FGradient", [](const NodePtr& n, - const std::vector& ograds) { - return std::vector{ - MakeNode("zeros_like", n->attrs.name + "_zero_grad", - {n->inputs[0]}), - ograds[0] - }; -}) -.set_support_level(4); - -DMLC_REGISTER_PARAMETER(MultiBoxTransformLocParam); - -bool MultiBoxTransformLocShape(const NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - CHECK_EQ(in_attrs->size(), 3U) << "Inputs: [cls_prob, loc_pred, anchor]"; - TShape cshape = in_attrs->at(0); - TShape lshape = in_attrs->at(1); - TShape ashape = in_attrs->at(2); - CHECK_EQ(cshape.ndim(), 3U) << "Class probability should be 3-D."; - CHECK_EQ(lshape.ndim(), 2U) << "Location prediction should be 2-D."; - CHECK_EQ(ashape.ndim(), 3U) << "Anchor should be 3-D."; - CHECK_EQ(cshape[2], ashape[1]) << "Number of anchors mismatch."; - CHECK_EQ(cshape[2] * 4, lshape[1]) << "# anchors mismatch with # loc."; - CHECK_GT(ashape[1], 0U) << "Number of anchors must > 0."; - CHECK_EQ(ashape[2], 4U); - TShape oshape0 = TShape(3); - oshape0[0] = cshape[0]; - oshape0[1] = ashape[1]; - oshape0[2] = 6; // [id, prob, xmin, ymin, xmax, ymax] - TShape oshape1 = TShape(1); - oshape1[0] = cshape[0]; - out_attrs->clear(); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape0); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 1, oshape1); - return true; -} - -inline bool MultiBoxTransformLocLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - CHECK_EQ(ilayouts->size(), 3U); - CHECK_EQ(last_ilayouts->size(), 3U); - CHECK_EQ(olayouts->size(), 2U); - for (size_t i = 0; i < last_ilayouts->size(); ++i) { - const Layout& last_layout = last_ilayouts->at(i); - if (last_layout.defined()) { - NNVM_ASSIGN_LAYOUT(*ilayouts, i, last_layout); - } - } - return true; -} - -inline bool MultiBoxTransformLocInferType(const NodeAttrs &attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - DTYPE_ASSIGN(out_attrs->at(0), in_attrs->at(0)); - DTYPE_ASSIGN(out_attrs->at(1), 4U); - return true; -} - -NNVM_REGISTER_OP(multibox_transform_loc) - .describe(R"doc("Location transformation for multibox detection." 
-)doc" NNVM_ADD_FILELINE) -.set_num_inputs(3) -.set_num_outputs(2) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", - ParamGetAttrDict) -.add_arguments(MultiBoxTransformLocParam::__FIELDS__()) -.add_argument("cls_prob", "Tensor", "Class probabilities.") -.add_argument("loc_pred", "Tensor", "Location regression predictions.") -.add_argument("anchor", "Tensor", "Multibox prior anchor boxes") -.set_attr("FListInputNames", [](const NodeAttrs& attrs) { - return std::vector{"cls_prob", "loc_pred", "anchor"}; -}) -.set_attr("FInferShape", MultiBoxTransformLocShape) -.set_attr("FInferType", MultiBoxTransformLocInferType) -.set_attr("FCorrectLayout", MultiBoxTransformLocLayout) -.set_support_level(4); - -} // namespace top -} // namespace nnvm diff --git a/nnvm/src/top/vision/yolo/reorg.cc b/nnvm/src/top/vision/yolo/reorg.cc deleted file mode 100644 index c16d46ff4652..000000000000 --- a/nnvm/src/top/vision/yolo/reorg.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file reorg.cc - */ -#include -#include -#include -#include -#include "../../op_common.h" -#include "../../elemwise_op_common.h" -#include "reorg.h" - -namespace nnvm { -namespace top { - -// reorg -DMLC_REGISTER_PARAMETER(ReorgParam); - -inline bool ReorgInferShape(const nnvm::NodeAttrs &attrs, - std::vector *in_shape, - std::vector *out_shape) { - const ReorgParam ¶m = nnvm::get(attrs.parsed); - TShape dshape = in_shape->at(0); - if (dshape.ndim() == 0) - return false; - NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, 0, dshape); - CHECK_EQ(dshape.ndim(), 4) << "Input data should be 4D"; - CHECK_GT(param.stride, 0U) << "Stride value cannot be 0"; - TShape oshape({dshape[0], 0, 0, 0}); - oshape[1] = dshape[1] * param.stride * param.stride; - oshape[2] = dshape[2] / param.stride; - oshape[3] = dshape[3] / param.stride; - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); - return true; -} - -NNVM_REGISTER_OP(yolo_reorg) -.describe(R"(Perform reorg operation on input array based on the stride value. -- **data**: Input is 4D array of shape (batch_size, channels, in_height, in_width). -- **out**: Output is 4D array of shape (batch_size, channels/(stride*stride), in_height*stride, in_width*stride). 
-)" NNVM_ADD_FILELINE) -.set_num_inputs(1) -.set_num_outputs(1) -.set_support_level(5) -.add_argument("data", "Tensor", "Data input to reorganize") -.set_attr_parser(ParamParser) -.add_arguments(ReorgParam::__FIELDS__()) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferType", ElemwiseType<-1, 1>) -.set_attr("FInferShape", ReorgInferShape); -} // namespace top -} // namespace nnvm diff --git a/nnvm/src/top/vision/yolo/reorg.h b/nnvm/src/top/vision/yolo/reorg.h deleted file mode 100644 index 53549df3634a..000000000000 --- a/nnvm/src/top/vision/yolo/reorg.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file reorg.h - */ -#ifndef NNVM_TOP_VISION_YOLO_REORG_H_ -#define NNVM_TOP_VISION_YOLO_REORG_H_ - -#include -#include -#include -#include -#include - -namespace nnvm { -namespace top { - -template -inline bool ReorgAttr(const nnvm::NodeAttrs &attrs, - std::vector *in_attrs, - std::vector *out_attrs, - const AttrType &none) { - AttrType dattr = none; - size_t in_size = in_attrs->size(); - size_t out_size = out_attrs->size(); - if (n_in != -1) { - in_size = static_cast(n_in); - } - if (n_out != -1) { - out_size = static_cast(n_out); - } - - auto deduce = [&](std::vector *vec, size_t size, const char *name) { - for (size_t i = 0; i < size; ++i) { - if (i == 0) { - CHECK(assign(&dattr, (*vec)[i])) - << "Incompatible attr in node " << attrs.name << " at " << i - << "-th " << name << ": " - << "expected " << attr_string(dattr) << ", got " - << attr_string((*vec)[i]); - } - } - }; - deduce(in_attrs, in_size, "input"); - - auto write = [&](std::vector *vec, size_t size, const char *name) { - for (size_t i = 0; i < size; ++i) { - CHECK(assign(&(*vec)[i], dattr)) - << "Incompatible attr in node " << attrs.name << " at " << i << "-th " - << name << ": " - << "expected " << attr_string(dattr) << ", got " - << attr_string((*vec)[i]); - } - }; - write(out_attrs, out_size, "output"); - - if (is_none(dattr)) { - return false; - } - return true; -} - -template -inline bool ReorgShape(const NodeAttrs &attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - if (n_in != -1) { - CHECK_EQ(in_attrs->size(), static_cast(n_in)) - << " in operator " << attrs.name; - } - if (n_out != -1) { - CHECK_EQ(out_attrs->size(), static_cast(n_out)) - << " in operator " << attrs.name; - } - return ReorgAttr( - attrs, in_attrs, out_attrs, TShape()); -} - -template -inline bool ReorgType(const NodeAttrs &attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - if (n_in != -1) { - CHECK_EQ(in_attrs->size(), static_cast(n_in)) - << " in operator " << attrs.name; - } - if (n_out != -1) { - CHECK_EQ(out_attrs->size(), static_cast(n_out)) - << " in operator " << attrs.name; - } - return ReorgAttr( - attrs, 
in_attrs, out_attrs, -1); -} - -struct ReorgParam : public dmlc::Parameter { - int stride; - - DMLC_DECLARE_PARAMETER(ReorgParam) { - DMLC_DECLARE_FIELD(stride).set_default(1).describe("Stride value"); - } -}; -} // namespace top -} // namespace nnvm -#endif // NNVM_TOP_VISION_YOLO_REORG_H_ diff --git a/nnvm/tests/python/compiler/test_alter_op_layout.py b/nnvm/tests/python/compiler/test_alter_op_layout.py deleted file mode 100644 index aad634f03843..000000000000 --- a/nnvm/tests/python/compiler/test_alter_op_layout.py +++ /dev/null @@ -1,121 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Unittest cases for AlterOpLayout pass""" -from nnvm import symbol as sym -from nnvm.compiler import graph_attr -from nnvm.top import registry as reg -import nnvm.graph as graph - -def get_layouts(g): - ldict = {} - vlayout = g.json_attr("layout") - entry_ptr = g.index.entry_ptr - for i, n in enumerate(g.index.nodes): - begin, end = entry_ptr[i], entry_ptr[i + 1] - ldict[n["name"]] = vlayout[begin:end] - return ldict - - -def test_alter_conv2d_layout(): - data = sym.Variable("data", shape=(1, 32, 512, 512)) - conv = sym.conv2d(data, name="conv", channels=16, - kernel_size=(3,3), padding=(1,1), - use_bias=False, layout="NCHW") - # split here - convs = sym.split(conv, indices_or_sections=2) - relus = [sym.relu(x, name="relu") for x in convs] - relu = sym.concatenate(*relus) - flatten = sym.flatten(relu, name="flatten") - softmax = sym.softmax(flatten, name="softmax") - g = graph.create(softmax) - - g = g.apply("CorrectLayout") - g = graph_attr.set_dtype_inputs(g, "float32") - g = g.apply(["InferShape", "InferType"]) - layouts_origin = get_layouts(g) - - @reg.register_alter_op_layout("conv2d", level=100) - def alter_conv2d_layout(attrs, inputs, tinfos): - new_attrs = {k : attrs[k] for k in attrs.keys()} - new_attrs["layout"] = "NCHW16c" - new_attrs["kernel_layout"] = "NCHW16c" - new_attrs["name"] = "conv_alter" - return sym.conv2d(inputs[0], inputs[1], **new_attrs) - - g = g.apply("AlterOpLayout") - layouts = get_layouts(g) - - # check copy layouts - for node in ["data", "relu", "flatten", "softmax", "conv_weight"]: - assert layouts[node] == layouts_origin[node] - assert layouts["conv_alter"] == layouts_origin["conv"] - - -def test_consecutive_alter_layout(): - data = sym.Variable("data", shape=(1, 32, 512, 512)) - pool1 = sym.global_avg_pool2d(data, name="global_avg_pool2d_1", layout="NCHW") - pool2 = sym.global_avg_pool2d(pool1, name="global_avg_pool2d_2", layout="NCHW") - relu = sym.relu(pool2, name="relu") - - g = graph.create(relu) - g = g.apply("CorrectLayout") - g = graph_attr.set_dtype_inputs(g, "float32") - g = g.apply(["InferShape", "InferType"]) - assert g.json_attr("layout") == ['NCHW', 'NCHW', 'NCHW', 'NCHW'] - - 
@reg.register_alter_op_layout("global_avg_pool2d", level=100) - def alter_global_avg_pool2d_layout(attrs, inputs, tinfos): - new_attrs = {k : attrs[k] for k in attrs.keys()} - new_attrs["layout"] = "NCHW16c" - return sym.global_avg_pool2d(inputs[0], **new_attrs) - - g = g.apply("AlterOpLayout") - - # pool1 get replaced - output layout of pool1 is not recorded - # pool2 get replaced - input layout of pool2 is not recorded - # thus the second entry must be undefined - it can neither recover from pool1's output, - # nor from pool2's input. - assert g.json_attr("layout") == ['NCHW', '__undef__', 'NCHW', 'NCHW'] - - -def test_alter_func_return_none(): - data = sym.Variable("data", shape=(1, 32, 512, 512)) - pool1 = sym.global_max_pool2d(data, name="pool1", layout="NCHW") - pool2 = sym.global_max_pool2d(pool1, name="pool2", layout="NCHW") - relu = sym.relu(pool2, name="relu") - - g = graph.create(relu) - g = g.apply("CorrectLayout") - g = graph_attr.set_dtype_inputs(g, "float32") - g = g.apply(["InferShape", "InferType"]) - assert g.json_attr("layout") == ['NCHW', 'NCHW', 'NCHW', 'NCHW'] - - @reg.register_alter_op_layout("global_max_pool2d", level=100) - def alter_global_max_pool2d_layout(attrs, inputs, tinfos): - return None - - g = g.apply("AlterOpLayout") - - # alter func return none, nothing get replaced, - # the layouts should remain the same - assert g.json_attr("layout") == ['NCHW', 'NCHW', 'NCHW', 'NCHW'] - - -if __name__ == "__main__": - test_alter_conv2d_layout() - test_consecutive_alter_layout() - test_alter_func_return_none() diff --git a/nnvm/tests/python/compiler/test_autotvm_task_extraction.py b/nnvm/tests/python/compiler/test_autotvm_task_extraction.py deleted file mode 100644 index 1ecbf053f923..000000000000 --- a/nnvm/tests/python/compiler/test_autotvm_task_extraction.py +++ /dev/null @@ -1,79 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""Test task extraction for autotvm""" - -import nnvm.testing -import nnvm.compiler -from tvm import autotvm - -def get_network(name, batch_size): - """Get the symbol definition and random weight of a network""" - input_shape = (batch_size, 3, 224, 224) - output_shape = (batch_size, 1000) - - if name == 'resnet-18': - net, params = nnvm.testing.resnet.get_workload(num_layers=18, batch_size=batch_size) - elif name == 'mobilenet': - net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size) - elif name == 'squeezenet v1.1': - net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1') - elif name == 'vgg-16': - net, params = nnvm.testing.vgg.get_workload(num_layers=16, batch_size=batch_size) - elif name == 'dcgan': - net, params = nnvm.testing.dcgan.get_workload(batch_size=batch_size) - input_shape = (batch_size, 100) - else: - raise ValueError("Unsupported network: " + name) - - return net, params, input_shape, output_shape - -def test_task_extraction(): - target = 'llvm' - dtype = 'float32' - - net, params, input_shape, out_shape = get_network('resnet-18', batch_size=1) - tasks = autotvm.task.extract_from_graph(net, target=target, - shape={'data': input_shape}, dtype=dtype, - symbols=(nnvm.sym.conv2d,)) - assert len(tasks) == 12 - - net, params, input_shape, out_shape = get_network('resnet-18', batch_size=1) - tasks = autotvm.task.extract_from_graph(net, target=target, - shape={'data': input_shape}, dtype=dtype, - symbols=(nnvm.sym.dense,)) - assert len(tasks) == 1 - - net, params, input_shape, out_shape = get_network('resnet-18', batch_size=1) - tasks = autotvm.task.extract_from_graph(net, target=target, - shape={'data': input_shape}, dtype=dtype, - symbols=(nnvm.sym.conv2d, nnvm.sym.dense)) - assert len(tasks) == 13 - - net, params, input_shape, out_shape = get_network('mobilenet', batch_size=1) - tasks = autotvm.task.extract_from_graph(net, target=target, - shape={'data': input_shape}, dtype=dtype, - symbols=(nnvm.sym.conv2d, nnvm.sym.dense)) - assert len(tasks) == 20 - - net, params, input_shape, out_shape = get_network('dcgan', batch_size=1) - tasks = autotvm.task.extract_from_graph(net, target=target, - shape={'data': input_shape}, dtype=dtype, - symbols=(nnvm.sym.conv2d_transpose,)) - assert len(tasks) == 4 - -if __name__ == '__main__': - test_task_extraction() diff --git a/nnvm/tests/python/compiler/test_build.py b/nnvm/tests/python/compiler/test_build.py deleted file mode 100644 index a2a5ac659c8f..000000000000 --- a/nnvm/tests/python/compiler/test_build.py +++ /dev/null @@ -1,176 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import numpy as np - -import tvm -from tvm.contrib import graph_runtime -import nnvm.symbol as sym -import nnvm.compiler -from nnvm.compiler.build_module import _run_graph, precompute_prune - -def test_compile(): - x = sym.Variable("x") - y = sym.Variable("y") - z = sym.exp(y + x) - shape = (10, 128) - dtype = tvm.float32 - shape_dict = {"x": shape, "y": shape} - def verify(graph, lib): - m = graph_runtime.create(graph, lib, tvm.cpu(0)) - # get member functions - set_input, run, get_output = m["set_input"], m["run"], m["get_output"] - na = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - nb = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - # set inputs - set_input("x", na) - set_input("y", nb) - # execute - run() - # get outputs - out = tvm.nd.empty(shape, dtype) - get_output(0, out) - tvm.testing.assert_allclose( - out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy())) - - graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict) - assert graph.index.num_nodes == 3 - verify(graph, lib) - - with nnvm.compiler.build_config(opt_level=0): - graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict) - # print(graph.ir()) - assert graph.index.num_nodes == 4 - verify(graph, lib) - -def test_run(): - x = sym.Variable("x") - y = sym.Variable("y") - z = sym.exp(y + x) - shape = (10, 10) - dtype = tvm.float32 - nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - ny = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - res = _run_graph(z, {"x": nx, "y": ny}) - tvm.testing.assert_allclose( - res[0].asnumpy(), np.exp(nx.asnumpy() + ny.asnumpy())) - - -def test_precompute_prune(): - x = sym.Variable("x") + 1 - a = sym.Variable("a") - y = sym.Variable("y") - z = y + x + a - shape = (10, 10) - dtype = tvm.float32 - nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - na = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - ny = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - params = {"x": nx, "a": na} - graph, lib, params = nnvm.compiler.build( - z, "llvm", shape={"y": ny.shape}, params=params) - assert graph.index.num_nodes == 4 - m = graph_runtime.create(graph, lib, tvm.cpu(0)) - params["y"] = ny - res = tvm.nd.empty(shape) - m["load_params"](nnvm.compiler.save_param_dict(params)) - m.run() - out = m.get_output(0, out=res) - tvm.testing.assert_allclose( - res.asnumpy(), nx.asnumpy() + 1 + ny.asnumpy() + na.asnumpy()) - - -def test_dtypes(): - x = sym.Variable("x") - y = sym.relu(x) - dshape = (1, 3, 32, 32) - oshape = dshape - for dtype in ['float32', 'float64', 'int32', 'int16', 'int8', 'int64']: - graph, lib, _ = nnvm.compiler.build(y, 'llvm', {"x": dshape}, dtype=dtype) - m = graph_runtime.create(graph, lib, tvm.cpu()) - if 'float' in dtype: - data = np.random.uniform(size=dshape).astype(dtype) - elif 'int' in dtype: - data = np.random.randint(-127, 127, dshape).astype(dtype) - m.run(x=data) - data = (data > 0) * data - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - tvm.testing.assert_allclose(out.asnumpy(), data, atol=1e-5, rtol=1e-5) - -def test_ndarray_output(): - x = sym.Variable("x") - y = sym.Variable("y") - z = x + y - shape = (10, 10) - dtype = tvm.float32 - nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - ny = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - params = {"x": nx, "ny": ny} - graph, lib, params = nnvm.compiler.build( - z, "llvm", shape={"y": ny.shape, "x": nx.shape}, params=params) - m = graph_runtime.create(graph, lib, tvm.cpu(0)) - m.set_input("x", nx) - 
m.set_input("y", ny) - m.run() - out = m.get_output(0) - tvm.testing.assert_allclose( - out.asnumpy(), nx.asnumpy() + ny.asnumpy()) - -def test_ndarray_input(): - x = sym.Variable("x") - y = sym.Variable("y") - z = x + y - shape = (10, 10) - dtype = tvm.float32 - nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - ny = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - params = {"x": nx, "ny": ny} - graph, lib, params = nnvm.compiler.build( - z, "llvm", shape={"y": ny.shape, "x": nx.shape}, params=params) - m = graph_runtime.create(graph, lib, tvm.cpu(0)) - m.set_input("x", nx) - m.set_input("y", ny) - in_x = tvm.nd.empty(shape, dtype) - in_y = tvm.nd.empty(shape, dtype) - m.get_input("x", in_x) - m.get_input("y", in_y) - tvm.testing.assert_allclose(nx.asnumpy(), in_x.asnumpy()) - tvm.testing.assert_allclose(ny.asnumpy(), in_y.asnumpy()) - in_nx = m.get_input("x") - in_ny = m.get_input("y") - tvm.testing.assert_allclose(nx.asnumpy(), in_nx.asnumpy()) - tvm.testing.assert_allclose(ny.asnumpy(), in_ny.asnumpy()) - -def test_num_outputs(): - x = sym.Variable('x') - z = sym.split(x, indices_or_sections=5, axis=1) - shape = (10, 10) - dtype = tvm.float32 - nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - params = {"x": nx} - graph, lib, params = nnvm.compiler.build( - z, "llvm", shape={"x": nx.shape}, params=params) - m = graph_runtime.create(graph, lib, tvm.cpu(0)) - assert m.get_num_outputs() == 5 - -if __name__ == "__main__": - test_precompute_prune() - test_compile() - test_run() - test_dtypes() - test_ndarray_output() - test_ndarray_input() - test_num_outputs() diff --git a/nnvm/tests/python/compiler/test_compiler_cache.py b/nnvm/tests/python/compiler/test_compiler_cache.py deleted file mode 100644 index c0f207a66bf0..000000000000 --- a/nnvm/tests/python/compiler/test_compiler_cache.py +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import numpy as np -import tvm -from tvm.contrib import graph_runtime -import nnvm.symbol as sym -import nnvm.compiler - -def test_compile_cache(): - x = sym.Variable("x") - y = sym.Variable("y") - z = sym.exp(y + x) - shape = (10, 1) - dtype = tvm.float32 - shape_dict = {"x": shape, "y": shape} - def verify(graph, lib): - m = graph_runtime.create(graph, lib, tvm.cpu(0)) - # get member functions - na = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - nb = tvm.nd.array(np.random.uniform(size=shape).astype(dtype)) - m.run(x=na, y=nb) - # get outputs - out = m.get_output(0, tvm.nd.empty(shape, dtype)) - tvm.testing.assert_allclose( - out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy())) - - engine = nnvm.compiler.engine - graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict) - inputs = [tvm.placeholder((10,)), tvm.placeholder((10,))] - - gkey = nnvm.compiler.graph_key(nnvm.graph.create(z), inputs, "llvm") - gkey2 = nnvm.compiler.graph_key(nnvm.graph.create(z), inputs + inputs, "llvm") - gf = engine[gkey] - assert gf is not None - assert engine[gkey2] is None - graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict) - assert graph.index.num_nodes == 3 - verify(graph, lib) - # Test various set external cache - engine.clear_cache() - engine[gkey] = gf - -if __name__ == "__main__": - test_compile_cache() diff --git a/nnvm/tests/python/compiler/test_fold_axis.py b/nnvm/tests/python/compiler/test_fold_axis.py deleted file mode 100644 index 2bceb652162a..000000000000 --- a/nnvm/tests/python/compiler/test_fold_axis.py +++ /dev/null @@ -1,174 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""Unittest cases for fold_axis""" -import tvm -import nnvm -import nnvm.testing.resnet -import numpy as np -from nnvm import symbol as sym -from nnvm.compiler import graph_util, graph_attr - -def test_fold_axis_conv(): - # Before simplify - def before(x, conv_weight, conv_bias, in_scale, out_scale, channels): - x = x * sym.expand_dims(in_scale, axis=1, num_newaxis=2) - y = sym.conv2d(x, conv_weight, conv_bias, - channels=channels, - kernel_size=(3, 3), - padding=(1, 1), - name="conv") - y = sym.relu(y) - y = y * sym.expand_dims(out_scale, axis=1, num_newaxis=2) - return y - - def expected(x, conv_weight, conv_bias, in_scale, out_scale, channels): - conv_weight = conv_weight * sym.expand_dims(out_scale, axis=1, num_newaxis=3) - conv_weight = conv_weight * sym.expand_dims(in_scale, axis=1, num_newaxis=2) - conv_bias = conv_bias * out_scale - y = sym.conv2d(x, - conv_weight, - conv_bias, - channels=channels, - kernel_size=(3, 3), - padding=(1, 1), - name="conv") - y = sym.relu(y) - return y - - def check(shape, channels): - x = sym.Variable("x") + 1 - weight = sym.Variable("weight") - bias = sym.Variable("bias") - in_scale = sym.Variable("in_scale") - out_scale = sym.Variable("out_scale") - y1 = before(x, weight, bias, in_scale, out_scale, channels) - y2 = expected(x, weight, bias, in_scale, out_scale, channels) - ishape = {"x": shape, "out_scale": (channels,), "in_scale": (shape[1],)} - g1 = nnvm.graph.create(y1) - g2 = nnvm.graph.create(y2) - graph_attr.set_shape_inputs(g1, ishape) - g1 = g1.apply("InferShape").apply("FoldScaleAxis") - # assert graph equals as expected - graph_util.check_graph_equal(g1, g2) - - check((2, 4, 10, 10), 2) - -def test_fold_axis_depthwise_conv(): - # Before simplify - def before(x, conv_weight, conv_bias, in_scale, out_scale, channels): - x = x * sym.expand_dims(in_scale, axis=1, num_newaxis=2) - y = sym.conv2d(x, conv_weight, conv_bias, - channels=channels, - kernel_size=(3, 3), - padding=(1, 1), - groups=54, - name="depthiwise_conv") - y = sym.relu(y) - y = y * sym.expand_dims(out_scale, axis=1, num_newaxis=2) - return y - - def expected(x, conv_weight, conv_bias, in_scale, out_scale, channels): - conv_weight = conv_weight * sym.expand_dims(out_scale, axis=1, num_newaxis=3) - conv_weight = conv_weight * sym.expand_dims(in_scale, axis=1, num_newaxis=3) - conv_bias = conv_bias * out_scale - y = sym.conv2d(x, - conv_weight, - conv_bias, - channels=channels, - kernel_size=(3, 3), - padding=(1, 1), - groups=54, - name="depthiwise_conv") - y = sym.relu(y) - return y - - def check(shape, channels): - x = sym.Variable("x") + 1 - weight = sym.Variable("weight") - bias = sym.Variable("bias") - in_scale = sym.Variable("in_scale") - out_scale = sym.Variable("out_scale") - y1 = before(x, weight, bias, in_scale, out_scale, channels) - y2 = expected(x, weight, bias, in_scale, out_scale, channels) - ishape = {"x": shape, "out_scale": (channels,), "in_scale": (shape[1],)} - g1 = nnvm.graph.create(y1) - g2 = nnvm.graph.create(y2) - graph_attr.set_shape_inputs(g1, ishape) - g1 = g1.apply("InferShape").apply("FoldScaleAxis") - # assert graph equals as expected - graph_util.check_graph_equal(g1, g2) - - check((1, 54, 63, 127), 54) - -def test_fold_fail(): - # Before simplify - def before(x, scale, channels): - y = sym.conv2d(x, - channels=channels, - kernel_size=(3, 3), - padding=(1, 1), - name="conv") - y = y * sym.expand_dims(scale, axis=1, num_newaxis=1) - return y - - def check(shape, channels): - x = sym.Variable("x") - bias = sym.Variable("bias") - scale = 
sym.Variable("scale") - y1 = before(x, scale, channels) - ishape = {"x": shape, "scale": (channels,), "bias": (channels,)} - g1 = nnvm.graph.create(y1) - graph_attr.set_shape_inputs(g1, ishape) - g2 = g1.apply("InferShape").apply("FoldScaleAxis") - # assert graph equals as expected - graph_util.check_graph_equal(g1, g2) - - check((2, 10, 10, 10), 10) - - -def test_fold_resnet(): - batch_size = 1 - num_classes = 1000 - image_shape = (3, 224, 224) - data_shape = (batch_size,) +image_shape - net, params = nnvm.testing.resnet.get_workload( - batch_size=1, image_shape=image_shape) - ishape = {"data" : data_shape} - graph = nnvm.graph.create(net) - data = np.random.uniform(size=data_shape).astype("float32") - # Initial pass do shape type inference - shape, _ = graph_util.infer_shape(graph, **ishape) - ishape.update(zip(graph.index.input_names, shape)) - - def run_prune(graph, params, opt_level): - # Apply optimization - with nnvm.compiler.build_config(opt_level=0): - graph = nnvm.compiler.optimize(graph, ishape) - graph, params = nnvm.compiler.build_module.precompute_prune(graph, params) - params["data"] = data - return nnvm.compiler.build_module._run_graph(graph, params) - - x = run_prune(graph, params, 0) - y = run_prune(graph, params, 3) - tvm.testing.assert_allclose(y[0].asnumpy(), x[0].asnumpy()) - - -if __name__ == "__main__": - test_fold_resnet() - test_fold_axis_conv() - test_fold_fail() - test_fold_axis_depthwise_conv() diff --git a/nnvm/tests/python/compiler/test_graph_pass.py b/nnvm/tests/python/compiler/test_graph_pass.py deleted file mode 100644 index d65a2be9abf8..000000000000 --- a/nnvm/tests/python/compiler/test_graph_pass.py +++ /dev/null @@ -1,34 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Unittest cases for graph pass""" -import nnvm -import nnvm.compiler -from nnvm import symbol as sym -from nnvm.compiler import graph_util, graph_attr - -def test_infer_attr(): - x = sym.Variable("x") - y = x * 2 - g = nnvm.graph.create(y) - ishape, oshape = graph_util.infer_shape(g, x=(10,20)) - assert tuple(oshape[0]) == (10, 20) - - itype, otype = graph_util.infer_dtype(g, x="float32") - assert otype[0] == "float32" - -if __name__ == "__main__": - test_infer_attr() diff --git a/nnvm/tests/python/compiler/test_nhwc_layout.py b/nnvm/tests/python/compiler/test_nhwc_layout.py deleted file mode 100644 index e3747daf8563..000000000000 --- a/nnvm/tests/python/compiler/test_nhwc_layout.py +++ /dev/null @@ -1,73 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np -import tvm -from tvm.contrib import graph_runtime as runtime -import nnvm.symbol as sym -import nnvm.compiler -from nnvm.testing.config import ctx_list - -def get_sym(layout, kernel_layout, channels): - data = sym.Variable(name="data") - data = sym.conv2d(data=data, kernel_size=(3,3), channels=channels, padding=(1, 1), - layout=layout, kernel_layout=kernel_layout, use_bias=True) - data = sym.max_pool2d(data=data, pool_size=(2, 2), strides=(2, 2), layout=layout) - data = sym.upsampling(data=data, scale=2, layout=layout) - softmax_axis = 1 - if layout == "NHWC": - softmax_axis = 3 - data = sym.softmax(data=data, axis=softmax_axis) - return data - - -def build_and_run(sym, params, data, out_shape): - ctx = tvm.cpu(0) - graph, lib, params = nnvm.compiler.build(sym, "llvm", shape={"data":data.shape}, params=params) - module = runtime.create(graph, lib, ctx) - module.set_input(**params) - module.set_input("data", data) - module.run() - out = module.get_output(0, tvm.nd.empty(out_shape)) - return out.asnumpy() - - -def test_nhwc(): - data_shape = (1, 3, 224, 224) - out_channel = 8 - nchw_sym = get_sym("NCHW", "OIHW", out_channel) - nhwc_sym = get_sym("NHWC", "HWIO", out_channel) - conv_weight = np.random.uniform(-1, 1, (out_channel, 3, 3, 3)).astype(np.float32) - conv_bias = np.random.uniform(-1, 1, (out_channel)).astype(np.float32) - nchw_params = { - "conv2d0_weight" : tvm.nd.array(conv_weight, ctx=tvm.cpu(0)), - "conv2d0_bias" : tvm.nd.array(conv_bias, ctx=tvm.cpu(0)) - } - nhwc_params = { - "conv2d1_weight" : tvm.nd.array(conv_weight.transpose(2, 3, 1, 0), ctx=tvm.cpu(0)), - "conv2d1_bias" : tvm.nd.array(conv_bias, ctx=tvm.cpu(0)) - } - - data = np.random.uniform(-1, 1, data_shape).astype(np.float32) - oshape = (1, out_channel, 224, 224) - oshape_nhwc = (1, 224, 224, out_channel) - nchw_output = build_and_run(nchw_sym, nchw_params, data, oshape) - nhwc_output = build_and_run(nhwc_sym, nhwc_params, data.transpose(0, 2, 3, 1), oshape_nhwc) - tvm.testing.assert_allclose(nchw_output, nhwc_output.transpose(0, 3, 1, 2), rtol=1e-5, atol=1e-5) - - -if __name__ == "__main__": - test_nhwc() diff --git a/nnvm/tests/python/compiler/test_op_fusion.py b/nnvm/tests/python/compiler/test_op_fusion.py deleted file mode 100644 index bc0caeecf58c..000000000000 --- a/nnvm/tests/python/compiler/test_op_fusion.py +++ /dev/null @@ -1,248 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import nnvm -import numpy as np -import tvm -import topi.testing -from tvm.contrib import graph_runtime -from nnvm import symbol as sym -from nnvm.compiler import graph_util, graph_attr -from nnvm.testing import ctx_list, utils - -def test_ewise_injective(): - x = sym.Variable("x") - y = x * 2 - y = sym.flatten(y) + 1 - dshape = (10, 2, 3) - shape_dict = {"x": dshape} - dtype = "float32" - target = "llvm" - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) - assert graph.index.num_nodes == 2 - m = graph_runtime.create(graph, lib, ctx) - x_np = np.random.uniform(size=dshape).astype(dtype) - m.run(x=x_np) - out = m.get_output(0, tvm.nd.empty((10, 6))) - tvm.testing.assert_allclose( - out.asnumpy(), x_np.reshape(out.shape) * 2 + 1, - atol=1e-5, rtol=1e-5) - - -def test_conv_ewise_injective(): - x = sym.Variable("x") - y = sym.conv2d(x, channels=32, kernel_size=(3, 3), groups=32, - name="y", padding=(1,1)) - y = sym.flatten(y + 1) + 1 - dtype = "float32" - dshape = (1, 32, 18, 18) - kshape = (32, 1, 3, 3) - oshape = (1, 32* 18 * 18) - shape_dict = {"x": dshape} - - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) - m = graph_runtime.create(graph, lib, ctx) - # print(graph.ir(join_entry_attrs=["shape"])) - assert graph.index.num_nodes == 5 - # set input - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) - bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype)) - m.run(x=data, y_weight=kernel, y_bias=bias) - # get output - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - c_np = topi.testing.depthwise_conv2d_python_nchw( - data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME') - c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1) + 1 - c_np = c_np.reshape(c_np.shape[0], np.prod(c_np.shape[1:])) + 1 - tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) - - -def test_injective_reduce_injective(): - x = sym.Variable("x") - x = sym.flatten(x) + 1 - y = sym.sum(x, axis=1) - dtype = "float32" - dshape = (32, 1, 18, 18) - shape_dict = {"x": dshape} - - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) - m = graph_runtime.create(graph, lib, ctx) - assert graph.index.num_nodes == 2 - data = np.random.uniform(size=dshape).astype(dtype) - m.run(x=data) - c_np = np.sum(data.reshape(32, 18 * 18) + 1, axis=1) - # get output - out = m.get_output(0, tvm.nd.empty(c_np.shape, dtype)) - tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) - - -def test_injective_conv2d(): - channels = 16 - data = sym.Variable(name="data") - pool = sym.global_avg_pool2d(data=data) - weight = sym.reshape(pool, shape=[1, channels, 1, 1]) - residual = sym.conv2d(data=data, kernel_size=(3,3), channels=channels, padding=(1, 1), - layout="NCHW", kernel_layout="OIHW", use_bias=False, name="conv") - net = weight * data + residual - size = 56 - dtype="float32" - dshape = (1, channels, size, size) - kshape = (channels, channels, 3, 3) - oshape = dshape - shape_dict = {"data": dshape} - - for 
target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(net, target, shape_dict) - # data, global_avg_pool, conv weight, conv op, fused elemwise add - assert graph.index.num_nodes == 5 - - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) - m = graph_runtime.create(graph, lib, ctx) - m.run(data=data, conv_weight=kernel) - # get output - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - residual = topi.testing.conv2d_nchw_python( - data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME') - weight = np.mean(data.asnumpy(), axis=(2, 3)) - c_np = weight[:, :, np.newaxis, np.newaxis] * data.asnumpy() + residual - tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) - - -def test_concatenate_conv2d(): - ch = 3 - size = 8 - data = sym.Variable(name="data") - concat = sym.concatenate(data, data, axis=1) - conv = sym.conv2d(data=concat, kernel_size=(1,1), channels=ch*2, use_bias=False, name="conv") - net = sym.elemwise_add(concat, conv) - - dtype="float32" - dshape = (1, ch, size, size) - kshape = (ch*2, ch*2, 1, 1) - oshape = (1, ch*2, size, size) - shape_dict = {"data": dshape} - - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(net, target, shape_dict) - # data, conv weight, conv op, concat - assert graph.index.num_nodes == 4 - - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) - m = graph_runtime.create(graph, lib, ctx) - m.run(data=data, conv_weight=kernel) - # get output - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - - concat = np.concatenate((data.asnumpy(), data.asnumpy()), axis=1) - conv = topi.testing.conv2d_nchw_python( - concat, kernel.asnumpy(), (1,1), 'SAME') - ref = concat + conv - tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5) - - -def test_residual_block_layout_transform(): - ch = 16 - size = 32 - data = sym.Variable(name="data") - conv1 = sym.conv2d(data=data, kernel_size=(3,3), channels=ch, padding = (1, 1), use_bias=False, name="conv1") - layout_transform1 = sym.__layout_transform__(data=conv1, src_layout="NCHW", dst_layout="NCHW8c") - layout_transform2 = sym.__layout_transform__(data=layout_transform1, src_layout="NCHW8c", dst_layout="NCHW") - conv2 = sym.conv2d(data=conv1, kernel_size=(3,3), channels=ch, padding = (1, 1), use_bias=False, name="conv2") - elemwise_sum = sym.elemwise_add(layout_transform2, conv2) - out = sym.relu(elemwise_sum) - - dtype="float32" - dshape = (1, ch, size, size) - kshape = (ch, ch, 3, 3) - oshape = (1, ch, size, size) - shape_dict = {"data": dshape} - - target = "llvm" # only test on llvm since it involves NCHW8c layout - ctx = tvm.context(target, 0) - graph, lib, _ = nnvm.compiler.build(out, target, shape_dict) - # data, conv1 weight, conv1, layout transform + elemwise add + relu, conv2 weight, conv2 op - assert graph.index.num_nodes == 6 - - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - kernel1 = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) - kernel2 = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) - m = graph_runtime.create(graph, lib, ctx) - m.run(data=data, conv1_weight=kernel1, conv2_weight=kernel2) - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - - conv1 = topi.testing.conv2d_nchw_python( - data.asnumpy(), kernel1.asnumpy(), (1,1), 'SAME') - conv2 = topi.testing.conv2d_nchw_python( - conv1, kernel2.asnumpy(), (1,1), 'SAME') - ref = np.maximum(conv1 + conv2, 0) - 
tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5) - - -def build_and_run(sym, params, data, out_shape, target, ctx, opt_level=2): - with nnvm.compiler.build_config(opt_level=opt_level): - graph, lib, params = nnvm.compiler.build(sym, target, shape={"data":data.shape}, params=params) - module = graph_runtime.create(graph, lib, ctx) - module.set_input(**params) - module.set_input("data", data) - module.run() - out = module.get_output(0, tvm.nd.empty(out_shape)) - return out.asnumpy(), graph - - -def test_fuse_conv2d_elu(): - def elu(data): - return -0.5 * sym.relu(1 - sym.exp(data)) + sym.relu(data) - - def get_sym(out_channel): - data = sym.Variable(name="data") - data = sym.conv2d(data=data, kernel_size=(3,3), channels=out_channel, padding=(1, 1), - layout="NCHW", kernel_layout="OIHW", use_bias=True) - data = sym.batch_norm(data) - data = elu(data) - return data - - in_channel = 8 - out_channel = 16 - size = 64 - dshape = (1, in_channel, size, size) - oshape = (1, out_channel, size, size) - data = np.random.uniform(-1, 1, dshape).astype(np.float32) - - for target, ctx in ctx_list(): - sym1 = get_sym(out_channel) - sym2 = get_sym(out_channel) - _, params1 = utils.create_workload(sym1, 1, dshape[1:], seed=0) - _, params2 = utils.create_workload(sym2, 1, dshape[1:], seed=0) - output1, g1 = build_and_run(sym1, params1, data, oshape, target, ctx, opt_level=2) - output2, g2 = build_and_run(sym2, params2, data, oshape, target, ctx, opt_level=0) - tvm.testing.assert_allclose(output1, output2, rtol=1e-5, atol=1e-5) - # data, conv weight, bias, batch norm gamma, batch norm beta, conv op - assert g1.index.num_nodes == 6 - -if __name__ == "__main__": - test_injective_reduce_injective() - test_ewise_injective() - test_conv_ewise_injective() - test_fuse_conv2d_elu() - test_injective_conv2d() - test_concatenate_conv2d() - test_residual_block_layout_transform() diff --git a/nnvm/tests/python/compiler/test_optimizer.py b/nnvm/tests/python/compiler/test_optimizer.py deleted file mode 100644 index 86a9b71b46dc..000000000000 --- a/nnvm/tests/python/compiler/test_optimizer.py +++ /dev/null @@ -1,134 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import numpy as np -import tvm -import nnvm -import nnvm.compiler.optimizer as optimizer -import nnvm.compiler.lr_scheduler as lr_scheduler - -from nnvm.testing.config import ctx_list -from tvm.contrib import graph_runtime - - -def helper(symbol, inputs, params, update_func, run_times, target, ctx, dtype="float32"): - ishapes = {} - np_inputs = {} - params_dict = {} - for (name, shape, s) in inputs: - ishapes.update({name: shape}) - np_inputs.update({name: np.random.uniform(size=shape).astype(dtype)}) - for (name, shape, s) in params: - np_inputs.update({name: np.random.uniform(size=shape).astype(dtype)}) - params_dict.update({name: np_inputs[name]}) - - graph, lib, rt_params = nnvm.compiler.build(symbol, target, shape=ishapes) - m = graph_runtime.create(graph, lib, ctx) - m.set_input(**np_inputs) - m.set_input(**rt_params) - for _ in range(run_times): - m.run() - y_np = update_func(**np_inputs) - out = m.get_output(0, tvm.nd.empty(y_np.shape, dtype)) - tvm.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5) - - -def test_sgd(): - for target, ctx in ctx_list(): - data = nnvm.sym.Variable("data") - weight = nnvm.sym.Variable("weight") - out = nnvm.sym.elemwise_mul(data, weight ** 2) - - dshape = (1, 2, 3) - wshape = dshape - - base_lr = 0.1 - lr_factor = 0.5 - rescale_grad = 0.2 - wd = 0.1 - clip_gradient = 0.25 - - scheduler = lr_scheduler.FactorScheduler(base_lr=base_lr, step=1, factor=lr_factor) - opt = optimizer.SGD(learning_rate=base_lr, lr_scheduler=scheduler, - rescale_grad=rescale_grad, clip_gradient=clip_gradient, - wd=wd) - opt_sym = opt.minimize(out, var=weight) - - inputs = [("data", dshape, data)] - params = [("weight", wshape, weight)] - - def update_func(data, weight): - gradient_0 = data * 2 * weight * rescale_grad - gradient_0 = np.clip(gradient_0, -clip_gradient, clip_gradient) - weight_0 = weight - base_lr * lr_factor * (gradient_0 + wd * weight) - gradient_1 = data * 2 * weight_0 * rescale_grad - gradient_1 = np.clip(gradient_1, -clip_gradient, clip_gradient) - weight_1 = weight_0 - base_lr * (lr_factor ** 2) * (gradient_1 + wd * weight_0) - return weight_1 - - helper(opt_sym, inputs, params, update_func, 2, target, ctx) - - - -def test_adam(): - for target, ctx in ctx_list(): - data = nnvm.sym.Variable("data") - weight = nnvm.sym.Variable("weight") - out = nnvm.sym.elemwise_mul(data, weight ** 2) - - dshape = (1, 2, 3) - wshape = dshape - - base_lr = 0.1 - beta1 = 0.9 - beta2 = 0.999 - epsilon = 1e-8 - lr_factor = 0.5 - rescale_grad = 0.2 - wd = 0.1 - clip_gradient = 0.25 - - scheduler = lr_scheduler.FactorScheduler(base_lr=base_lr, step=1, factor=lr_factor) - opt = optimizer.Adam(learning_rate=base_lr, beta1=beta1, beta2=beta2, epsilon=epsilon, - lr_scheduler=scheduler, rescale_grad=rescale_grad, - clip_gradient=clip_gradient, wd=wd) - opt_sym = opt.minimize(out, var=weight) - - inputs = [("data", dshape, data)] - params = [("weight", wshape, weight)] - - def update_func(data, weight): - rate_0 = np.sqrt(1 - beta2) / (1 - beta1) - lr_0 = base_lr * lr_factor * rate_0 - gradient_0 = data * 2 * weight * rescale_grad - gradient_0 = np.clip(gradient_0, -clip_gradient, clip_gradient) - m_0 = (1 - beta1) * gradient_0 - v_0 = (1 - beta2) * (gradient_0 ** 2) - weight_0 = weight - lr_0 * (m_0 / (np.sqrt(v_0) + epsilon) + wd * weight) - rate_1 = np.sqrt(1 - beta2 ** 2) / (1 - beta1 ** 2) - lr_1 = base_lr * (lr_factor ** 2) * rate_1 - gradient_1 = data * 2 * weight_0 * rescale_grad - gradient_1 = np.clip(gradient_1, -clip_gradient, clip_gradient) - m_1 = beta1 * 
m_0 + (1 - beta1) * gradient_1 - v_1 = beta2 * v_0 + (1 - beta2) * (gradient_1 ** 2) - weight_1 = weight_0 - lr_1 * (m_1 / (np.sqrt(v_1) + epsilon) + wd * weight_0) - return weight_1 - - helper(opt_sym, inputs, params, update_func, 2, target, ctx) - -if __name__ == "__main__": - test_sgd() - test_adam() diff --git a/nnvm/tests/python/compiler/test_param_dict.py b/nnvm/tests/python/compiler/test_param_dict.py deleted file mode 100644 index b30f8f99082c..000000000000 --- a/nnvm/tests/python/compiler/test_param_dict.py +++ /dev/null @@ -1,100 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import os -import numpy as np -import nnvm.compiler -import tvm -import json -import base64 -from tvm._ffi.base import py_str -from tvm import rpc -from tvm.contrib import util, graph_runtime - - -def test_save_load(): - x = np.random.uniform(size=(10, 2)).astype("float32") - y = np.random.uniform(size=(1, 2, 3)).astype("float32") - x[:] = 1 - y[:] = 1 - params = {"x": x, "y": y} - param_bytes = nnvm.compiler.save_param_dict(params) - assert isinstance(param_bytes, bytearray) - param2 = nnvm.compiler.load_param_dict(param_bytes) - assert len(param2) == 2 - np.testing.assert_equal(param2["x"].asnumpy(), x) - np.testing.assert_equal(param2["y"].asnumpy(), y) - - -def test_ndarray_reflection(): - x = np.random.uniform(size=(10, 2)).astype("float32") - xx = tvm.nd.array(x) - xnode = tvm.make.node("NDArrayWrapper", name="xx", array=xx) - xnode2 = tvm.make.node("NDArrayWrapper", name="x2", array=xx) - assert xnode.array.same_as(xx) - json_str = tvm.save_json([xnode, xnode2]) - json_dict = json.loads(json_str) - b64_str = json_dict["b64ndarrays"][0] - decoded = py_str(base64.b64encode(base64.b64decode(b64_str))) - assert b64_str == decoded - xlist = tvm.load_json(json_str) - np.testing.assert_equal(xlist[0].array.asnumpy(), xx.asnumpy()) - assert xlist[1].array == xlist[0].array - - -def test_bigendian_rpc_param(): - """Test big endian rpc when there is a PowerPC RPC server available""" - host = os.environ.get("TVM_POWERPC_TEST_HOST", None) - port = os.environ.get("TVM_POWERPC_TEST_PORT", 9090) - if host is None: - return - - def verify_nnvm(remote, target, shape, dtype): - x = nnvm.sym.Variable("x") - y = x + 1 - graph, lib, _ = nnvm.compiler.build( - y, target, - shape={"x": shape}, - dtype={"x": dtype}) - - temp = util.tempdir() - path_dso = temp.relpath("dev_lib.o") - lib.save(path_dso) - remote.upload(path_dso) - lib = remote.load_module("dev_lib.o") - a = np.random.randint(0, 256, size=shape).astype(dtype) - a[:] = 1 - params = {"x" : a} - ctx = remote.cpu(0) - m = graph_runtime.create(graph, lib, ctx) - # uses save param_dict - m.load_params(nnvm.compiler.save_param_dict(params)) - m.run() - out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype, ctx=ctx)) - 
tvm.testing.assert_allclose(a + 1, out.asnumpy()) - - print("Test RPC connection to PowerPC...") - remote = rpc.connect(host, port) - target = "llvm -mtriple=powerpc-linux-gnu" - for dtype in ["float32", "float64", "int32", "int8"]: - verify_nnvm(remote, target, (10,), dtype) - - - -if __name__ == "__main__": - test_ndarray_reflection() - test_save_load() - test_bigendian_rpc_param() diff --git a/nnvm/tests/python/compiler/test_rpc_exec.py b/nnvm/tests/python/compiler/test_rpc_exec.py deleted file mode 100644 index 1584f7c589a4..000000000000 --- a/nnvm/tests/python/compiler/test_rpc_exec.py +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import tvm -from tvm import rpc -from tvm.contrib import util, graph_runtime -import nnvm.symbol as sym -import nnvm.compiler -import numpy as np -import time - -def test_rpc_executor(): - host = "localhost" - port = 9021 - server = rpc.Server(host, port, use_popen=True) - time.sleep(1) - x = sym.Variable("x") - y = sym.Variable("y") - z = sym.exp(y + x) - shape = (10, 128) - dtype = tvm.float32 - shape_dict = {"x": shape, "y": shape} - tmp = util.tempdir() - lib_name = tmp.relpath("net.o") - - graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict) - # save module - lib.save(lib_name) - remote = rpc.connect(host, port) - remote.upload(lib_name) - ctx = remote.cpu(0) - # load remote - rlib = remote.load_module("net.o") - - # Create remotemodule - m = graph_runtime.create(graph, rlib, remote.cpu(0)) - # get member functions - set_input, run, get_output = m["set_input"], m["run"], m["get_output"] - na = tvm.nd.array(np.ones(shape).astype(dtype), ctx) - nb = tvm.nd.array(np.ones(shape).astype(dtype), ctx) - # set inputs - set_input("x", na) - set_input("y", nb) - # execute - run() - # get outputs - out = tvm.nd.empty(shape, dtype, ctx) - get_output(0, out) - tvm.testing.assert_allclose( - out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy())) - server.terminate() - -if __name__ == "__main__": - test_rpc_executor() diff --git a/nnvm/tests/python/compiler/test_simplify_inference.py b/nnvm/tests/python/compiler/test_simplify_inference.py deleted file mode 100644 index 2f520bd6c125..000000000000 --- a/nnvm/tests/python/compiler/test_simplify_inference.py +++ /dev/null @@ -1,65 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Unittest cases for simplify batch_norm""" -import nnvm -from nnvm import symbol as sym -from nnvm.compiler import graph_util, graph_attr - -def test_simplify_batchnorm(): - def simple_bn(x, gamma, beta, moving_mean, moving_var, - axis=1, epsilon=1e-5, shape=None): - # expect = (x - moving_mean) / sym.sqrt(moving_var + eps) * gamma + beta - scale = sym.elemwise_mul(1 / sym.sqrt(moving_var + epsilon), gamma) - shift = sym.elemwise_add( - sym.elemwise_mul(sym.negative(moving_mean), scale), beta) - # for 2D - num_newaxis=len(shape) - axis - 1 - if num_newaxis: - scale = sym.expand_dims(scale, axis=1, num_newaxis=num_newaxis) - shift = sym.expand_dims(shift, axis=1, num_newaxis=num_newaxis) - return x * scale + shift - - - # Before simplify - def check(dim, axis, nstep): - eps = 0.01 - x = sym.Variable("x") + 1 - beta = sym.Variable("beta") - gamma = sym.Variable("gamma") - moving_var = sym.Variable("moving_var") - moving_mean = sym.Variable("moving_mean") - y1, y2 = x, sym.Variable("xx") + 1 - ishape = {"x": tuple(10 for i in range(dim))} - for i in range(nstep): - y1 = sym.batch_norm( - y1 + 1, gamma, beta, moving_mean, moving_var, epsilon=eps, axis=axis) - y1 = sym.dropout(y1) - y2 = simple_bn(y2 + 1, gamma, beta, moving_mean, moving_var, - epsilon=eps, axis=axis, shape=ishape["x"]) - g = nnvm.graph.create(y1) - g2 = nnvm.graph.create(y2) - graph_attr.set_shape_inputs(g, ishape) - g1 = g.apply("InferShape").apply("SimplifyInference") - # assert graph equals as expected - graph_util.check_graph_equal(g1, g2) - - check(2, 1, 1) - check(4, 0, 3) - check(4, 1, 2) - -if __name__ == "__main__": - test_simplify_batchnorm() diff --git a/nnvm/tests/python/compiler/test_to_relay.py b/nnvm/tests/python/compiler/test_to_relay.py deleted file mode 100644 index dac14a8c1f22..000000000000 --- a/nnvm/tests/python/compiler/test_to_relay.py +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import nnvm -from nnvm import testing -from nnvm import to_relay -import tvm -from tvm.relay import transform -from tvm.relay import create_executor -from tvm.contrib import graph_runtime -import numpy as np - -def check_model(sym, shapes, dtypes, params): - net = nnvm.graph.create(sym) - graph_json, mod, params = nnvm.compiler.build( - net, - 'llvm', - shape=shapes, - dtype=dtypes, - params=params) - nnvm_rts = graph_runtime.create(graph_json, mod, tvm.cpu(0)) - inputs = {} - for name in shapes: - np_array = np.random.rand(*shapes[name]).astype('float32') - inputs[name] = tvm.nd.array(np_array) - - nnvm_rts.set_input(**params) - nnvm_rts.run(**inputs) - nnvm_out = nnvm_rts.get_output(0) - relay_model, params = to_relay.to_relay(net, shapes, dtypes, params) - mod = tvm.relay.Module.from_expr(relay_model) - mod = transform.InferType()(mod) - relay_rts = create_executor(kind='graph', mod=mod, ctx=tvm.cpu(0), target='llvm') - inputs.update(params) - relay_out = relay_rts.evaluate()(*list(inputs.values())) - np.testing.assert_allclose(nnvm_out.asnumpy(), relay_out.asnumpy()) - -# def test_mlp(): -# mlp, params = testing.mlp.get_workload(1) -# shapes = { "data": (10, 3, 224, 224) } -# dtypes = { "data": 'float32' } -# check_model(mlp, shapes, dtypes, params) - -if __name__ == "__main__": - test_mlp() diff --git a/nnvm/tests/python/compiler/test_top_assign.py b/nnvm/tests/python/compiler/test_top_assign.py deleted file mode 100644 index dae0506edc36..000000000000 --- a/nnvm/tests/python/compiler/test_top_assign.py +++ /dev/null @@ -1,57 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import numpy as np - -import tvm -from tvm.contrib import graph_runtime - -import nnvm.symbol as sym -import nnvm.compiler -from nnvm.testing.config import ctx_list - - -def test_update(): - w = sym.Variable("w") - w2 = sym.Variable("w2") - w = sym._assign(w, w + 1) - w2 = sym._assign(w2, w + 1) - - dshape = (5, 3, 18, 18) - shape_dict = {"w": dshape, "w2":dshape} - dtype = "float32" - - def check(target, ctx): - graph, lib, _ = nnvm.compiler.build(w2, target, shape_dict) - - m = graph_runtime.create(graph, lib, ctx) - - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - m.set_input("w", data) - m.run() - out = m.get_input("w2", tvm.nd.empty(dshape, dtype)) - tvm.testing.assert_allclose(out.asnumpy(), data.asnumpy() + 2, rtol=1e-5) - - m.run() - out = m.get_input("w2", tvm.nd.empty(dshape, dtype)) - tvm.testing.assert_allclose(out.asnumpy(), data.asnumpy() + 3, rtol=1e-5) - - for target, ctx in ctx_list(): - check(target, ctx) - - -if __name__ == "__main__": - test_update() diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py deleted file mode 100644 index ae6266cdde54..000000000000 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ /dev/null @@ -1,605 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import numpy as np -import tvm -from tvm.contrib import graph_runtime -import topi.testing -import nnvm.symbol as sym -import nnvm.compiler -from nnvm.testing.config import ctx_list -from nnvm.testing.check_computation import check_function - -def test_check_function(): - # test the testing function - - x = sym.Variable("x") - y = sym.Variable("y") - - # different styles of returning gradients from the backward function - check_function(x + 2*y, lambda x, y: x + 2*y, - lambda x, y, head_grads: [head_grads, 2*head_grads], - shape={'x': (1, 2), y: (1, 2)}, dtype='float32') - check_function(x + 2*y, lambda x, y: x + 2*y, - lambda x, y, head_grads: (head_grads, 2*head_grads), - shape={'x': (1, 2), y: (1, 2)}, dtype='float32') - check_function(x + 2*y, lambda x, y: x + 2*y, - lambda x, y, head_grads: {'x': head_grads, 'y': 2*head_grads}, - shape={'x': (1, 2), y: (1, 2)}, dtype='float32') - check_function(x + 2*y, lambda x, y: x + 2*y, - lambda x, y, head_grads: {'y': 2*head_grads}, - shape={'x': (1, 2), y: (1, 2)}, dtype='float32') - check_function(x + 2*y, lambda x, y: x + 2*y, - lambda x, y, head_grads: [2*head_grads], - grad_input_vars=[y], - shape={'x': (1, 2), y: (1, 2)}, dtype='float32') - check_function(x + 2*y, lambda x, y: x + 2*y, - lambda x, y, head_grads: 2*head_grads, - grad_input_vars=[y], - shape={'x': (1, 2), y: (1, 2)}, dtype='float32') - check_function(x + 2*y, lambda x, y: x + 2*y, - lambda x, y, head_grads: 2*head_grads, - grad_input_vars=[y], - shape={'x': (1, 2), y: (1, 2)}, dtype='float64') - - # test just numerical gradients - # different styles of shape and dtype passing - check_function(x + 2*y, shape={'x': (1, 2), y: (1, 2)}, - numerical_grads=True) - check_function(x + 2*y, shape={'x': (1, 2), y: (1, 2)}, dtype='float32', - numerical_grads=True) - check_function(x + 2*y, shape={'x': (1, 2), y: (1, 2)}, dtype={x: 'float32', 'y': 'float32'}, - numerical_grads=True) - check_function(x + 2*y, shape=(1, 2), dtype='float32', - numerical_grads=True) - - # specifying variable attributes on variable creation - # (in this case type codes must be used) - x = sym.Variable("x", dtype=0, shape=(1, 2)) - check_function(x + 2*y, shape={y: (1, 2)}, dtype={'y': 'float32'}, numerical_grads=True) - y = sym.Variable("y", dtype=0, shape=(1, 2)) - - # shape overriding - def _fwd1(x, y): - assert x.shape == (1, 1) - assert y.shape == (1, 2) - return x + 2*y - check_function(x + 2*y, _fwd1, shape={x: (1, 1)}) - - # in_range - def _fwd2(x, y): - assert x.shape == (100,) - assert (x <= 0.9).all() - assert (x >= 0.8).all() - return x + 2*y - check_function(x + 2*y, _fwd2, shape=(100,), in_range=(0.8, 0.9), numerical_grads=False) - check_function(x + 2*y, _fwd2, shape=(100,), in_range={'x': (0.8, 0.9)}, numerical_grads=False) - check_function(x + 2*y, backward=lambda x, y, head_grads: [1.0, 2.0], - in_range={'head_grads_0': (1.0, 1.0)}) - # explicit passing of values - check_function(x + 2*y, backward=lambda x, y, head_grads: [1.0, 2.0], - values={'head_grads_0': np.full((1, 2), 1.0)}) - - # check that the function reports errors - def _check_function_must_fail(*args, **kwargs): - error = AssertionError - if 'error' in kwargs: - error = kwargs['error'] - del kwargs['error'] - try: - check_function(*args, quiet=True, **kwargs) - except error: - pass - else: - raise AssertionError("check_function didn't raise an exception") - - _check_function_must_fail(x + 2*y, error=ValueError) - _check_function_must_fail(x + 2*y, lambda x, y: x + y) - _check_function_must_fail(x + 2*y, backward=lambda x, y, 
head_grads: [1.0, 2.0]) - _check_function_must_fail(sym.block_grad(x + 2*y), numerical_grads=True) - _check_function_must_fail(x*x, numerical_grads=True, - numerical_grads_params={'atol': 0.0, 'rtol': 0.0}) - _check_function_must_fail(sym.log(-x*x), numerical_grads=True, error=ValueError) - - # different styles of returning results from the forward function - check_function(x + 2*y, lambda x, y: [x + 2*y], numerical_grads=False) - _check_function_must_fail(x + 2*y, lambda x, y: [x + 2*y, x], numerical_grads=False, - error=ValueError) - _check_function_must_fail(x + 2*y, lambda x, y: [], numerical_grads=False, - error=ValueError) - - # multiple outputs - z = sym.Group([2*x + y, x + 2*y]) - check_function(z, lambda x, y: [2*x + y, x + 2*y]) - check_function(z, lambda x, y: (2*x + y, x + 2*y)) - check_function(z, backward=lambda x, y, head_grads: [2*head_grads[0] + head_grads[1], - head_grads[0] + 2*head_grads[1]]) - _check_function_must_fail(z, backward=lambda x, y, head_grads: [2*head_grads[0], - 2*head_grads[1]]) - check_function(z, backward=lambda x, y, head_grads: [head_grads[1], 2*head_grads[1]], - in_range={'head_grads_0': (0, 0)}) - check_function(z, numerical_grads=True) - - z = sym.Group([sym.block_grad(2*x + y), x + 2*y]) - check_function(z, lambda x, y: [2*x + y, x + 2*y], numerical_grads=False) - _check_function_must_fail(z, lambda x, y: [2*x + y, x + 2*y]) - _check_function_must_fail(z, numerical_grads=True) - - z = sym.Group([2*x + y, sym.block_grad(x + 2*y)]) - _check_function_must_fail(z, numerical_grads=True) - - z = sym.Group([2*x + y, x + 2*y, x, y, sym.sum(x)]) - check_function(z, lambda x, y: [2*x + y, x + 2*y, x, y, np.sum(x)]) - - # passing additional parameters to forward and backward - def _fwd3(x, p): - assert p == 'v' - return x + 1 - def _bwd3(x, p, head_grads): - assert p == 'v' - return head_grads - check_function(x + 1, _fwd3, _bwd3, additional_params={'p': 'v'}) - - # implicitly created variables and shape/dtype inference for inputs - x = sym.Variable("x", shape=(2, 3), dtype=0) - b = sym.Variable("b") - y = sym.dense(data=x, bias=b, units=4) - # Don't check gradients on cuda because is doesn't yet support ewise after reduce - check_function(y, exclude_targets={'cuda'}, numerical_grads=True) - check_function(y, shape={'x': (3, 4)}, exclude_targets={'cuda'}, numerical_grads=True) - check_function(y, dtype={'x': 'float64'}, exclude_targets={'cuda'}, numerical_grads=True) - - x = sym.Variable("x") - b = sym.Variable("b") - w = sym.Variable("w") - y = sym.dense(data=x, bias=b, weight=w, units=4) - def _fwd_dense(x, w, b): - return np.dot(x, w.T) + b - check_function(y, _fwd_dense, shape={'x': (1,2)}, dtype={'x': 'float32'}, numerical_grads=False) - check_function(y, _fwd_dense, shape={'x': (1,2)}, dtype={'w': 'float64'}, numerical_grads=False) - _check_function_must_fail(y, _fwd_dense, shape={'x': (1,2)}, - dtype={'w': 'float64', 'b': 'float32'}, - numerical_grads=False, - error=nnvm._base.NNVMError) - # fails because no shape - _check_function_must_fail(y, _fwd_dense, numerical_grads=False, error=ValueError) - # ok because type is float32 by default - check_function(y, _fwd_dense, shape={'x': (1,2)}, numerical_grads=False) - -def test_relu(): - x = sym.Variable("x") - y = sym.relu(sym.leaky_relu(x, alpha=0.3) - 0.2) - - def forward(x): - x = (x < 0) * x * 0.3 + (x > 0) * x - 0.2 - return (x > 0) * x - - def backward(head_grads, x): - sub = (x < 0) * x * 0.3 + (x > 0) * x - 0.2 - return [(sub > 0).astype("float") * \ - ((x > 0).astype("float") + 0.3 * (x < 
0).astype("float")) * head_grads] - - shape = {'x': (1, 3, 32, 32)} - check_function(y, forward, backward, shape=shape) - -def test_prelu_nchw(): - x = sym.Variable("x") - a = sym.Variable("a") - y = sym.prelu(data=x, alpha=a) - - def forward(x, a): - return (x < 0) * (x * a.reshape(3, 1, 1)) + (x>=0) * x - - shape = {'x': (1, 3, 32, 32), 'a': (3,)} - check_function(y, forward, shape=shape) - -def test_prelu_nhwc(): - x = sym.Variable("x") - a = sym.Variable("a") - y = sym.prelu(data=x, alpha=a, axis=3) - - def forward(x, a): - return (x < 0) * (x * a.reshape(1, 1, 3)) + (x>=0) * x - - shape = {'x': (1, 32, 32, 3), 'a': (3,)} - check_function(y, forward, shape=shape) - -def test_sym_scalar_pow(): - scalar = 3 - x = sym.Variable("x") - y = x**scalar - - def forward(x): - return x**scalar - - def backward(head_grads, x): - return [scalar * x**(scalar - 1) * head_grads] - - shape = {'x': (1, 3, 32, 32)} - check_function(y, forward, backward, shape=shape) - - -def test_scalar_sym_pow(): - scalar = 3 - x = sym.Variable("x") - y = scalar**x - - def forward(x): - return scalar**x - - def backward(head_grads, x): - return [np.log(scalar) * scalar**x * head_grads] - - shape = {'x': (1, 3, 32, 32)} - check_function(y, forward, backward, shape=shape) - - -def test_exp(): - x = sym.Variable("x") - y = sym.exp(x) - - def forward(x): - return np.exp(x) - - def backward(head_grads, x): - return [np.exp(x) * head_grads] - - shape = {'x': (1, 3, 32, 32)} - check_function(y, forward, backward, shape=shape) - - -def test_log(): - x = sym.Variable("x") - y = sym.log(x) - - def forward(x): - return np.log(x) - - def backward(head_grads, x): - return [1. / x * head_grads] - - shape = {'x': (1, 3, 32, 32)} - check_function(y, forward, backward, in_range=(0.002, 2.0), shape=shape) - - -def test_tanh(): - x = sym.Variable("x") - y = sym.tanh(x) - - def forward(x): - return np.sinh(x) / np.cosh(x) - - def backward(head_grads, x): - y_np = forward(x) - return [(1 - y_np**2) * head_grads] - - shape = {'x': (1, 3, 32, 32)} - check_function(y, forward, backward, shape=shape) - - -def test_sigmoid(): - x = sym.Variable("x") - y = sym.sigmoid(x) - - def forward(x): - return 1.0 / (1.0 + np.exp(-x)) - - def backward(head_grads, x): - y_np = forward(x) - return [y_np *(1 - y_np) * head_grads] - - shape = {'x': (1, 3, 32, 32)} - check_function(y, forward, backward, shape=shape) - - -def test_softmax(): - x = sym.Variable("x") - y = sym.softmax(x) - - def forward(x): - return topi.testing.softmax_python(x) - - def backward(head_grads, x): - y = topi.testing.softmax_python(x) - grad = y * (head_grads - np.sum(y * head_grads, axis=1, keepdims=True)) - return [grad] - - check_function(y, forward, backward, - shape={'x': (10, 1000)}, numerical_grads=False) - check_function(y, forward, backward, - shape={'x': (2, 10)}) - - -def test_log_softmax(): - x = sym.Variable("x") - y = sym.log_softmax(x) - - def forward(x): - return topi.testing.log_softmax_python(x) - - def backward(head_grads, x): - y = topi.testing.log_softmax_python(x) - grad = head_grads - np.exp(y) * np.sum(head_grads, axis=1, keepdims=True) - return [grad] - - check_function(y, forward, backward, - shape={'x': (10, 1000)}, numerical_grads=False) - check_function(y, forward, backward, - shape={'x': (2, 10)}) - - -def test_dense(): - x = sym.Variable("x", shape=(10, 100)) - w = sym.Variable("dense_weight", shape=(3, 100)) - b = sym.Variable("dense_bias", shape=(3,)) - y = sym.dense(x, w, b, use_bias=True, units=3, name="dense") - y = sym.flatten(y) - - def forward(x, 
dense_weight, dense_bias): - return np.dot(x, dense_weight.T) + dense_bias - shape = { - 'x': (10, 100), - 'w': (3, 100), - 'b': (3,) - } - # Don't check gradients on cuda because is doesn't yet support ewise after reduce - check_function(y, forward, shape=shape, - exclude_targets={'cuda'}, numerical_grads=True) - check_function(y, forward, shape=shape, - only_targets={'cuda'}, numerical_grads=False) - - -def test_batchnorm(): - x = sym.Variable("x") - beta = sym.Variable("beta") - gamma = sym.Variable("gamma") - moving_var = sym.Variable("moving_var") - moving_mean = sym.Variable("moving_mean") - eps = 1e-5 - y = sym.batch_norm( - x, gamma, beta, moving_mean, moving_var, epsilon=eps) - - def forward(x, gamma, beta, moving_mean, moving_var): - return (x - moving_mean) / np.sqrt(moving_var + eps) * gamma + beta - - shape = { - 'x': (10, 20), - 'gamma': (20,), - 'beta': (20,), - 'moving_mean': (20,), - 'moving_var': (20,) - } - - check_function(y, forward, in_range=(0.001, 1.0), shape=shape) - - -def verify_concatenate(ishape, axis): - x = [sym.Variable("x%d" % i, shape=ishape[i]) for i in range(len(ishape))] - y = sym.concatenate(*x, axis=axis) + 1 - - def forward(**kwargs): - return np.concatenate(list(kwargs.values()), axis=axis) + 1 - - check_function(y, forward) - - -def test_concatenate(): - verify_concatenate([(2, 3, 4), (1, 3, 4)], axis=0) - verify_concatenate([(2, 4), (2, 7)], axis=1) - - -def verify_split(ishape, indices_or_sections, axis): - x = sym.Variable("x", shape=ishape) - y = sym.split(x, indices_or_sections=indices_or_sections, axis=axis) - - def forward(x): - return np.split(x, indices_or_sections, axis=axis) - - check_function(y, forward) - - -def test_split(): - verify_split((2, 3), 2, axis=0) - verify_split((5, 3), [3], axis=0) - verify_split((5, 9, 3), [3, 4], axis=1) - -def verify_strided_slice(ishape, begin, end, strideinp=None): - stride = strideinp if strideinp else [1, 1, 1] - x = sym.Variable("x", shape=ishape) - if strideinp: - y = sym.strided_slice(x, begin = begin, end = end, stride = stride) + 1 - else: - y = sym.strided_slice(x, begin = begin, end = end) + 1 - - for i in range(len(begin), 3): - begin.append(0) - for i in range(len(end), 3): - end.append(ishape[i]) - - def test_forward(x): - return x[begin[0]:end[0]:stride[0], - begin[1]:end[1]:stride[1], begin[2]:end[2]:stride[2]] + 1 - - check_function(y, test_forward) - -def test_strided_slice(): - verify_strided_slice((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2]) - verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1]) - verify_strided_slice((3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1]) - verify_strided_slice((3, 4, 3), [1, 0, 0], [2, 2, 3], [1, 1, 2]) - verify_strided_slice((3, 4, 3), [1, -1, 0], [2, -3, 3], [1, -1, 1]) - verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 4, 3]) - verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 1000, 3]) - verify_strided_slice((3, 4, 3), [1, 1, 0], [4, 4]) - verify_strided_slice((3, 4, 3), [1, 1], [4, 4, 3]) - -def verify_take(src_shape, indices_src, axis=None): - src_dtype = "float32" - indices_dtype = "int32" - indices_src = np.array(indices_src, dtype=indices_dtype) - a = sym.Variable("a", shape=src_shape) - indices = sym.Variable("indices", shape=indices_src.shape) - y = sym.take(a, indices, axis=axis) - - def forward(a, indices): - return np.take(a, indices=indices, axis=axis) - - a_src = np.arange(np.prod(src_shape), dtype=src_dtype).reshape(src_shape) - - check_function(y, forward, - dtype={'a': src_dtype, 'indices': indices_dtype}, - values={'a': a_src, 
'indices': indices_src}) - -def test_take(): - verify_take((4,), [1]) - verify_take((4,), [[0,1,2,3]]) - verify_take((3,3,3), [[11,25]]) - verify_take((4,), [[0,1],[2,3]]) - verify_take((4,), [1], 0) - verify_take((2,2), [[[1,0],[0,1]]], 0) - verify_take((2,2), [[[1,0],[0,1]]], 1) - verify_take((4,3,5,6), [[2,1,0,0]], -2) - - -def verify_squeeze(shape, axis): - x = sym.Variable("x") - if axis is not None: - y = sym.squeeze(x, axis=axis) - else: - y = sym.squeeze(x) - y = y + 1 - - def forward(x): - return np.squeeze(x, axis=axis) + 1 - - def backward(head_grads, x): - return [np.reshape(head_grads, x.shape)] - - check_function(y, forward, backward, shape=shape) - - -def test_squeeze(): - verify_squeeze((1, 3, 2, 5), None) - verify_squeeze((1, 3, 1), axis=0) - verify_squeeze((1, 3, 2, 5, 1), axis=-1) - - -def test_pad(): - x = sym.Variable("x") - y = sym.pad(x, pad_width=((0, 0), (0, 0), (0, 1), (2, 3)), pad_value=1.) - - def forward(x): - return np.pad(x, - pad_width=((0, 0), (0, 0), (0, 1), (2, 3)), - mode='constant', constant_values=1.) - - shape = {'x': (1, 3, 28, 28)} - check_function(y, forward, shape=shape) - -def verify_lrn(ishape, size, axis, bias, alpha, beta): - x = sym.Variable("x", shape=ishape) - y = sym.lrn(x, size=size, axis=axis, bias=bias, alpha=alpha, beta=beta) - - def forward1(x): - return topi.testing.lrn_python(x, size, axis, bias, alpha, beta) - - check_function(y, forward1) - - def forward2(x): - y = forward1(x) - return (y > 0)*y - - #Checking LRN op followed by elementwise op relu - check_function(sym.relu(y), forward2, in_range={'x': (-10.0, 10.0)}) - -def verify_l2_normalize(ishape, eps, axis): - x = sym.Variable("x", shape=ishape) - y = sym.l2_normalize(x, eps=eps, axis=axis) - - def forward1(x): - return topi.testing.l2_normalize_python(x, eps, axis) - - check_function(y, forward1) - - def forward2(x): - y = forward1(x) - return (y > 0)*y - - #Checking L2 normalization op followed by elementwise op relu - check_function(sym.relu(y), forward2, in_range={'x': (-10.0, 10.0)}) - -def test_lrn(): - verify_lrn((1, 3, 20, 20), 3, 1, 1.0, 1.0, 0.5) - verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75) - -def test_l2_normalize(): - verify_l2_normalize((1, 3, 20, 20), 0.001, (1,)) - verify_l2_normalize((1, 3, 20, 20), 0.001, (1, 2)) - -def verify_gather_nd(src_shape, indices_src): - src_dtype = "float32" - indices_dtype = "int32" - indices_src = np.array(indices_src, dtype=indices_dtype) - a = sym.Variable("a", shape=src_shape) - indices = sym.Variable("indices", shape=indices_src.shape) - y = sym.gather_nd(a, indices) - - def forward(a, indices): - return topi.testing.gather_nd_python(a, indices) - - a_src = np.arange(np.prod(src_shape), dtype=src_dtype).reshape(src_shape) - - check_function(y, forward, - dtype={'a': src_dtype, 'indices': indices_dtype}, - values={'a': a_src, 'indices': indices_src}) - -def test_gather_nd(): - verify_gather_nd((4,), [[1]]) - verify_gather_nd((4,), [[1, 3, 2]]) - verify_gather_nd((2, 3), [[1]]) - verify_gather_nd((2, 3), [[1], [0]]) - verify_gather_nd((2, 3), [[1, 0], [0, 2]]) - verify_gather_nd((2, 3, 4), [[1, 0], [0, 2]]) - verify_gather_nd((2, 3, 4), [[1, 0], [0, 2], [3, 1]]) - verify_gather_nd((2, 3, 4), [[[1, 0], [0, 1]], [[0, 2], [1, 2]], - [[3, 1], [0, 2]]]) - verify_gather_nd((2, 3, 4, 5), [[1, 0], [0, 2]]) - verify_gather_nd((2, 3, 4, 5), [[1, 0], [2, 1], [3, 2], [4, 2]]) - -if __name__ == "__main__": - test_check_function() - test_split() - test_concatenate() - test_log_softmax() - test_batchnorm() - test_dense() - test_relu() - 
test_prelu_nchw() - test_prelu_nhwc() - test_sym_scalar_pow() - test_scalar_sym_pow() - test_exp() - test_log() - test_tanh() - test_sigmoid() - test_softmax() - test_squeeze() - test_pad() - test_take() - test_lrn() - test_l2_normalize() - test_strided_slice() - test_gather_nd() diff --git a/nnvm/tests/python/compiler/test_top_level2.py b/nnvm/tests/python/compiler/test_top_level2.py deleted file mode 100644 index b558428f0144..000000000000 --- a/nnvm/tests/python/compiler/test_top_level2.py +++ /dev/null @@ -1,362 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np - -import tvm -from tvm.contrib import graph_runtime -import topi -import topi.testing -import nnvm.symbol as sym -import nnvm.compiler -from nnvm.testing.config import ctx_list - - -def test_conv2d(): - def run_test_conv2d(sym, dtype, dshape, kshape, oshape, shape_dict, padding): - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(sym, target, shape_dict) - m = graph_runtime.create(graph, lib, ctx) - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) - bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype)) - m.run(x=data, y_weight=kernel, y_bias=bias) - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - c_np = topi.testing.conv2d_nchw_python( - data.asnumpy(), kernel.asnumpy(), 1, padding) - c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1) - tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) - - x = sym.Variable("x") - y = sym.conv2d(x, channels=10, kernel_size=(3,3), - name="y", padding=(1,1)) - dtype = "float32" - dshape = (1, 3, 18, 18) - kshape = (10, 3, 3, 3) - oshape = (1, 10, 18, 18) - shape_dict = {"x": dshape} - run_test_conv2d(y, dtype, dshape, kshape, oshape, shape_dict, (1,1)) - - x = sym.Variable("x") - y = sym.conv2d(x, channels=10, kernel_size=(1,3), - name="y", padding=(0,1)) - dtype = "float32" - dshape = (1, 3, 224, 224) - kshape = (10, 3, 1, 3) - oshape = (1, 10, 224, 224) - shape_dict = {"x": dshape} - run_test_conv2d(y, dtype, dshape, kshape, oshape, shape_dict, (0,1)) - - -def test_mixed_precision(): - x = sym.Variable("x") - dtype = "int8" - out_dtype="int32" - y = sym.conv2d(x, - channels=10, - kernel_size=(3,3), - name="y", - padding=(1,1), - use_bias=False, - out_dtype="int32") - dshape = (1, 3, 18, 18) - kshape = (10, 3, 3, 3) - oshape = (1, 10, 18, 18) - shape_dict = {"x": dshape} - dtype_dict = {"x": dtype} - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict, dtype_dict) - m = graph_runtime.create(graph, lib, ctx) - data = tvm.nd.array(np.random.uniform(-127, 127, size=dshape).astype(dtype)) - kernel = tvm.nd.array(np.random.uniform(-127, 127, 
size=kshape).astype(dtype)) - m.run(x=data, y_weight=kernel) - out = m.get_output(0, tvm.nd.empty(oshape, out_dtype)) - c_np = topi.testing.conv2d_nchw_python( - data.asnumpy().astype(out_dtype), - kernel.asnumpy().astype(out_dtype), 1, 1) - tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) - - -def test_dilated_conv2d(): - dilation = 3 - x = sym.Variable("x") - y = sym.conv2d(x, channels=10, kernel_size=(3, 3), dilation=(dilation, dilation), - name="y", padding=(1, 1)) - dtype = "float32" - dshape = (1, 3, 18, 18) - kshape = (10, 3, 3, 3) - oshape = (1, 10, 14, 14) - shape_dict = {"x": dshape} - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) - m = graph_runtime.create(graph, lib, ctx) - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype)) - kernel_np = np.random.uniform(size=kshape).astype(dtype) - kernel = tvm.nd.array(kernel_np) - dkernel_np = topi.testing.dilate_python(kernel_np, (1, 1, dilation, dilation)) - m.run(x=data, y_weight=kernel, y_bias=bias) - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - c_np = topi.testing.conv2d_nchw_python( - data.asnumpy(), dkernel_np, 1, 1) - c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1) - tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) - - -def test_grouped_conv2d_nchw(): - x = sym.Variable("x") - y = sym.conv2d(x, channels=32, kernel_size=(3,3), groups=32, - name="y", padding=(1,1)) - dtype = "float32" - dshape = (1, 32, 18, 18) - kshape = (32, 1, 3, 3) - oshape = (1, 32, 18, 18) - shape_dict = {"x": dshape} - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) - m = graph_runtime.create(graph, lib, ctx) - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) - bias = tvm.nd.array(np.random.uniform(size=kshape[0]).astype(dtype)) - m.run(x=data, y_weight=kernel, y_bias=bias) - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - c_np = topi.testing.depthwise_conv2d_python_nchw( - data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME') - c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1) - tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) - -def test_grouped_conv2d_nhwc(): - x = sym.Variable("x") - y = sym.conv2d(x, channels=32, kernel_size=(3,3), groups=32, - name="y", padding=(1,1), layout="NHWC", kernel_layout ='HWOI') - dtype = "float32" - dshape = (1, 18, 18, 32) - kshape = (3, 3, 32, 1) - oshape = (1, 18, 18, 32) - shape_dict = {"x": dshape} - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) - m = graph_runtime.create(graph, lib, ctx) - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) - bias = tvm.nd.array(np.random.uniform(size=kshape[2]).astype(dtype)) - m.run(x=data, y_weight=kernel, y_bias=bias) - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - c_np = topi.testing.depthwise_conv2d_python_nhwc( - data.asnumpy(), kernel.asnumpy(), (1,1), 'SAME') - c_np = c_np + bias.asnumpy().reshape(1, 1, kshape[2]) - tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5) - - -def test_conv2d_transpose(): - x = sym.Variable("x") - y = sym.conv2d_transpose(x, channels=10, kernel_size=(3,3), strides=(2,2), - name="y", padding=(1,1), output_padding=(2,2)) - dtype = "float32" - dshape = (1, 3, 18, 18) - kshape = (3, 10, 3, 3) - oshape = 
(1, 10, 37, 37) - shape_dict = {"x": dshape} - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) - m = graph_runtime.create(graph, lib, ctx) - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - kernel = tvm.nd.array(np.random.uniform(size=kshape).astype(dtype)) - bias = tvm.nd.array(np.random.uniform(size=kshape[1]).astype(dtype)) - m.run(x=data, y_weight=kernel, y_bias=bias) - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - c_np = topi.testing.conv2d_transpose_nchw_python( - data.asnumpy(), kernel.asnumpy(), 2, 1) - c_np = c_np + bias.asnumpy().reshape(kshape[1], 1, 1) - d_np = np.zeros(shape=oshape) - d_np[:,:,0:c_np.shape[2],0:c_np.shape[3]] = c_np - tvm.testing.assert_allclose(out.asnumpy(), d_np, rtol=1e-5) - - -def test_max_pool2d(): - x = sym.Variable("x") - y = sym.max_pool2d(x, pool_size=(2,2), strides=(2,2), - padding=(0,0), name="y", ceil_mode=True) - dtype = "float32" - dshape = (1, 3, 28, 28) - oshape = (1, 3, 14, 14) - shape_dict = {"x": dshape} - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) - m = graph_runtime.create(graph, lib, ctx) - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - m.run(x=data) - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - b_np = np.max(data.asnumpy().reshape(1,3,14,2,14,2), axis=(3,5)) - tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5) - - -def test_avg_pool2d(): - x = sym.Variable("x") - y = sym.avg_pool2d(x, pool_size=(2,2), strides=(2,2), padding=(0,0), name="y") - dtype = "float32" - dshape = (1, 3, 28, 28) - oshape = (1, 3, 14, 14) - shape_dict = {"x": dshape} - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) - m = graph_runtime.create(graph, lib, ctx) - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - m.run(x=data) - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - b_np = np.mean(data.asnumpy().reshape(1,3,14,2,14,2), axis=(3,5)) - tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5) - - -def test_avg_pool2d_no_count_pad(): - kh, kw = (4, 4) - sh, sw = (2, 2) - ph, pw = (2, 2) - - x = sym.Variable("x") - y = sym.avg_pool2d(x, pool_size=(kh, kw), strides=(sw, sw), padding=(ph, pw), - name="y", count_include_pad=False) - dtype = "float32" - n = 1 - (ic, ih, iw) = (3, 28, 28) - (oc, oh, ow) = (3, 15, 15) - - a_np = np.random.uniform(low=0.001, size=(n, ic, ih, iw)).astype(dtype) - pad_np = np.zeros(shape=(n, ic, ih+2*ph, iw+2*pw)).astype(dtype) - no_zero = (range(n), range(ic), (range(ph, ih+ph)), (range(pw, iw+pw))) - pad_np[np.ix_(*no_zero)] = a_np - b_np = np.zeros(shape=(n, oc, oh, ow)).astype(dtype) - - for i in range(oh): - for j in range(ow): - pad_count = np.sum(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw] > 0, axis=(2,3)) - b_np[:,:,i,j] = np.sum(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], - axis=(2,3)) / np.maximum(pad_count, 1) - b_np = np.maximum(b_np, 0.0) - shape_dict = {"x": (n, ic, ih, iw)} - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) - m = graph_runtime.create(graph, lib, ctx) - data = tvm.nd.array(a_np) - m.run(x=data) - out = m.get_output(0, tvm.nd.empty((n, oc, oh, ow), dtype)) - tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5) - - -def test_global_max_pool2d(): - x = sym.Variable("x") - y = sym.global_max_pool2d(x, name="y") - dtype = "float32" - dshape = (1, 1024, 7, 7) - oshape = (1, 1024, 1, 1) - shape_dict = {"x": dshape} - for target, ctx in ctx_list(): 
- graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) - m = graph_runtime.create(graph, lib, ctx) - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - m.run(x=data) - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - b_np = np.max(data.asnumpy(), axis=(2,3), keepdims=True) - tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5) - - -def test_global_avg_pool2d(): - x = sym.Variable("x") - y = sym.global_avg_pool2d(x, name="y") - dtype = "float32" - dshape = (1, 1024, 7, 7) - oshape = (1, 1024, 1, 1) - shape_dict = {"x": dshape} - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) - m = graph_runtime.create(graph, lib, ctx) - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - m.run(x=data) - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - b_np = np.mean(data.asnumpy(), axis=(2,3), keepdims=True) - tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5) - - -def test_upsampling_nearest_neighbor(): - x = sym.Variable("x") - scale = 2 - y = sym.upsampling(x, scale=scale, name="y") - dtype = "float32" - dshape = (1, 16, 32, 32) - oshape = (1, 16, 32*scale, 32*scale) - shape_dict = {"x": dshape} - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict) - m = graph_runtime.create(graph, lib, ctx) - a_np = np.random.uniform(size=dshape).astype(dtype) - data = tvm.nd.array(a_np) - m.run(x=data) - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - b_np = topi.testing.upsampling_python(a_np, (scale, scale), "NCHW") - tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5) - -def test_upsampling_bilinear(): - x = sym.Variable("x") - scale = 2 - y = sym.upsampling(x, scale=scale, method="BILINEAR", name="y", layout="NCHW") - dtype = "float32" - dshape = (1, 4, 32, 32) - oshape = (1, 4, 32*scale, 32*scale) - shape_dict = {"x": dshape} - dtype_dict = {"x": dtype} - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict, dtype_dict) - m = graph_runtime.create(graph, lib, ctx) - a_np = np.random.uniform(size=dshape).astype(dtype) - data = tvm.nd.array(a_np) - m.run(x=data) - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - b_np = topi.testing.bilinear_resize_python(a_np, (32*scale, 32*scale), "NCHW", align_corners=False) - tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5, atol=1e-5) - -def test_resize_bilinear(): - x = sym.Variable("x") - y = sym.resize(x, size=(60, 60), method="BILINEAR", name="y", layout="NHWC", align_corners=True) - dtype = "float32" - dshape = (1, 32, 32, 4) - oshape = (1, 60, 60, 4) - shape_dict = {"x": dshape} - dtype_dict = {"x": dtype} - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, shape_dict, dtype_dict) - m = graph_runtime.create(graph, lib, ctx) - a_np = np.random.uniform(size=dshape).astype(dtype) - data = tvm.nd.array(a_np) - m.run(x=data) - out = m.get_output(0, tvm.nd.empty(oshape, dtype)) - b_np = topi.testing.bilinear_resize_python(a_np, (60, 60), "NHWC") - tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5, atol=1e-5) - -if __name__ == "__main__": - test_mixed_precision() - test_conv2d() - test_dilated_conv2d() - test_grouped_conv2d_nchw() - test_grouped_conv2d_nhwc() - test_conv2d_transpose() - test_max_pool2d() - test_avg_pool2d() - test_avg_pool2d_no_count_pad() - test_global_max_pool2d() - test_global_avg_pool2d() - test_upsampling_nearest_neighbor() - test_upsampling_bilinear() - test_resize_bilinear() diff --git 
a/nnvm/tests/python/compiler/test_top_level3.py b/nnvm/tests/python/compiler/test_top_level3.py deleted file mode 100644 index c60f0450b30a..000000000000 --- a/nnvm/tests/python/compiler/test_top_level3.py +++ /dev/null @@ -1,63 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np -import tvm -from tvm.contrib import graph_runtime -import topi.testing -import nnvm.symbol as sym -import nnvm.compiler -from nnvm.testing.config import ctx_list -from nnvm.testing.check_computation import check_function - -def check_map(symfunc, np_func, np_backward=None, dtype="float32", rnd_min=-1, rnd_max=1): - x = sym.Variable("x") - y = symfunc(x) - shape = {'x': (1, 3, 32, 32)} - check_function(y, lambda x: np_func(x), np_backward, - dtype=dtype, shape=shape, in_range=(rnd_min, rnd_max)) - - -def test_floor(): - check_map(sym.floor, np.floor) - -def test_ceil(): - check_map(sym.ceil, np.ceil) - -def test_trunc(): - check_map(sym.trunc, np.trunc) - -def test_round(): - check_map(sym.round, np.round) - -def test_abs(): - check_map(sym.abs, np.abs) - check_map(sym.abs, np.abs, dtype = "int32") - check_map(sym.abs, np.abs, dtype = "int8") - -def test_shift(): - n = 3 - for dtype in ["int32", "int8"]: - check_map(lambda x : x >> n, lambda x: x >> n, dtype=dtype, rnd_min=-100, rnd_max=100) - check_map(lambda x : x << n, lambda x: x << n, dtype=dtype, rnd_min=-100, rnd_max=100) - -if __name__ == "__main__": - test_shift() - test_floor() - test_ceil() - test_round() - test_abs() - test_trunc() diff --git a/nnvm/tests/python/compiler/test_top_level4.py b/nnvm/tests/python/compiler/test_top_level4.py deleted file mode 100644 index 691163974470..000000000000 --- a/nnvm/tests/python/compiler/test_top_level4.py +++ /dev/null @@ -1,746 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import math -import numpy as np -import tvm -from tvm.contrib import graph_runtime -import topi -import nnvm.symbol as sym -import nnvm.compiler -from nnvm.testing.config import ctx_list -from nnvm.testing.check_computation import check_function - -def verify_transpose(dshape, axes): - x = sym.Variable("x") - if axes: - y = sym.transpose(x, axes=axes) - else: - y = sym.transpose(x) - y = y + 1 - dtype = "float32" - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape}) - m = graph_runtime.create(graph, lib, ctx) - # set input - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - m.run(x=data) - out_np = np.transpose(data.asnumpy(), axes=axes) + 1 - out = m.get_output(0, tvm.nd.empty(out_np.shape)) - tvm.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) - -def verify_reduce_explicit(dshape, data, result, fsym, oshape=None, otype='float32', **kwargs): - """ Verify reduce operations by comparign its result with `result` """ - x = sym.Variable("x") - y = fsym(x + 0, **kwargs) - for target, ctx in ctx_list(): - # TODO(yuruofei): remove when cuda reduce schedule is done - if target == 'cuda' and fsym == sym.mean: - continue - graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape}) - m = graph_runtime.create(graph, lib, ctx) - # set input - m.run(x=data) - # oshape set to None means do not test the shape-correctness - oshape = result.shape if isinstance(result, np.ndarray) else (1,) if oshape is None else oshape - out = m.get_output(0, tvm.nd.empty(oshape, dtype=otype)) - if isinstance(result, np.ndarray): - np.testing.assert_equal(out.asnumpy().shape, result.shape) - tvm.testing.assert_allclose(out.asnumpy(), result, atol=1e-5, rtol=1e-5) - else: - tvm_out = out.asnumpy() - assert abs(result - tvm_out) <= (1e-5 + 1e-5 * abs(tvm_out)) - -def verify_reduce(dshape, fnp, fsym, oshape=None, otype='float32', **kwargs): - """ Verify reduce operations by generating data at random and calling numpy - version as reference """ - data = np.random.uniform(size=dshape).astype(otype) - result = fnp(data + 0, **kwargs) - verify_reduce_explicit(dshape, data, result, fsym, oshape=oshape, otype=otype, **kwargs) - -def verify_collapse(dshape, target_shape, fnp): - x = sym.Variable("x", shape=dshape) - t = sym.Variable("t", shape=target_shape) - y = sym.collapse_sum(x, t) - dtype = "float32" - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, - {"x": dshape, "t": target_shape}) - m = graph_runtime.create(graph, lib, ctx) - data = np.random.uniform(size=dshape).astype(dtype) - m.run(x=data) - out = m.get_output(0, tvm.nd.empty(target_shape)) - out_np = fnp(data) - tvm.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) - - -def test_transpose(): - verify_transpose((2, 3, 4), (0, 2, 1)) - verify_transpose((2, 3, 4), None) - - -def test_reduce(): - - def _with_keepdims(func): - """ Wrapper around numpy's argmax/argmin with `keepdims` argument supported """ - def wrapper(data, axis=None, keepdims=False): - if not keepdims: - return func(data, axis=axis) - else: - if axis is not None: - out_shape = list(data.shape) - out_shape[axis] = 1 - else: - out_shape = [1 for _ in range(len(data.shape))] - return func(data, axis=axis).reshape(out_shape) - return wrapper - - verify_reduce((2, 3, 4), np.max, sym.max, axis=1, keepdims=True) - verify_reduce((4, 4, 3), np.min, sym.min, keepdims=True) - verify_reduce((4, 4, 3), np.sum, sym.sum, axis=(0, 2)) - verify_reduce((4, 4, 3), np.sum, sym.sum) - 
verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 1), keepdims=False) - verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 2), keepdims=False) - verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 1), keepdims=True) - verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 2), keepdims=True) - verify_reduce((128, 24, 128), np.mean, sym.mean, keepdims=True) - verify_reduce((128, 24, 128), np.mean, sym.mean, keepdims=False) - verify_reduce((128, 24, 128), np.mean, sym.mean, axis=(0, 1, 2), keepdims=True) - - data = np.array([[[1,2],[3,4]],[[3,44],[5,6]]], dtype=np.float32) - verify_reduce_explicit([2,2,2], data, np.array([[1,1],[1,0]]), sym.argmax, otype='int32', axis=[0,2], exclude=True) - verify_reduce_explicit([2,2,2], data, np.array([[0,0],[0,1]]), sym.argmin, otype='int32', axis=[0,2], exclude=True) - shape = [4, 4, 3] - for axis in [None, 0, 1, 2]: - for keepdims in [True,False]: - kwargs = { 'keepdims':keepdims } - if axis is None: - # FIXME: NNVM doesn't support setting `axis=None` explicitly. - kwargs.update({'oshape': [1,1,1] if keepdims else [1] }) - else: - kwargs.update({'axis': axis}) - kwargs.update({'oshape': shape[:axis]+[1]+shape[axis+1:] if keepdims else shape[:axis]+shape[axis+1:]}) - - verify_reduce(shape, _with_keepdims(np.argmax), sym.argmax, otype='int32', **kwargs) - verify_reduce(shape, _with_keepdims(np.argmin), sym.argmin, otype='int32', **kwargs) - - -def test_collapse(): - verify_collapse((2, 3, 4), (1,), lambda x: x.sum()) - verify_collapse((2, 3, 4), (1, 1, 1), lambda x: x.sum(keepdims=True)) - verify_collapse((2, 3, 4), (1, 1), lambda x: x.sum().reshape(1, 1)) - verify_collapse((2, 3, 4), (1, 4), lambda x: x.reshape(-1, 4).sum(0, keepdims=True)) - verify_collapse((2, 3, 4), (3, 4), lambda x: x.sum(0)) - verify_collapse((2, 3, 4), (1, 3, 4), lambda x: x.sum(0, keepdims=True)) - verify_collapse((2, 3, 4), (1, 1, 4), lambda x: x.sum((0, 1), keepdims=True)) - verify_collapse((2, 3, 4), (2, 1, 4), lambda x: x.sum(1, keepdims=True)) - verify_collapse((2, 3, 4), (2, 1, 1), lambda x: x.sum((1, 2), keepdims=True)) - verify_collapse((2, 3, 4), (2, 3, 1), lambda x: x.sum(2, keepdims=True)) - verify_collapse((2, 3, 4), (2, 3, 4), lambda x: x) - - -def verify_flip(ishape, axis): - x = sym.Variable("x") - y = sym.flip(x, axis=axis) + 1 - dtype = "float32" - x_np = np.random.uniform(size=ishape).astype(dtype) - res = np.flip(x_np, axis) + 1 - - for target, ctx in ctx_list(): - # set input - graph, lib, _ = nnvm.compiler.build(y, target, {"x": ishape}) - m = graph_runtime.create(graph, lib, ctx) - m.run(x=x_np) - out = m.get_output(0, tvm.nd.empty(res.shape)) - tvm.testing.assert_allclose(out.asnumpy(), res, atol=1e-5, rtol=1e-5) - - -def test_flip(): - verify_flip((3, 4, 3), 1) - verify_flip((3, 4, 3), 0) - verify_flip((3, 4, 3), 2) - verify_flip((3, 4, 3), -1) - verify_flip((3, 4, 3), -3) - verify_flip((3, 4, 3), -2) - - -def verify_reshape(dshape, oshape): - x = sym.Variable("x") - y = sym.reshape(x, shape=oshape) - y = y + 1 - dtype = "float32" - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape}) - m = graph_runtime.create(graph, lib, ctx) - # set input - data = tvm.nd.array(np.random.uniform(size=dshape).astype(dtype)) - m.run(x=data) - out_np = data.asnumpy().reshape(oshape) + 1 - out = m.get_output(0, tvm.nd.empty(out_np.shape)) - tvm.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) - - -def test_reshape(): - verify_reshape((2, 3, 4), (-1, 2, 1)) - verify_reshape((2, 3, 4), (8, 3)) 
- verify_reshape((4, 7), (2, 7, 2)) - - -def test_clip(): - x = sym.Variable("x") - a_min=0.2 - a_max=0.75 - y = sym.clip(x, a_min=a_min, a_max=a_max) - - def forward(x): - return np.clip(x, a_min=a_min, a_max=a_max) - - def backward(head_grads, x): - mask1 = np.greater_equal(x, a_min).astype("float") - mask2 = np.less_equal(x, a_max).astype("float") - return [head_grads * mask1 * mask2] - - shape = {'x': (3, 4, 5)} - check_function(y, forward, backward, shape=shape) - - -def test_broadcast(): - a = sym.Variable("a") - b = sym.Variable("b") - shape = {'a': (3, 4, 5), 'b': (1, 5)} - - def _collapse(g): - return g.reshape(-1, shape['b'][-1]).sum(0, keepdims=True) - - y = sym.broadcast_add(a, b) - def _backward_add(head_grads, a, b): - da = head_grads - db = _collapse(head_grads) - return da, db - check_function(y, lambda a, b: a + b, _backward_add, shape=shape) - - y = sym.broadcast_sub(a, b) - def _backward_sub(head_grads, a, b): - da = head_grads - db = -_collapse(head_grads) - return da, db - check_function(y, lambda a, b: a - b, _backward_sub, shape=shape) - - y = sym.broadcast_mul(a, b) - def _backward_mul(head_grads, a, b): - da = head_grads * b - db = _collapse(head_grads * a) - return da, db - check_function(y, lambda a, b: a * b, _backward_mul, shape=shape) - - y = sym.broadcast_div(a, b) - def _backward_div(head_grads, a, b): - da = head_grads / b - db = _collapse(- head_grads * a / b**2) - return da, db - # We avoid computing numerical derivatives too close to zero here - check_function(y, lambda a, b: a / b, _backward_div, shape=shape, numerical_grads=False) - check_function(y, lambda a, b: a / b, _backward_div, shape=shape, - in_range={'b': (0.1, 20)}) - - y = sym.broadcast_mod(a, b) - check_function(y, - lambda a, b: np.mod(a, b), - in_range={'a': (0.001, 100), 'b': (1, 100)}, dtype='int32', shape=shape) - - y = sym.broadcast_max(a, b) - check_function(y, lambda a, b: np.maximum(a, b), shape=shape) - - y = sym.broadcast_min(a, b) - check_function(y, lambda a, b: np.minimum(a, b), shape=shape) - - y = sym.broadcast_pow(a, b) - check_function(y, - lambda a, b: np.power(a, b), - in_range={'a': (0.001, 100), 'b': (0.001, 2)}, shape=shape) - - y = sym.broadcast_left_shift(a, b) - check_function(y, lambda a, b: a << b, dtype='int32', shape=shape) - - y = sym.broadcast_right_shift(a, b) - check_function(y, lambda a, b: a >> b, dtype='int32', shape=shape) - - y = sym.broadcast_greater(a, b) - check_function(y, lambda a, b: np.greater(a, b), shape=shape) - - y = sym.broadcast_less(a, b) - check_function(y, lambda a, b: np.less(a, b), shape=shape) - - y = sym.broadcast_equal(a, b) - check_function(y, lambda a, b: np.equal(a, b), - in_range={'a': (-2, 2), 'b': (-2, 2)}, dtype='int32', shape=shape) - - y = sym.broadcast_not_equal(a, b) - check_function(y, lambda a, b: np.not_equal(a, b), - in_range={'a': (-2, 2), 'b': (-2, 2)}, dtype='int32', shape=shape) - - y = sym.broadcast_greater_equal(a, b) - check_function(y, lambda a, b: np.greater_equal(a, b), - in_range={'a': (-3, 3), 'b': (-3, 3)}, dtype='int32', shape=shape) - - y = sym.broadcast_less_equal(a, b) - check_function(y, lambda a, b: np.less_equal(a, b), - in_range={'a': (-3, 3), 'b': (-3, 3)}, dtype='int32', shape=shape) - -def test_greater(): - l = sym.Variable("l") - r = sym.Variable("r") - y = sym.greater(l, r) - - def forward(l, r): - return np.greater(l, r).astype("float32") - - def backward(head_grads, l, r): - return {'l': np.zeros_like(l)} - - shape = {'l': (3, 4, 5), 'r': (3, 4, 5)} - check_function(y, forward, backward, 
shape=shape) - - -def test_less(): - l = sym.Variable("l") - r = sym.Variable("r") - y = sym.less(l, r) - - def forward(l, r): - return np.less(l, r).astype("float32") - - def backward(head_grads, l, r): - return {'l': np.zeros_like(l)} - - shape = {'l': (3, 4, 5), 'r': (3, 4, 5)} - check_function(y, forward, backward, shape=shape) - - -def test_reshape_like(): - x = sym.Variable("x") - y = sym.Variable("y") - z = sym.reshape_like(x, y) - - def forward(x, y): - return np.reshape(x, y.shape) - - def backward(head_grads, x, y): - return [np.reshape(head_grads, x.shape), - np.zeros_like(y)] - - shape = {'x': (3, 4, 5), 'y': (5, 4, 3)} - check_function(z, forward, backward, shape=shape) - - -def verify_expand_like(in_shape, out_shape, axis, exclude): - x = sym.Variable("x") - y = sym.Variable("y") - z = sym.expand_like(x, y, axis=axis, exclude=exclude) - - def forward(x, y): - odim = len(out_shape) - - if len(x.shape) == len(y.shape): - return np.broadcast_to(x, y.shape) - - if x.shape == (1,) and len(y.shape) == odim: - x = np.reshape(x, ()) - - real_axis = [i if i >= 0 else i + odim for i in axis] - real_axis = sorted(real_axis) - if exclude: - real_axis = list(set(range(odim)) - set(real_axis)) - for i in real_axis: - x = np.expand_dims(x, i).astype(x.dtype) - for i in real_axis: - x = np.concatenate([x]*out_shape[i], axis=i).astype(x.dtype) - - return x - - def backward(head_grads, x, y): - odim = len(out_shape) - - keepdims = len(x.shape) == len(y.shape) - - if x.shape == (1,) and len(y.shape) == odim: - x = np.reshape(x, ()) - - real_axis = [i if i >= 0 else i + odim for i in axis] - real_axis = sorted(real_axis) - if exclude: - real_axis = list(set(range(odim)) - set(real_axis)) - return [np.sum(head_grads, axis=tuple(real_axis), keepdims=keepdims), - np.zeros_like(y)] - - - shape = {'x': in_shape, 'y': out_shape} - check_function(z, forward, backward, shape=shape) - - -def test_expand_like(): - verify_expand_like((3,), (3, 2), [1], False) - verify_expand_like((2,), (2, 3), [1], False) - verify_expand_like((3, 4), (3, 5, 4), [1], False) - verify_expand_like((5, 7), (5, 6, 7, 8), [0, 2], True) - verify_expand_like((2, 3), (2, 3), [], False) - verify_expand_like((1,), (2, 3), [0, 1], False) - verify_expand_like((1, 1), (2, 3), [0, 1], False) - verify_expand_like((2, 1), (2, 3), [1], False) - verify_expand_like((1, 3), (2, 3), [0], False) - - -def verify_elemwise_sum(num_args): - s = [sym.Variable("input" + str(i)) for i in range(num_args)] - y = sym.elemwise_sum(*s, num_args=num_args) - - def forward(**inputs): - return np.sum(np.array(list(inputs.values())), axis=0) - - def backward(head_grads, **inputs): - return [head_grads] * num_args - - shape = {s[i]: (3, 4, 5) for i in range(num_args)} - check_function(y, forward, backward, shape=shape) - - -def test_elemwise_sum(): - verify_elemwise_sum(1) - verify_elemwise_sum(5) - verify_elemwise_sum(7) - - -def test_block_grad(): - x = sym.Variable("x") - y = sym.block_grad(x) - - def forward(x): - return x - - def backward(head_grads, x): - return [np.zeros_like(head_grads)] - - - shape = {'x': (3, 4, 5)} - # Numerical grad checking would fail for this function - check_function(y, forward, backward, shape=shape, numerical_grads=False) - - -def test_full(): - shape = (3, 4, 5) - value = 7 - dtype = "float32" - for target, ctx in ctx_list(): - data = sym.Variable("data", dtype=dtype) - # full_like - s = sym.full_like(data=data, fill_value=value, name="s") - graph, lib, _ = nnvm.compiler.build(s, target, {"data": shape}) - m = 
graph_runtime.create(graph, lib, ctx) - m.run(data=np.random.uniform(size=shape).astype(dtype)) - out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype)) - tvm.testing.assert_allclose( - out.asnumpy(), - np.full(shape, fill_value=value, dtype=dtype), - atol=1e-5, rtol=1e-5) - # ones_like - s = sym.ones_like(data=data, fill_value=value, name="s") - graph, lib, _ = nnvm.compiler.build(s, target, {"data": shape}) - m = graph_runtime.create(graph, lib, ctx) - m.run(data=np.random.uniform(size=shape).astype(dtype)) - out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype)) - tvm.testing.assert_allclose( - out.asnumpy(), - np.full(shape, fill_value=1, dtype=dtype), - atol=1e-5, rtol=1e-5) - # zeros_like - s = sym.zeros_like(data=data, fill_value=value, name="s") - graph, lib, _ = nnvm.compiler.build(s, target, {"data": shape}) - m = graph_runtime.create(graph, lib, ctx) - m.run(data=np.random.uniform(size=shape).astype(dtype)) - out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype)) - tvm.testing.assert_allclose( - out.asnumpy(), - np.full(shape, fill_value=0, dtype=dtype), - atol=1e-5, rtol=1e-5) - # full - s = sym.full(shape=shape, dtype=dtype, fill_value=value, name="s") - graph, lib, _ = nnvm.compiler.build(s, target) - m = graph_runtime.create(graph, lib, ctx) - m.run() - out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype)) - tvm.testing.assert_allclose( - out.asnumpy(), - np.full(shape, fill_value=value, dtype=dtype), - atol=1e-5, rtol=1e-5) - # ones - s = sym.ones(shape=shape, dtype=dtype, name="s") - graph, lib, _ = nnvm.compiler.build(s, target) - m = graph_runtime.create(graph, lib, ctx) - m.run() - out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype)) - tvm.testing.assert_allclose( - out.asnumpy(), - np.full(shape, fill_value=1, dtype=dtype), - atol=1e-5, rtol=1e-5) - # zeros - s = sym.zeros(shape=shape, dtype=dtype, name="s") - graph, lib, _ = nnvm.compiler.build(s, target) - m = graph_runtime.create(graph, lib, ctx) - m.run() - out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype)) - tvm.testing.assert_allclose( - out.asnumpy(), - np.full(shape, fill_value=0, dtype=dtype), - atol=1e-5, rtol=1e-5) - -def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), - offsets=(0.5, 0.5), clip=False): - data = sym.Variable("data") - out = sym.multibox_prior(data=data, sizes=sizes, ratios=ratios, steps=steps, - offsets=offsets, clip=clip) - - in_height = dshape[2] - in_width = dshape[3] - num_sizes = len(sizes) - num_ratios = len(ratios) - size_ratio_concat = sizes + ratios - steps_h = steps[0] if steps[0] > 0 else 1.0 / in_height - steps_w = steps[1] if steps[1] > 0 else 1.0 / in_width - offset_h = offsets[0] - offset_w = offsets[1] - - oshape = (1, in_height * in_width * (num_sizes + num_ratios - 1), 4) - dtype = "float32" - np_out = np.zeros(oshape).astype(dtype) - - for i in range(in_height): - center_h = (i + offset_h) * steps_h - for j in range(in_width): - center_w = (j + offset_w) * steps_w - for k in range(num_sizes + num_ratios - 1): - w = size_ratio_concat[k] * in_height / in_width / 2.0 if k < num_sizes else \ - size_ratio_concat[0] * in_height / in_width * math.sqrt(size_ratio_concat[k + 1]) / 2.0 - h = size_ratio_concat[k] / 2.0 if k < num_sizes else \ - size_ratio_concat[0] / math.sqrt(size_ratio_concat[k + 1]) / 2.0 - count = i * in_width * (num_sizes + num_ratios - 1) + j * (num_sizes + num_ratios - 1) + k - np_out[0][count][0] = center_w - w - np_out[0][count][1] = center_h - h - np_out[0][count][2] = center_w + w - np_out[0][count][3] = center_h + h - 
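# The reference loop above emits, for every spatial location, (num_sizes + num_ratios - 1)
# prior boxes as normalized (xmin, ymin, xmax, ymax): for k < num_sizes the box uses sizes[k]
# with aspect ratio 1, otherwise the base size sizes[0] with aspect ratio ratios[k - num_sizes + 1].
# With clip=True the boxes are additionally clamped to the unit square, which np.clip mirrors below.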
if clip: - np_out = np.clip(np_out, 0, 1) - - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape}) - m = graph_runtime.create(graph, lib, ctx) - m.set_input("data", np.random.uniform(size=dshape).astype(dtype)) - m.run() - tvm_out = m.get_output(0, tvm.nd.empty(np_out.shape, dtype)) - tvm.testing.assert_allclose(tvm_out.asnumpy(), np_out, atol=1e-5, rtol=1e-5) - -def test_multibox_prior(): - verify_multibox_prior((1, 3, 50, 50)) - verify_multibox_prior((1, 3, 224, 224), sizes=(0.5, 0.25, 0.1), ratios=(1, 2, 0.5)) - verify_multibox_prior((1, 32, 32, 32), sizes=(0.5, 0.25), ratios=(1, 2), steps=(2, 2), clip=True) - -def test_multibox_transform_loc(): - batch_size = 1 - num_anchors = 3 - num_classes = 3 - cls_prob = sym.Variable("cls_prob") - loc_preds = sym.Variable("loc_preds") - anchors = sym.Variable("anchors") - transform_loc_data, valid_count = sym.multibox_transform_loc(cls_prob=cls_prob, loc_pred=loc_preds, - anchor=anchors) - out = sym.non_max_suppression(data=transform_loc_data, valid_count=valid_count, return_indices=False) - - # Manually create test case - np_cls_prob = np.array([[[0.2, 0.5, 0.3], [0.25, 0.3, 0.45], [0.7, 0.1, 0.2]]]) - np_loc_preds = np.array([[0.1, -0.2, 0.3, 0.2, 0.2, 0.4, 0.5, -0.3, 0.7, -0.2, -0.4, -0.8]]) - np_anchors = np.array([[[-0.1, -0.1, 0.1, 0.1], [-0.2, -0.2, 0.2, 0.2], [1.2, 1.2, 1.5, 1.5]]]) - - expected_np_out = np.array([[[1, 0.69999999, 0, 0, 0.10818365, 0.10008108], - [0, 0.44999999, 1, 1, 1, 1], - [0, 0.30000001, 0, 0, 0.22903419, 0.20435292]]]) - - dtype = "float32" - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(out, target, {"cls_prob": (batch_size, num_anchors, num_classes), - "loc_preds": (batch_size, num_anchors * 4), - "anchors": (1, num_anchors, 4)}) - m = graph_runtime.create(graph, lib, ctx) - m.set_input(**{"cls_prob": np_cls_prob.astype(dtype), "loc_preds": np_loc_preds.astype(dtype), "anchors": np_anchors.astype(dtype)}) - m.run() - tvm_out = m.get_output(0, tvm.nd.empty(expected_np_out.shape, dtype)) - tvm.testing.assert_allclose(tvm_out.asnumpy(), expected_np_out, atol=1e-5, rtol=1e-5) - -def test_non_max_suppression(): - dshape = (1, 5, 6) - data = sym.Variable("data") - valid_count = sym.Variable("valid_count", dtype="int32") - iou_threshold = 0.7 - force_suppress = True - top_k = 2 - out = sym.non_max_suppression(data=data, valid_count=valid_count, return_indices=False, - iou_threshold=iou_threshold, force_suppress=force_suppress, top_k=top_k) - - np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80], - [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79], - [1, 0.5, 100, 60, 70, 110]]]).astype("float32") - np_valid_count = np.array([4]).astype("int32") - np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], - [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1], - [-1, -1, -1, -1, -1, -1]]]) - - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape, "valid_count": (dshape[0],)}, - dtype={"data": "float32", "valid_count": "int32"}) - m = graph_runtime.create(graph, lib, ctx) - m.set_input(**{"data": np_data, "valid_count": np_valid_count}) - m.run() - tvm_out = m.get_output(0, tvm.nd.empty(np_result.shape, "float32")) - tvm.testing.assert_allclose(tvm_out.asnumpy(), np_result, atol=1e-5, rtol=1e-5) - -def np_slice_like(np_data, np_shape_like, axis=[]): - begin_idx = [0 for _ in np_data.shape] - end_idx = list(np_data.shape) - if len(axis) > 0: - for i in axis: - if i < 0: - i = 
len(np_data.shape) + i - end_idx[i] = np_shape_like.shape[i] - else: - for i in range(len(np_data.shape)): - if i < len(np_shape_like.shape): - end_idx[i] = np_shape_like.shape[i] - slice_idx = [] - for b, e in zip(begin_idx, end_idx): - slice_idx.append(slice(b, e)) - np_result = np_data[slice_idx] - return np_result - -def verify_slice_like(np_data, np_shape_like, axis=[]): - dtype = "float32" - np_data = np_data.astype(dtype) - np_shape_like = np_shape_like.astype(dtype) - np_result = np_slice_like(np_data, np_shape_like, axis) - data1 = sym.Variable("data1") - data2 = sym.Variable("data2") - net = sym.slice_like(data=data1, slice_like=data2, axis=axis) - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(net, target, {"data1": np_data.shape, - "data2": np_shape_like.shape}) - m = graph_runtime.create(graph, lib, ctx) - m.set_input(**{"data1": np_data, "data2": np_shape_like}) - m.run() - out = m.get_output(0, tvm.nd.empty(np_result.shape, dtype)) - tvm.testing.assert_allclose(out.asnumpy(), np_result, atol=1e-5, rtol=1e-5) - -def test_slice_like(): - np_data = np.random.uniform(size=(3, 4, 5)) - np_shape_like = np.random.uniform(size=(1, 2, 3)) - verify_slice_like(np_data, np_shape_like) - np_data = np.random.uniform(size=(3, 4, 5)) - np_shape_like = np.random.uniform(size=(1, 2)) - verify_slice_like(np_data, np_shape_like) - np_data = np.random.uniform(size=(3, 4, 5)) - np_shape_like = np.random.uniform(size=(1, 2, 3)) - axis = (1, 2) - verify_slice_like(np_data, np_shape_like, axis) - np_data = np.random.uniform(size=(3, 4, 5)) - np_shape_like = np.random.uniform(size=(1, 2, 3)) - axis = (-1, -3) - verify_slice_like(np_data, np_shape_like, axis) - np_data = np.random.uniform(size=(1, 3, 224, 224)) - np_shape_like = np.random.uniform(size=(1, 3, 112, 112)) - axis = (2, 3) - verify_slice_like(np_data, np_shape_like, axis) - -def verify_where(condition, x, y): - dtype = "float32" - if len(condition.shape) == 1: - np_out = np.array([xv if c else yv for (c,xv,yv) in zip(condition,x,y)]) - else: - np_out = np.where(condition, x, y) - cond_var = sym.Variable("condition") - x_var = sym.Variable("x") - y_var = sym.Variable("y") - net = sym.where(cond_var, x_var, y_var) - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(net, target, {"condition": condition.shape, - "x": x.shape, "y": y.shape}) - m = graph_runtime.create(graph, lib, ctx) - m.set_input(**{"condition": condition, "x": x, "y": y}) - m.run() - out = m.get_output(0, tvm.nd.empty(x.shape, dtype)) - tvm.testing.assert_allclose(out.asnumpy(), np_out, atol=1e-5, rtol=1e-5) - -def test_where(): - shape = (13, 8, 224, 224, 6) - condition = np.random.uniform(low=-1, high=1, size=shape).astype("float32") - x = np.random.uniform(size=shape).astype("float32") - y = np.random.uniform(size=shape).astype("float32") - verify_where(condition, x, y) - condition = np.random.uniform(low=-1, high=1, size=(shape[0],)).astype("float32") - x = np.random.uniform(size=shape).astype("float32") - y = np.random.uniform(size=shape).astype("float32") - verify_where(condition, x, y) - -def test_argmax(): - dshape = (204800, 2) - oshape = (1, 320, 640) - - dtype = "float32" - x = sym.Variable("x", shape=dshape, dtype=dtype) - x = sym.reshape(x, shape=(1, 320, 640, 2)) - x = sym.transpose(x, axes=(0, 3, 1, 2)) - y = sym.argmax(x, axis=1) - target_str = "llvm" - target = tvm.target.create(target_str) - ctx = tvm.context(target_str, 0) - with nnvm.compiler.build_config(opt_level=2): - graph, lib, _ = nnvm.compiler.build(y, 
target, {"x": dshape}) - m = graph_runtime.create(graph, lib, ctx) - data = np.random.uniform(size=dshape).astype(dtype) - m.run(x=data) - np_reshape = np.reshape(data, (1, 320, 640, 2)) - np_transpose = np.transpose(np_reshape, axes=(0, 3, 1, 2)) - np_argmax = np.argmax(np_transpose, axis=1) - out = m.get_output(0) - np.testing.assert_allclose(out.asnumpy(), np_argmax, atol=1e-5, rtol=1e-5) - -if __name__ == "__main__": - test_reshape() - test_broadcast() - test_reduce() - test_collapse() - test_transpose() - test_clip() - test_greater() - test_less() - test_reshape_like() - test_expand_like() - test_elemwise_sum() - test_block_grad() - test_full() - test_flip() - test_multibox_prior() - test_multibox_transform_loc() - test_non_max_suppression() - test_slice_like() - test_where() - test_argmax() - print(nnvm.compiler.engine.dump()) diff --git a/nnvm/tests/python/frontend/caffe2/model_zoo/__init__.py b/nnvm/tests/python/frontend/caffe2/model_zoo/__init__.py deleted file mode 100644 index 2dc1f08f6ec9..000000000000 --- a/nnvm/tests/python/frontend/caffe2/model_zoo/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Store for caffe2 examples and common models.""" -from __future__ import absolute_import as _abs -import os -import importlib - -models = [ - 'squeezenet', - 'resnet50', - 'vgg19', -] - -# skip download if model exist -for model in models: - try: - locals()['c2_' + model] = importlib.import_module('caffe2.python.models.' + model) - except ImportError: - os.system("python -m caffe2.python.models.download -i -f " + model) - locals()['c2_' + model] = importlib.import_module('caffe2.python.models.' + model) diff --git a/nnvm/tests/python/frontend/caffe2/model_zoo/squeezenet.py b/nnvm/tests/python/frontend/caffe2/model_zoo/squeezenet.py deleted file mode 100644 index 2de2d1075494..000000000000 --- a/nnvm/tests/python/frontend/caffe2/model_zoo/squeezenet.py +++ /dev/null @@ -1,118 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# coding: utf-8 -# pylint: disable=unused-argument - -""" -Symbol of SqueezeNet - -Reference: -Iandola, Forrest N., et al. -"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016). -""" - -from nnvm import symbol as sym -from nnvm.testing.utils import create_workload - -# Helpers -def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels): - net = _make_fire_conv(net, squeeze_channels, 1, 0) - - left = _make_fire_conv(net, expand1x1_channels, 1, 0) - right = _make_fire_conv(net, expand3x3_channels, 3, 1) - # NOTE : Assume NCHW layout here - net = sym.concatenate(left, right, axis=1) - - return net - -def _make_fire_conv(net, channels, kernel_size, padding=0): - net = sym.conv2d(net, channels=channels, kernel_size=(kernel_size, kernel_size), - padding=(padding, padding)) - net = sym.relu(net) - return net - -# Net -def get_symbol(num_classes, version, **kwargs): - """Get symbol of SqueezeNet - - Parameters - ---------- - num_classes: int - The number of classification results - - version : str, optional - "1.0" or "1.1" of SqueezeNet - """ - assert version == '1.1', ("Unsupported SqueezeNet version {version}:" - "1.1 expected".format(version=version)) - net = sym.Variable("data") - - net = sym.conv2d(net, channels=64, kernel_size=(3, 3), strides=(2, 2)) - net = sym.relu(net) - net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) - net = _make_fire(net, 16, 64, 64) - net = _make_fire(net, 16, 64, 64) - net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) - net = _make_fire(net, 32, 128, 128) - net = _make_fire(net, 32, 128, 128) - net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) - net = _make_fire(net, 48, 192, 192) - net = _make_fire(net, 48, 192, 192) - net = _make_fire(net, 64, 256, 256) - net = _make_fire(net, 64, 256, 256) - - net = sym.dropout(net, rate=0.5) - net = sym.conv2d(net, channels=num_classes, kernel_size=(1, 1)) - net = sym.relu(net) - net = sym.global_avg_pool2d(net) - return sym.softmax(net, axis=1) - -def get_workload(batch_size=1, num_classes=1000, version='1.0', - image_shape=(3, 224, 224), dtype="float32", **kwargs): - """Get benchmark workload for SqueezeNet - - Parameters - ---------- - batch_size : int - The batch size used in the model - - num_classes : int, optional - Number of classes - - version : str, optional - "1.0" or "1.1" of SqueezeNet - - image_shape : tuple, optional - The input image shape - - dtype : str, optional - The data type - - kwargs : dict - Extra arguments - - Returns - ------- - net : nnvm.Symbol - The computational graph - - params : dict of str to NDArray - The parameters. - """ - net = get_symbol(num_classes=num_classes, version=version, **kwargs) - return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/tests/python/frontend/caffe2/test_forward.py b/nnvm/tests/python/frontend/caffe2/test_forward.py deleted file mode 100644 index 2a216314ba1a..000000000000 --- a/nnvm/tests/python/frontend/caffe2/test_forward.py +++ /dev/null @@ -1,108 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np -import nnvm -import tvm -from tvm.contrib import graph_runtime -from nnvm.testing.config import ctx_list -from model_zoo import c2_squeezenet, c2_resnet50, c2_vgg19 - -from caffe2.python import workspace - - -def get_tvm_output(model, - input_data, - target, - ctx, - output_shape, - output_dtype='float32'): - """ Generic function to execute and get tvm output""" - sym, params = nnvm.frontend.from_caffe2(model.init_net, model.predict_net) - - # supporting multiple inputs in caffe2 in a bit tricky, - # because the input names can appear at the beginning or end of model.predict_net.external_input - assert isinstance(input_data, np.ndarray) - - # here we use the first input blob to the first op to get the input name - input_names = model.predict_net.op[0].input[0] - shape_dict = {input_names: input_data.shape} - dtype_dict = {input_names: input_data.dtype} - - graph, lib, params = nnvm.compiler.build( - sym, target, shape=shape_dict, dtype=dtype_dict, params=params) - - m = graph_runtime.create(graph, lib, ctx) - - # set inputs - m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype))) - m.set_input(**params) - - # execute - m.run() - - # get outputs - if isinstance(output_shape, list) and isinstance(output_dtype, list): - tvm_output_list = [] - for i, s in enumerate(output_shape): - tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i])) - tvm_output_list.append(tvm_output.asnumpy()) - return tvm_output_list - else: - tvm_output = m.get_output(0, tvm.nd.empty((output_shape), - output_dtype)) - return tvm_output.asnumpy() - - -def get_caffe2_output(model, x, dtype='float32'): - workspace.RunNetOnce(model.init_net) - - input_blob = model.predict_net.op[0].input[0] - workspace.FeedBlob(input_blob, x.astype(dtype)) - workspace.RunNetOnce(model.predict_net) - - output_blob = model.predict_net.external_output[0] - c2_output = workspace.FetchBlob(output_blob) - return c2_output - - -def verify_caffe2_forward_impl(model, data_shape, out_shape): - dtype = 'float32' - data = np.random.uniform(size=data_shape).astype(dtype) - c2_out = get_caffe2_output(model, data, dtype) - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, data, target, ctx, out_shape, dtype) - tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5) - - -def test_squeezenet1_1(): - verify_caffe2_forward_impl(c2_squeezenet, (1, 3, 224, 224), - (1, 1000, 1, 1)) - - -def test_resnet50(): - verify_caffe2_forward_impl(c2_resnet50, (1, 3, 224, 224), - (1, 1000)) - - -def test_vgg19(): - verify_caffe2_forward_impl(c2_vgg19, (1, 3, 224, 224), (1, 1000)) - - -if __name__ == '__main__': - test_squeezenet1_1() - test_resnet50() - test_vgg19() diff --git a/nnvm/tests/python/frontend/caffe2/test_graph.py b/nnvm/tests/python/frontend/caffe2/test_graph.py deleted file mode 100644 index c8203815e6d0..000000000000 --- a/nnvm/tests/python/frontend/caffe2/test_graph.py +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Test graph equality of caffe2 models.""" -import nnvm -from nnvm.compiler import graph_util, graph_attr -from model_zoo import c2_squeezenet, squeezenet - -def compare_graph(init, predict, nnvm_sym, ishape): - caffe2_sym, params = nnvm.frontend.from_caffe2(init, predict) - g1 = nnvm.graph.create(caffe2_sym) - g2 = nnvm.graph.create(nnvm_sym) - input_name = predict.external_input[0] - ishapes = {input_name: ishape} - graph_attr.set_shape_inputs(g1, ishapes) - graph_attr.set_shape_inputs(g2, ishapes) - g1 = g1.apply("InferShape").apply("SimplifyInference") - g2 = g2.apply("InferShape").apply("SimplifyInference") - graph_util.check_graph_equal(g1, g2) - -def test_squeeze_net(): - symbol, params = squeezenet.get_workload(version='1.1') - compare_graph(c2_squeezenet.init_net, c2_squeezenet.predict_net, symbol, ishape=(1, 3, 224, 224)) - - -if __name__ == '__main__': - test_squeeze_net() diff --git a/nnvm/tests/python/frontend/coreml/model_zoo/.gitignore b/nnvm/tests/python/frontend/coreml/model_zoo/.gitignore deleted file mode 100644 index 4242a1b2e2e0..000000000000 --- a/nnvm/tests/python/frontend/coreml/model_zoo/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.mlmodel -*.jpg -*.png diff --git a/nnvm/tests/python/frontend/coreml/model_zoo/__init__.py b/nnvm/tests/python/frontend/coreml/model_zoo/__init__.py deleted file mode 100644 index 2dbaf2b10483..000000000000 --- a/nnvm/tests/python/frontend/coreml/model_zoo/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import os -from PIL import Image -import numpy as np -from tvm.contrib.download import download_testdata - -def get_mobilenet(): - url = 'https://docs-assets.developer.apple.com/coreml/models/MobileNet.mlmodel' - dst = 'mobilenet.mlmodel' - real_dst = download_testdata(url, dst, module='coreml') - return real_dst - -def get_resnet50(): - url = 'https://docs-assets.developer.apple.com/coreml/models/Resnet50.mlmodel' - dst = 'resnet50.mlmodel' - real_dst = download_testdata(url, dst, module='coreml') - return real_dst - -def get_cat_image(): - url = 'https://gist.githubusercontent.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/fa7ef0e9c9a5daea686d6473a62aacd1a5885849/cat.png' - dst = 'cat.png' - real_dst = download_testdata(url, dst, module='data') - img = Image.open(real_dst).resize((224, 224)) - img = np.transpose(img, (2, 0, 1))[np.newaxis, :] - return np.asarray(img) diff --git a/nnvm/tests/python/frontend/coreml/test_forward.py b/nnvm/tests/python/frontend/coreml/test_forward.py deleted file mode 100644 index 7a9f294f4359..000000000000 --- a/nnvm/tests/python/frontend/coreml/test_forward.py +++ /dev/null @@ -1,370 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import numpy as np - -from coremltools.models.neural_network import NeuralNetworkBuilder -from coremltools.models import datatypes - -import tvm -from tvm.contrib import graph_runtime -import topi -import topi.testing -import nnvm.symbol as sym -import nnvm.compiler -from nnvm.testing.config import ctx_list -from nnvm import frontend -import coremltools as cm -import model_zoo - -def get_tvm_output(symbol, x, params, target, ctx, - out_shape=(1, 1000), input_name='image', dtype='float32'): - shape_dict = {input_name : x.shape} - with nnvm.compiler.build_config(opt_level=2): - graph, lib, params = nnvm.compiler.build(symbol, target, shape_dict, params=params) - m = graph_runtime.create(graph, lib, ctx) - # set inputs - m.set_input(input_name, tvm.nd.array(x.astype(dtype))) - m.set_input(**params) - m.run() - # get outputs - out = m.get_output(0, tvm.nd.empty(out_shape, dtype)) - return out.asnumpy() - -def run_model_checkonly(model_file, model_name=''): - model = cm.models.MLModel(model_file) - sym, params = nnvm.frontend.from_coreml(model) - x = model_zoo.get_cat_image() - for target, ctx in ctx_list(): - tvm_output = get_tvm_output(sym, x, params, target, ctx) - print(target, ctx, model_name, 'prediction id: ', np.argmax(tvm_output.flat)) - -def test_mobilenet_checkonly(): - model_file = model_zoo.get_mobilenet() - run_model_checkonly(model_file, 'mobilenet') - -def test_resnet50_checkonly(): - model_file = model_zoo.get_resnet50() - run_model_checkonly(model_file, 'resnet50') - -def run_tvm_graph(graph_def, input_data, input_name, output_shape, output_dtype='float32'): - """ Generic function to compile on nnvm and execute on tvm """ - - sym, params = nnvm.frontend.from_coreml(graph_def) - target = 'llvm' - if isinstance(input_data, list): - shape_dict = {} - dtype_dict = {} - for i, e in enumerate(input_name): - shape_dict[e] = input_data[i].shape - dtype_dict[e] = input_data[i].dtype - else: - shape_dict = {input_name: input_data.shape} - dtype_dict = {input_name: input_data.dtype} - - graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, - dtype=dtype_dict, params=params) - - ctx = tvm.cpu(0) - from tvm.contrib import graph_runtime - m = graph_runtime.create(graph, lib, ctx) - # set inputs - if isinstance(input_data, list): - for i, e in enumerate(input_name): - m.set_input(e, tvm.nd.array(input_data[i].astype(input_data[i].dtype))) - else: - m.set_input(input_name, tvm.nd.array(input_data.astype(input_data.dtype))) - - m.set_input(**params) - # execute - m.run() - # get outputs - if isinstance(output_shape, list) and isinstance(output_dtype, list): - tvm_output_list = [] - for i, s in enumerate(output_shape): - tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i])) - tvm_output_list.append(tvm_output.asnumpy()) - return tvm_output_list - else: - tvm_output = m.get_output(0, tvm.nd.empty((output_shape), output_dtype)) - return tvm_output.asnumpy() - -def verify_AddLayerParams(input_dim, alpha=2): - dtype = 'float32' - - a_np1 = np.random.uniform(size=input_dim).astype(dtype) - a_np2 = np.random.uniform(size=input_dim).astype(dtype) - - b_np = np.add(a_np1, a_np2) + alpha - inputs = [('input1', datatypes.Array(*input_dim)), - ('input2', datatypes.Array(*input_dim))] - output = [('output', datatypes.Array(*b_np.shape))] - builder = NeuralNetworkBuilder(inputs, output) - builder.add_elementwise(name='Add', - alpha=alpha, - input_names=['input1', 'input2'], - output_name='output', - mode='ADD') - model = cm.models.MLModel(builder.spec) - for target, ctx in 
ctx_list(): - out = run_tvm_graph(model, - [a_np1, a_np2], - ['input1', 'input2'], - b_np.shape, - dtype) - tvm.testing.assert_allclose(out, b_np, rtol=1e-5) - -def test_forward_AddLayerParams(): - verify_AddLayerParams((1, 2, 2), 0) - verify_AddLayerParams((1, 2, 2), 1) - verify_AddLayerParams((1, 3, 3), 2) - -def verify_MultiplyLayerParams(input_dim, alpha): - dtype = 'float32' - - a_np1 = np.random.uniform(size=input_dim).astype(dtype) - a_np2 = np.random.uniform(size=input_dim).astype(dtype) - - b_np = np.multiply(a_np1, a_np2) * alpha - inputs = [('input1', datatypes.Array(*input_dim)), - ('input2', datatypes.Array(*input_dim))] - output = [('output', datatypes.Array(*b_np.shape))] - builder = NeuralNetworkBuilder(inputs, output) - builder.add_elementwise(name='Mul', - alpha=alpha, - input_names=['input1', 'input2'], - output_name='output', - mode='MULTIPLY') - model = cm.models.MLModel(builder.spec) - for target, ctx in ctx_list(): - out = run_tvm_graph(model, - [a_np1, a_np2], - ['input1', 'input2'], - b_np.shape, - dtype) - tvm.testing.assert_allclose(out, b_np, rtol=1e-5) - -def test_forward_MultiplyLayerParams(): - verify_MultiplyLayerParams((1, 2, 2), 0) - verify_MultiplyLayerParams((1, 2, 2), 1) - verify_MultiplyLayerParams((1, 3, 3), 2) - -def verify_ConcatLayerParams(input1_dim, input2_dim): - dtype = 'float32' - - a_np1 = np.random.uniform(size=input1_dim).astype(dtype) - a_np2 = np.random.uniform(size=input2_dim).astype(dtype) - - b_np = np.concatenate((a_np1, a_np2), axis=1) - inputs = [('input1', datatypes.Array(*input1_dim)), - ('input2', datatypes.Array(*input2_dim))] - output = [('output', datatypes.Array(*b_np.shape))] - builder = NeuralNetworkBuilder(inputs, output) - builder.add_elementwise(name='Concate', - input_names=['input1', 'input2'], - output_name='output', - mode='CONCAT') - model = cm.models.MLModel(builder.spec) - for target, ctx in ctx_list(): - out = run_tvm_graph(model, - [a_np1, a_np2], - ['input1', 'input2'], - b_np.shape, - dtype) - tvm.testing.assert_allclose(out, b_np, rtol=1e-5) - -def test_forward_ConcatLayerParams(): - verify_ConcatLayerParams((1, 1, 2, 2), (1, 2, 2, 2)) - verify_ConcatLayerParams((1, 2, 4, 4), (1, 3, 4, 4)) - -def verify_UpsampleLayerParams(input_dim, scale, mode): - dtype = "float32" - - a_np = np.full(input_dim, 1, dtype=dtype) - if mode == 'NN': - b_np = topi.testing.upsampling_python(a_np, (scale, scale)) - else: - new_h = input_dim[2] * scale - new_w = input_dim[3] * scale - b_np = topi.testing.bilinear_resize_python(a_np, (new_h, new_w), 'NCHW') - - input = [('input', datatypes.Array(*input_dim))] - output = [('output', datatypes.Array(*b_np.shape))] - builder = NeuralNetworkBuilder(input, output) - builder.add_upsample(name='Upsample', - scaling_factor_h=scale, - scaling_factor_w=scale, - mode=mode, - input_name='input', - output_name='output') - - model = cm.models.MLModel(builder.spec) - for target, ctx in ctx_list(): - out = run_tvm_graph(model, a_np, 'input', b_np.shape, dtype) - tvm.testing.assert_allclose(out, b_np, rtol=1e-5) - -def test_forward_UpsampleLayerParams(): - verify_UpsampleLayerParams((1, 16, 32, 32), 2, 'NN') - verify_UpsampleLayerParams((1, 4, 6, 6), 3, 'BILINEAR') - -def verify_l2_normalize(input_dim, eps): - dtype = "float32" - - a_np = np.random.uniform(size=input_dim).astype(dtype) - b_np = topi.testing.l2_normalize_python(a_np, eps, 1) - - input = [('input', datatypes.Array(*input_dim))] - output = [('output', datatypes.Array(*b_np.shape))] - builder = NeuralNetworkBuilder(input, output) - 
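# The CoreML layer added below should reproduce the topi reference above,
# i.e. L2 normalization over axis 1 (the channel axis) with the given epsilon.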
builder.add_l2_normalize(name='L2', epsilon=eps, input_name='input', output_name='output') - - model = cm.models.MLModel(builder.spec) - for target, ctx in ctx_list(): - out = run_tvm_graph(model, a_np, 'input', b_np.shape, dtype) - tvm.testing.assert_allclose(out, b_np, rtol=1e-5) - -def test_forward_l2_normalize(): - verify_l2_normalize((1, 3, 20, 20), 0.001) - -def verify_lrn(input_dim, size, bias, alpha, beta): - dtype = "float32" - axis=1 - a_np = np.random.uniform(size=input_dim).astype(dtype) - b_np = topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta) - - input = [('input', datatypes.Array(*input_dim))] - output = [('output', datatypes.Array(*b_np.shape))] - builder = NeuralNetworkBuilder(input, output) - builder.add_lrn(name='LRN', - input_name='input', - output_name='output', - alpha=alpha, - beta=beta, - k=bias, - local_size=size) - - model = cm.models.MLModel(builder.spec) - for target, ctx in ctx_list(): - out = run_tvm_graph(model, a_np, 'input', b_np.shape, dtype) - tvm.testing.assert_allclose(out, b_np, rtol=1e-5) - -def test_forward_lrn(): - verify_lrn((1, 3, 10, 20), 3, 1.0, 1.0, 0.5) - -def verify_average(input_dim1, input_dim2, axis=0): - dtype = 'float32' - - a_np1 = np.random.uniform(size=input_dim1).astype(dtype) - a_np2 = np.random.uniform(size=input_dim2).astype(dtype) - - b_np = np.mean((a_np1, a_np2), axis=axis) - - inputs = [('input1', datatypes.Array(*input_dim1)), - ('input2', datatypes.Array(*input_dim2))] - output = [('output', datatypes.Array(*b_np.shape))] - builder = NeuralNetworkBuilder(inputs, output) - builder.add_elementwise(name='MEAN', - input_names=['input1', 'input2'], - output_name='output', - mode='AVE') - model = cm.models.MLModel(builder.spec) - for target, ctx in ctx_list(): - out = run_tvm_graph(model, - [a_np1, a_np2], - ['input1', 'input2'], - b_np.shape, - dtype) - tvm.testing.assert_allclose(out, b_np, rtol=1e-5) - -def test_forward_average(): - verify_average((1, 3, 20, 20), (1, 3, 20, 20)) - verify_average((3, 20, 20), (1, 3, 20, 20)) - verify_average((20, 20), (1, 3, 20, 20)) - -def verify_max(input_dim): - dtype = 'float32' - - a_np1 = np.random.uniform(size=input_dim).astype(dtype) - a_np2 = np.random.uniform(size=input_dim).astype(dtype) - a_np3 = np.random.uniform(size=input_dim).astype(dtype) - - b_np = np.max((a_np1, a_np2, a_np3), axis=0) - - inputs = [('input1', datatypes.Array(*input_dim)), - ('input2', datatypes.Array(*input_dim)), - ('input3', datatypes.Array(*input_dim))] - output = [('output', datatypes.Array(*b_np.shape))] - builder = NeuralNetworkBuilder(inputs, output) - builder.add_elementwise(name='Max', - input_names=['input1', 'input2', 'input3'], - output_name='output', - mode='MAX') - model = cm.models.MLModel(builder.spec) - for target, ctx in ctx_list(): - out = run_tvm_graph(model, - [a_np1, a_np2, a_np3], - ['input1', 'input2', 'input3'], - b_np.shape, - dtype) - tvm.testing.assert_allclose(out, b_np, rtol=1e-5) - -def test_forward_max(): - verify_max((1, 3, 20, 20)) - verify_max((20, 20)) - -def verify_min(input_dim): - dtype = 'float32' - - a_np1 = np.random.uniform(size=input_dim).astype(dtype) - a_np2 = np.random.uniform(size=input_dim).astype(dtype) - a_np3 = np.random.uniform(size=input_dim).astype(dtype) - - b_np = np.min((a_np1, a_np2, a_np3), axis=0) - - inputs = [('input1', datatypes.Array(*input_dim)), - ('input2', datatypes.Array(*input_dim)), - ('input3', datatypes.Array(*input_dim))] - output = [('output', datatypes.Array(*b_np.shape))] - builder = NeuralNetworkBuilder(inputs, output) - 
builder.add_elementwise(name='Min', - input_names=['input1', 'input2', 'input3'], - output_name='output', - mode='MIN') - model = cm.models.MLModel(builder.spec) - for target, ctx in ctx_list(): - out = run_tvm_graph(model, - [a_np1, a_np2, a_np3], - ['input1', 'input2', 'input3'], - b_np.shape, - dtype) - tvm.testing.assert_allclose(out, b_np, rtol=1e-5) - -def test_forward_min(): - verify_min((1, 3, 20, 20)) - verify_min((20, 20)) - -if __name__ == '__main__': - test_mobilenet_checkonly() - test_resnet50_checkonly() - test_forward_AddLayerParams() - test_forward_ConcatLayerParams() - test_forward_MultiplyLayerParams() - test_forward_UpsampleLayerParams() - test_forward_l2_normalize() - test_forward_lrn() - test_forward_average() - test_forward_max() - test_forward_min() diff --git a/nnvm/tests/python/frontend/darknet/test_forward.py b/nnvm/tests/python/frontend/darknet/test_forward.py deleted file mode 100644 index 4e62ff2e1f33..000000000000 --- a/nnvm/tests/python/frontend/darknet/test_forward.py +++ /dev/null @@ -1,525 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Compile Darknet Models -===================== -This article is a test script to test darknet models with NNVM. -All the required models and libraries will be downloaded from the internet -by the script. 
-""" -import numpy as np -import tvm -from tvm.contrib import graph_runtime -from tvm.contrib.download import download_testdata -download_testdata.__test__ = False -from nnvm import frontend -from tvm.relay.testing.darknet import LAYERTYPE -from tvm.relay.testing.darknet import __darknetffi__ -import nnvm.compiler - -DARKNET_LIB = 'libdarknet2.0.so' -DARKNETLIB_URL = 'https://github.com/siju-samuel/darknet/blob/master/lib/' \ - + DARKNET_LIB + '?raw=true' -LIB = __darknetffi__.dlopen(download_testdata(DARKNETLIB_URL, DARKNET_LIB, module='darknet')) - -DARKNET_TEST_IMAGE_NAME = 'dog.jpg' -DARKNET_TEST_IMAGE_URL = 'https://github.com/siju-samuel/darknet/blob/master/data/' + DARKNET_TEST_IMAGE_NAME +'?raw=true' -DARKNET_TEST_IMAGE_PATH = download_testdata(DARKNET_TEST_IMAGE_URL, DARKNET_TEST_IMAGE_NAME, module='data') - -def _read_memory_buffer(shape, data, dtype='float32'): - length = 1 - for x in shape: - length *= x - data_np = np.zeros(length, dtype=dtype) - for i in range(length): - data_np[i] = data[i] - return data_np.reshape(shape) - -def _get_tvm_output(net, data, build_dtype='float32'): - '''Compute TVM output''' - dtype = 'float32' - sym, params = frontend.darknet.from_darknet(net, dtype) - - target = 'llvm' - shape_dict = {'data': data.shape} - graph, library, params = nnvm.compiler.build(sym, target, shape_dict, - build_dtype, params=params) - # Execute on TVM - ctx = tvm.cpu(0) - m = graph_runtime.create(graph, library, ctx) - # set inputs - m.set_input('data', tvm.nd.array(data.astype(dtype))) - m.set_input(**params) - m.run() - # get outputs - tvm_out = [] - for i in range(m.get_num_outputs()): - tvm_out.append(m.get_output(i).asnumpy()) - return tvm_out - -def _load_net(cfg_url, cfg_name, weights_url, weights_name): - cfg_path = download_testdata(cfg_url, cfg_name, module='darknet') - weights_path = download_testdata(weights_url, weights_name, module='darknet') - net = LIB.load_network(cfg_path.encode('utf-8'), weights_path.encode('utf-8'), 0) - return net - -def verify_darknet_frontend(net, build_dtype='float32'): - '''Test network with given input image on both darknet and tvm''' - def get_darknet_output(net, img): - LIB.network_predict_image(net, img) - out = [] - for i in range(net.n): - layer = net.layers[i] - if layer.type == LAYERTYPE.REGION: - attributes = np.array([layer.n, layer.out_c, layer.out_h, - layer.out_w, layer.classes, - layer.coords, layer.background], - dtype=np.int32) - out.insert(0, attributes) - out.insert(0, _read_memory_buffer((layer.n*2, ), layer.biases)) - layer_outshape = (layer.batch, layer.out_c, - layer.out_h, layer.out_w) - out.insert(0, _read_memory_buffer(layer_outshape, layer.output)) - elif layer.type == LAYERTYPE.YOLO: - attributes = np.array([layer.n, layer.out_c, layer.out_h, - layer.out_w, layer.classes, - layer.total], - dtype=np.int32) - out.insert(0, attributes) - out.insert(0, _read_memory_buffer((layer.total*2, ), layer.biases)) - out.insert(0, _read_memory_buffer((layer.n, ), layer.mask, dtype='int32')) - layer_outshape = (layer.batch, layer.out_c, - layer.out_h, layer.out_w) - out.insert(0, _read_memory_buffer(layer_outshape, layer.output)) - elif i == net.n-1: - if layer.type == LAYERTYPE.CONNECTED: - darknet_outshape = (layer.batch, layer.out_c) - elif layer.type in [LAYERTYPE.SOFTMAX]: - darknet_outshape = (layer.batch, layer.outputs) - else: - darknet_outshape = (layer.batch, layer.out_c, - layer.out_h, layer.out_w) - out.insert(0, _read_memory_buffer(darknet_outshape, layer.output)) - return out - - dtype = 'float32' - - 
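# Load and letterbox the test image through darknet itself, then copy its flat CHW float
# buffer element by element into a (1, C, H, W) NumPy array to use as the TVM input.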
img = LIB.letterbox_image(LIB.load_image_color(DARKNET_TEST_IMAGE_PATH.encode('utf-8'), 0, 0), net.w, net.h) - darknet_output = get_darknet_output(net, img) - batch_size = 1 - data = np.empty([batch_size, img.c, img.h, img.w], dtype) - i = 0 - for c in range(img.c): - for h in range(img.h): - for k in range(img.w): - data[0][c][h][k] = img.data[i] - i = i + 1 - - tvm_out = _get_tvm_output(net, data, build_dtype) - for tvm_outs, darknet_out in zip(tvm_out, darknet_output): - tvm.testing.assert_allclose(darknet_out, tvm_outs, rtol=1e-3, atol=1e-3) - -def verify_rnn_forward(net): - '''Test network with given input data on both darknet and tvm''' - def get_darknet_network_predict(net, data): - return LIB.network_predict(net, data) - from cffi import FFI - ffi = FFI() - np_arr = np.zeros([1, net.inputs], dtype='float32') - np_arr[0, 84] = 1 - cffi_arr = ffi.cast('float*', np_arr.ctypes.data) - tvm_out = _get_tvm_output(net, np_arr)[0] - darknet_output = get_darknet_network_predict(net, cffi_arr) - darknet_out = np.zeros(net.outputs, dtype='float32') - for i in range(net.outputs): - darknet_out[i] = darknet_output[i] - last_layer = net.layers[net.n-1] - darknet_outshape = (last_layer.batch, last_layer.outputs) - darknet_out = darknet_out.reshape(darknet_outshape) - tvm.testing.assert_allclose(darknet_out, tvm_out, rtol=1e-4, atol=1e-4) - -def test_forward_extraction(): - '''test extraction model''' - model_name = 'extraction' - cfg_name = model_name + '.cfg' - weights_name = model_name + '.weights' - cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true' - weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true' - net = _load_net(cfg_url, cfg_name, weights_url, weights_name) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_alexnet(): - '''test alexnet model''' - model_name = 'alexnet' - cfg_name = model_name + '.cfg' - weights_name = model_name + '.weights' - cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true' - weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true' - net = _load_net(cfg_url, cfg_name, weights_url, weights_name) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_resnet50(): - '''test resnet50 model''' - model_name = 'resnet50' - cfg_name = model_name + '.cfg' - weights_name = model_name + '.weights' - cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true' - weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true' - net = _load_net(cfg_url, cfg_name, weights_url, weights_name) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_yolov2(): - '''test yolov2 model''' - model_name = 'yolov2' - cfg_name = model_name + '.cfg' - weights_name = model_name + '.weights' - cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true' - weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true' - net = _load_net(cfg_url, cfg_name, weights_url, weights_name) - build_dtype = {} - verify_darknet_frontend(net, build_dtype) - LIB.free_network(net) - -def test_forward_yolov3(): - '''test yolov3 model''' - model_name = 'yolov3' - cfg_name = model_name + '.cfg' - weights_name = model_name + '.weights' - cfg_url = 'https://github.com/pjreddie/darknet/blob/master/cfg/' + cfg_name + '?raw=true' - weights_url = 'http://pjreddie.com/media/files/' + weights_name + '?raw=true' - net = _load_net(cfg_url, cfg_name, 
weights_url, weights_name) - build_dtype = {} - verify_darknet_frontend(net, build_dtype) - LIB.free_network(net) - -def test_forward_convolutional(): - '''test convolutional layer''' - net = LIB.make_network(1) - layer = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0) - net.layers[0] = layer - net.w = net.h = 224 - LIB.resize_network(net, 224, 224) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_dense(): - '''test fully connected layer''' - net = LIB.make_network(1) - layer = LIB.make_connected_layer(1, 75, 20, 1, 0, 0) - net.layers[0] = layer - net.w = net.h = 5 - LIB.resize_network(net, 5, 5) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_dense_batchnorm(): - '''test fully connected layer with batchnorm''' - net = LIB.make_network(1) - layer = LIB.make_connected_layer(1, 12, 2, 1, 1, 0) - for i in range(5): - layer.rolling_mean[i] = np.random.rand(1) - layer.rolling_variance[i] = np.random.rand(1) - layer.scales[i] = np.random.rand(1) - net.layers[0] = layer - net.w = net.h = 2 - LIB.resize_network(net, 2, 2) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_maxpooling(): - '''test maxpooling layer''' - net = LIB.make_network(1) - layer = LIB.make_maxpool_layer(1, 224, 224, 3, 2, 2, 0) - net.layers[0] = layer - net.w = net.h = 224 - LIB.resize_network(net, 224, 224) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_avgpooling(): - '''test avgerage pooling layer''' - net = LIB.make_network(1) - layer = LIB.make_avgpool_layer(1, 224, 224, 3) - net.layers[0] = layer - net.w = net.h = 224 - LIB.resize_network(net, 224, 224) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_batch_norm(): - '''test batch normalization layer''' - net = LIB.make_network(1) - layer = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 1, 0, 0, 0) - for i in range(32): - layer.rolling_mean[i] = np.random.rand(1) - layer.rolling_variance[i] = np.random.rand(1) - net.layers[0] = layer - net.w = net.h = 224 - LIB.resize_network(net, 224, 224) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_shortcut(): - '''test shortcut layer''' - net = LIB.make_network(3) - layer_1 = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0) - layer_2 = LIB.make_convolutional_layer(1, 111, 111, 32, 32, 1, 1, 1, 0, 1, 0, 0, 0, 0) - layer_3 = LIB.make_shortcut_layer(1, 0, 111, 111, 32, 111, 111, 32) - layer_3.activation = 1 - layer_3.alpha = 1 - layer_3.beta = 1 - net.layers[0] = layer_1 - net.layers[1] = layer_2 - net.layers[2] = layer_3 - net.w = net.h = 224 - LIB.resize_network(net, 224, 224) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_reorg(): - '''test reorg layer''' - net = LIB.make_network(2) - layer_1 = LIB.make_convolutional_layer(1, 222, 222, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0) - layer_2 = LIB.make_reorg_layer(1, 110, 110, 32, 2, 0, 0, 0) - net.layers[0] = layer_1 - net.layers[1] = layer_2 - net.w = net.h = 222 - LIB.resize_network(net, 222, 222) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_region(): - '''test region layer''' - net = LIB.make_network(2) - layer_1 = LIB.make_convolutional_layer(1, 19, 19, 3, 425, 1, 1, 1, 0, 1, 0, 0, 0, 0) - layer_2 = LIB.make_region_layer(1, 19, 19, 5, 80, 4) - layer_2.softmax = 1 - net.layers[0] = layer_1 - net.layers[1] = layer_2 - net.w = net.h = 19 - LIB.resize_network(net, 19, 19) - build_dtype = {} - 
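# As in the yolov2/yolov3 tests above, an empty dtype dict is passed so the build is not
# pinned to float32; the region graph also emits integer attribute outputs (see get_darknet_output above).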
verify_darknet_frontend(net, build_dtype) - LIB.free_network(net) - -def test_forward_yolo_op(): - '''test yolo layer''' - net = LIB.make_network(2) - layer_1 = LIB.make_convolutional_layer(1, 224, 224, 3, 14, 1, 3, 2, 0, 1, 0, 0, 0, 0) - layer_2 = LIB.make_yolo_layer(1, 111, 111, 2, 9, __darknetffi__.NULL, 2) - net.layers[0] = layer_1 - net.layers[1] = layer_2 - net.w = net.h = 224 - LIB.resize_network(net, 224, 224) - build_dtype = {} - verify_darknet_frontend(net, build_dtype) - LIB.free_network(net) - -def test_forward_upsample(): - '''test upsample layer''' - net = LIB.make_network(1) - layer = LIB.make_upsample_layer(1, 19, 19, 3, 3) - layer.scale = 1 - net.layers[0] = layer - net.w = net.h = 19 - LIB.resize_network(net, 19, 19) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_l2normalize(): - '''test l2 normalization layer''' - net = LIB.make_network(1) - layer = LIB.make_l2norm_layer(1, 224*224*3) - layer.c = layer.out_c = 3 - layer.h = layer.out_h = 224 - layer.w = layer.out_w = 224 - net.layers[0] = layer - net.w = net.h = 224 - LIB.resize_network(net, 224, 224) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_elu(): - '''test elu activation layer''' - net = LIB.make_network(1) - layer_1 = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 0, 0, 0, 0) - layer_1.activation = 8 - net.layers[0] = layer_1 - net.w = net.h = 224 - LIB.resize_network(net, 224, 224) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_softmax(): - '''test softmax layer''' - net = LIB.make_network(1) - layer_1 = LIB.make_softmax_layer(1, 75, 1) - layer_1.temperature = 1 - net.layers[0] = layer_1 - net.w = net.h = 5 - LIB.resize_network(net, net.w, net.h) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_softmax_temperature(): - '''test softmax layer''' - net = LIB.make_network(1) - layer_1 = LIB.make_softmax_layer(1, 75, 1) - layer_1.temperature = 0.8 - net.layers[0] = layer_1 - net.w = net.h = 5 - LIB.resize_network(net, net.w, net.h) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_rnn(): - '''test RNN layer''' - net = LIB.make_network(1) - batch = 1 - inputs = 256 - outputs = 256 - steps = 1 - activation = 1 - batch_normalize = 0 - adam = 0 - layer_1 = LIB.make_rnn_layer(batch, inputs, outputs, steps, activation, batch_normalize, adam) - net.layers[0] = layer_1 - net.inputs = inputs - net.outputs = outputs - net.w = net.h = 0 - LIB.resize_network(net, net.w, net.h) - verify_rnn_forward(net) - LIB.free_network(net) - -def _test_forward_crnn(): - '''test CRNN layer''' - net = LIB.make_network(1) - batch = 1 - c = 3 - h = 224 - w = 224 - hidden_filters = c - output_filters = c - steps = 1 - activation = 0 - batch_normalize = 0 - inputs = 256 - outputs = 256 - layer_1 = LIB.make_crnn_layer(batch, h, w, c, hidden_filters, output_filters, - steps, activation, batch_normalize) - net.layers[0] = layer_1 - net.inputs = inputs - net.outputs = output_filters * h * w - net.w = w - net.h = h - LIB.resize_network(net, net.w, net.h) - verify_darknet_frontend(net) - LIB.free_network(net) - -def test_forward_lstm(): - '''test LSTM layer''' - net = LIB.make_network(1) - batch = 1 - inputs = 256 - outputs = 256 - steps = 1 - batch_normalize = 0 - adam = 0 - layer_1 = LIB.make_lstm_layer(batch, inputs, outputs, steps, batch_normalize, adam) - net.layers[0] = layer_1 - net.inputs = inputs - net.outputs = outputs - net.w = net.h = 0 - LIB.resize_network(net, net.w, net.h) - 
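# RNN-family layers (RNN, LSTM, GRU) are checked against darknet's network_predict on a
# fixed one-hot input rather than on an image; see verify_rnn_forward above.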
verify_rnn_forward(net) - LIB.free_network(net) - -def test_forward_gru(): - '''test GRU layer''' - net = LIB.make_network(1) - batch = 1 - inputs = 256 - outputs = 256 - steps = 1 - batch_normalize = 0 - adam = 0 - layer_1 = LIB.make_gru_layer(batch, inputs, outputs, steps, batch_normalize, adam) - net.layers[0] = layer_1 - net.inputs = inputs - net.outputs = outputs - net.w = net.h = 0 - LIB.resize_network(net, net.w, net.h) - verify_rnn_forward(net) - LIB.free_network(net) - -def test_forward_activation_logistic(): - '''test logistic activation layer''' - net = LIB.make_network(1) - batch = 1 - h = 224 - w = 224 - c = 3 - n = 32 - groups = 1 - size = 3 - stride = 2 - padding = 0 - activation = 0 - batch_normalize = 0 - binary = 0 - xnor = 0 - adam = 0 - layer_1 = LIB.make_convolutional_layer(batch, h, w, c, n, groups, size, stride, padding, - activation, batch_normalize, binary, xnor, adam) - net.layers[0] = layer_1 - net.w = w - net.h = h - LIB.resize_network(net, net.w, net.h) - verify_darknet_frontend(net) - LIB.free_network(net) - -if __name__ == '__main__': - test_forward_resnet50() - test_forward_alexnet() - test_forward_extraction() - test_forward_yolov2() - test_forward_yolov3() - test_forward_convolutional() - test_forward_maxpooling() - test_forward_avgpooling() - test_forward_batch_norm() - test_forward_shortcut() - test_forward_dense() - test_forward_dense_batchnorm() - test_forward_softmax() - test_forward_softmax_temperature() - test_forward_rnn() - test_forward_reorg() - test_forward_region() - test_forward_yolo_op() - test_forward_upsample() - test_forward_l2normalize() - test_forward_elu() - test_forward_rnn() -# FIXME: Skip CRNN test since it causes segfault in libdarknet2.0.so -# _test_forward_crnn() - test_forward_lstm() - test_forward_gru() - test_forward_activation_logistic() diff --git a/nnvm/tests/python/frontend/keras/test_forward.py b/nnvm/tests/python/frontend/keras/test_forward.py deleted file mode 100644 index 78e4204e8250..000000000000 --- a/nnvm/tests/python/frontend/keras/test_forward.py +++ /dev/null @@ -1,354 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np -import nnvm -import tvm -from tvm.contrib import graph_runtime -from nnvm.testing.config import ctx_list -import keras - -# prevent keras from using up all gpu memory -import tensorflow as tf -from keras.backend.tensorflow_backend import set_session -config = tf.ConfigProto() -config.gpu_options.per_process_gpu_memory_fraction = 0.5 -set_session(tf.Session(config=config)) - - -def verify_keras_frontend(keras_model, need_transpose=True): - # Keras frontend currently supports tensorflow backend only. 
- assert(keras.backend.backend() == 'tensorflow') - - in_shapes = [] - for layer in keras_model._input_layers: - in_shapes.append(tuple(dim.value if dim.value is not None else 1 for dim in layer.input.shape)) - - def get_keras_output(xs, dtype='float32'): - return keras_model.predict(xs) - - def get_tvm_output(xs, target, ctx, dtype='float32'): - sym, params = nnvm.frontend.from_keras(keras_model) - shape_dict = {name: x.shape for (name, x) in zip(keras_model.input_names, xs)} - with nnvm.compiler.build_config(opt_level=2): - graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params) - m = graph_runtime.create(graph, lib, ctx) - for name, x in zip(keras_model.input_names, xs): - m.set_input(name, tvm.nd.array(x.astype(dtype))) - m.set_input(**params) - m.run() - - return [m.get_output(i).asnumpy() for i in range(m.get_num_outputs())] - - def to_channels_first(arr): - return arr.transpose([0, -1] + list(range(1, arr.ndim - 1))) - - def to_channels_last(arr): - return arr.transpose([0] + list(range(2, arr.ndim)) + [1]) - - xs = [np.random.uniform(size=shape, low=-1.0, high=1.0) for shape in in_shapes] - keras_out = get_keras_output(xs) - - keras_out = keras_out if isinstance(keras_out, list) else [keras_out] - for target, ctx in ctx_list(): - tvm_out = get_tvm_output([to_channels_first(x) for x in xs] if need_transpose else xs, target, ctx) - for kout, tout in zip(keras_out, tvm_out): - if need_transpose: - tout = to_channels_last(tout) - tvm.testing.assert_allclose(kout, tout, rtol=1e-5, atol=1e-5) - -def test_forward_elemwise_add(): - r = [] - data = keras.layers.Input(shape=(32,32,3)) - x = keras.layers.Conv2D(8, (3, 3), padding="same")(data) - r.append(x) - x = keras.layers.Conv2D(8, (3, 3), padding="same")(x) - r.append(x) - x = keras.layers.Conv2D(8, (3, 3), padding="same")(x) - # add two symbols - y = keras.layers.add([keras.layers.add([x, r[0]]), r[1]]) - y = keras.layers.GlobalAveragePooling2D()(y) - keras_model = keras.models.Model(data, y) - verify_keras_frontend(keras_model) - # add three symbols - y = keras.layers.add([x, r[0], r[1]]) - y = keras.layers.GlobalAveragePooling2D()(y) - keras_model = keras.models.Model(data, y) - verify_keras_frontend(keras_model) - - -def _test_forward_dense(): - data = keras.layers.Input(shape=(32,32,1)) - x = keras.layers.Flatten()(data) - x = keras.layers.Dropout(0.5)(x) - x = keras.layers.Dense(10, activation='relu', kernel_initializer='uniform')(x) - keras_model = keras.models.Model(data, x) - verify_keras_frontend(keras_model) - -def _test_forward_dense_with_3d_inp(): - data = keras.layers.Input(shape=(1, 20)) - x = keras.layers.Dense(10, activation='relu', kernel_initializer='uniform')(data) - keras_model = keras.models.Model(data, x) - verify_keras_frontend(keras_model, need_transpose=False) - -def test_forward_dense(): - _test_forward_dense() - _test_forward_dense_with_3d_inp() - -def test_forward_pool(): - data = keras.layers.Input(shape=(32,32,1)) - # maxpool - x = keras.layers.MaxPooling2D((3, 3), strides=(1, 1), padding='same')(data) - keras_model = keras.models.Model(data, x) - verify_keras_frontend(keras_model) - # avgpool - y = keras.layers.AveragePooling2D((3, 3), strides=(1, 1), padding='same')(data) - keras_model = keras.models.Model(data, y) - verify_keras_frontend(keras_model) - - -def test_forward_conv(): - data = keras.layers.Input(shape=(32,32,3)) - conv_funcs = [keras.layers.Conv2D(filters=10, kernel_size=(3,3), - strides=(2,2), padding='same'), - keras.layers.Conv2D(filters=10, kernel_size=(3,3), - 
dilation_rate=(2,2), padding='same'), - keras.layers.DepthwiseConv2D(kernel_size=(3,3), padding='same'), - keras.layers.Conv2DTranspose(filters=10, kernel_size=(3,3), padding='valid'), - keras.layers.SeparableConv2D(filters=10, kernel_size=(3,3), padding='same')] - for conv_func in conv_funcs: - x = conv_func(data) - keras_model = keras.models.Model(data, x) - verify_keras_frontend(keras_model) - - -def test_forward_upsample(): - data = keras.layers.Input(shape=(32,32,3)) - x = keras.layers.UpSampling2D(size=(3,3))(data) - keras_model = keras.models.Model(data, x) - verify_keras_frontend(keras_model) - - -def test_forward_reshape(): - data = keras.layers.Input(shape=(32,32,3)) - x = keras.layers.Reshape(target_shape=(32,32,3))(data) - keras_model = keras.models.Model(data, x) - verify_keras_frontend(keras_model) - - -def test_forward_crop(): - data = keras.layers.Input(shape=(32,32,3)) - x = keras.layers.Cropping2D(cropping=((1, 1), (1, 1)))(data) - x = keras.layers.Cropping2D(cropping=(1, 1))(x) - x = keras.layers.Cropping2D(cropping=1)(x) - x = keras.layers.Cropping2D(cropping=((0, 1), (1, 0)))(x) - x = keras.layers.Cropping2D(cropping=(1, 0))(x) - x = keras.layers.Cropping2D(cropping=0)(x) - x = keras.layers.Add()([x, x]) - keras_model = keras.models.Model(data, x) - verify_keras_frontend(keras_model) - - -def test_forward_vgg16(): - keras_model = keras.applications.vgg16.VGG16(include_top=True, weights='imagenet', - input_shape=(224,224,3), classes=1000) - verify_keras_frontend(keras_model) - - -def test_forward_xception(): - keras_model = keras.applications.xception.Xception(include_top=True, weights='imagenet', - input_shape=(299,299,3), classes=1000) - verify_keras_frontend(keras_model) - - -def test_forward_resnet50(): - keras_model = keras.applications.resnet50.ResNet50(include_top=True, weights='imagenet', - input_shape=(224,224,3), classes=1000) - verify_keras_frontend(keras_model) - - -def test_forward_mobilenet(): - keras_model = keras.applications.mobilenet.MobileNet(include_top=True, weights='imagenet', - input_shape=(224,224,3), classes=1000) - verify_keras_frontend(keras_model) - - -def test_forward_activations(): - data = keras.layers.Input(shape=(32,32,3)) - weights = np.random.rand(1, 32, 32, 3) - act_funcs = [keras.layers.Activation('softmax'), - keras.layers.Activation('softplus'), - keras.layers.ReLU(), - keras.layers.ReLU(max_value=6.), - keras.layers.LeakyReLU(alpha=0.3), - keras.layers.PReLU(weights=weights, alpha_initializer="zero"), - keras.layers.ELU(alpha=0.5), - keras.layers.Activation('selu'), - keras.layers.ThresholdedReLU(theta=0.5), - keras.layers.Activation('softsign'), - keras.layers.Activation('hard_sigmoid'), - keras.layers.Activation('sigmoid'), - keras.layers.Activation('tanh'), - keras.layers.Activation('linear')] - for act_func in act_funcs: - x = act_func(data) - keras_model = keras.models.Model(data, x) - verify_keras_frontend(keras_model) - - -def test_forward_multi_inputs(): - data1 = keras.layers.Input(shape=(32,32,3)) - data2 = keras.layers.Input(shape=(32,32,3)) - x = keras.layers.Conv2D(8, (3, 3), padding="same")(data1) - y = keras.layers.Conv2D(8, (3, 3), padding="same")(data2) - z = keras.layers.add([x, y]) - z = keras.layers.GlobalAveragePooling2D()(z) - keras_model = keras.models.Model([data1, data2], z) - verify_keras_frontend(keras_model) - - -def test_forward_multi_outputs(): - data = keras.layers.Input(shape=(32,32,3)) - x = keras.layers.Conv2D(8, (3, 3), padding="same")(data) - x = keras.layers.GlobalAveragePooling2D()(x) - y = 
keras.layers.Conv2D(8, (3, 3), padding="same")(data) - y = keras.layers.GlobalAveragePooling2D()(y) - keras_model = keras.models.Model(data, [x, y]) - verify_keras_frontend(keras_model) - - -def test_forward_reuse_layers(): - # reuse conv2d - data = keras.layers.Input(shape=(32,32,3)) - conv2d = keras.layers.Conv2D(8, (3, 3), padding="same") - x = conv2d(data) - y = conv2d(data) - z = keras.layers.add([x, y]) - z = keras.layers.GlobalAveragePooling2D()(z) - keras_model = keras.models.Model(data, z) - verify_keras_frontend(keras_model) - - # reuse add - data = keras.layers.Input(shape=(32,32,3)) - x = keras.layers.Conv2D(8, (3, 3), padding="same")(data) - add = keras.layers.Add() - x = add([x, x]) - x = add([x, x]) - z = keras.layers.GlobalAveragePooling2D()(x) - keras_model = keras.models.Model(data, z) - verify_keras_frontend(keras_model) - -def _test_LSTM(time_steps, inputs, hidden, return_state=True): - data = keras.layers.Input(shape=(time_steps, inputs)) - lstm_out = keras.layers.LSTM(hidden, - return_state=return_state, - recurrent_activation='sigmoid', - activation='tanh') - x = lstm_out(data) - keras_model = keras.models.Model(data, x) - verify_keras_frontend(keras_model, need_transpose=False) - -def _test_LSTM_MultiLayer(inputs, hidden): - inputs = keras.layers.Input(shape=(1, inputs)) - layer = keras.layers.LSTM(hidden, return_state=True, return_sequences=True, - recurrent_activation='sigmoid', - activation='tanh') - outputs = layer(inputs) - output, state = outputs[0], outputs[1:] - output = keras.layers.LSTM(hidden, recurrent_activation='sigmoid', - activation='tanh')(output, initial_state=state) - keras_model = keras.models.Model(inputs, output) - verify_keras_frontend(keras_model, need_transpose=False) - - -def test_forward_LSTM(): - _test_LSTM(1, 8, 8, return_state=True) - _test_LSTM(1, 4, 4, return_state=False) - _test_LSTM(20, 16, 256, return_state=False) - _test_LSTM_MultiLayer(4, 4) - -def _test_RNN(inputs, units): - data = keras.layers.Input(shape=(1, inputs)) - rnn_out = keras.layers.SimpleRNN(units, return_state=True, - activation='tanh') - x = rnn_out(data) - keras_model = keras.models.Model(data, x) - verify_keras_frontend(keras_model, need_transpose=False) - -def _test_RNN_MultiLayer(inputs, units): - inputs = keras.layers.Input(shape=(1, inputs)) - layer = keras.layers.SimpleRNN(units, return_state=True, return_sequences=True, - activation='tanh') - outputs = layer(inputs) - output, state = outputs[0], outputs[1:] - output = keras.layers.SimpleRNN(units, activation='tanh')(output, initial_state=state) - keras_model = keras.models.Model(inputs, output) - verify_keras_frontend(keras_model, need_transpose=False) - -def test_forward_RNN(): - _test_RNN(2, 4) - _test_RNN(4, 3) - _test_RNN_MultiLayer(4, 12) - -def _test_GRU(inputs, units): - data = keras.layers.Input(shape=(1, inputs)) - gru_out = keras.layers.GRU(units, - return_state=True, - recurrent_activation='sigmoid', - activation='tanh') - x = gru_out(data) - keras_model = keras.models.Model(data, x) - verify_keras_frontend(keras_model, need_transpose=False) - -def _test_GRU_MultiLayer(inputs, units): - inputs = keras.layers.Input(shape=(1, inputs)) - layer = keras.layers.GRU(units, - return_state=True, - return_sequences=True, - recurrent_activation='sigmoid', - activation='tanh') - outputs = layer(inputs) - output, state = outputs[0], outputs[1:] - output = keras.layers.GRU(units, recurrent_activation='sigmoid', - activation='tanh')(output, initial_state=state) - keras_model = keras.models.Model(inputs, output) 
- verify_keras_frontend(keras_model, need_transpose=False) - -def test_forward_GRU(): - _test_GRU(2, 4) - _test_GRU(4, 3) - _test_GRU_MultiLayer(4, 4) - -if __name__ == '__main__': - test_forward_elemwise_add() - test_forward_activations() - test_forward_dense() - test_forward_pool() - test_forward_conv() - test_forward_upsample() - test_forward_reshape() - test_forward_crop() - test_forward_vgg16() - test_forward_xception() - test_forward_resnet50() - test_forward_mobilenet() - - test_forward_multi_inputs() - test_forward_multi_outputs() - test_forward_reuse_layers() - test_forward_LSTM() - test_forward_RNN() - test_forward_GRU() diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/__init__.py b/nnvm/tests/python/frontend/mxnet/model_zoo/__init__.py deleted file mode 100644 index 3922ba673f2f..000000000000 --- a/nnvm/tests/python/frontend/mxnet/model_zoo/__init__.py +++ /dev/null @@ -1,62 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""MXNet and NNVM model zoo.""" -from __future__ import absolute_import -from . import mlp, resnet, vgg, dqn, dcgan, squeezenet, inception_v3 -import nnvm.testing - -_num_class = 1000 - -# mlp fc -mx_mlp = mlp.get_symbol(_num_class) -nnvm_mlp = nnvm.testing.mlp.get_workload(1, _num_class)[0] - -# resnet fc -mx_resnet = {} -nnvm_resnet = {} -for num_layer in [18, 34, 50, 101, 152, 200, 269]: - mx_resnet[num_layer] = resnet.get_symbol(_num_class, num_layer, '3,224,224') - nnvm_resnet[num_layer] = nnvm.testing.resnet.get_workload( - 1, _num_class, num_layers=num_layer)[0] - -# vgg fc -mx_vgg = {} -nnvm_vgg = {} -for num_layer in [11, 13, 16, 19]: - mx_vgg[num_layer] = vgg.get_symbol(_num_class, num_layer) - nnvm_vgg[num_layer] = nnvm.testing.vgg.get_workload( - 1, _num_class, num_layers=num_layer)[0] - -# squeezenet -mx_squeezenet = {} -nnvm_squeezenet = {} -for version in ['1.0', '1.1']: - mx_squeezenet[version] = squeezenet.get_symbol(version=version) - nnvm_squeezenet[version] = nnvm.testing.squeezenet.get_workload(1, version=version)[0] - -# inception -mx_inception_v3 = inception_v3.get_symbol() -nnvm_inception_v3 = nnvm.testing.inception_v3.get_workload(1)[0] - -# dqn -mx_dqn = dqn.get_symbol() -nnvm_dqn = nnvm.testing.dqn.get_workload(1)[0] - -# dcgan generator -mx_dcgan = dcgan.get_symbol() -nnvm_dcgan = nnvm.testing.dcgan.get_workload(1)[0] diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/dcgan.py b/nnvm/tests/python/frontend/mxnet/model_zoo/dcgan.py deleted file mode 100644 index e606b78e1597..000000000000 --- a/nnvm/tests/python/frontend/mxnet/model_zoo/dcgan.py +++ /dev/null @@ -1,82 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=unused-argument -""" -The MXNet symbol of DCGAN generator - -Adopted from: -https://github.com/tqchen/mxnet-gan/blob/master/mxgan/generator.py - -Reference: -Radford, Alec, Luke Metz, and Soumith Chintala. -"Unsupervised representation learning with deep convolutional generative adversarial networks." -arXiv preprint arXiv:1511.06434 (2015). -""" - -import mxnet as mx - -def deconv2d(data, ishape, oshape, kshape, name, stride=(2, 2)): - """a deconv layer that enlarges the feature map""" - target_shape = (oshape[-2], oshape[-1]) - pad_y = (kshape[0] - 1) // 2 - pad_x = (kshape[1] - 1) // 2 - adj_y = (target_shape[0] + 2 * pad_y - kshape[0]) % stride[0] - adj_x = (target_shape[1] + 2 * pad_x - kshape[1]) % stride[1] - - net = mx.sym.Deconvolution(data, - kernel=kshape, - stride=stride, - pad=(pad_y, pad_x), - adj=(adj_y, adj_x), - num_filter=oshape[0], - no_bias=True, - name=name) - return net - -def deconv2d_bn_relu(data, prefix, **kwargs): - """a block of deconv + batch norm + relu""" - eps = 1e-5 + 1e-12 - - net = deconv2d(data, name="%s_deconv" % prefix, **kwargs) - net = mx.sym.BatchNorm(net, eps=eps, name="%s_bn" % prefix) - net = mx.sym.Activation(net, name="%s_act" % prefix, act_type='relu') - return net - -def get_symbol(oshape=(3, 64, 64), ngf=128, code=None): - """get symbol of dcgan generator""" - assert oshape[-1] == 64, "Only support 64x64 image" - assert oshape[-2] == 64, "Only support 64x64 image" - - code = mx.sym.Variable("data") if code is None else code - net = mx.sym.FullyConnected(code, name="g1", num_hidden=ngf*8*4*4, no_bias=True, flatten=False) - net = mx.sym.Activation(net, act_type='relu') - # 4 x 4 - net = mx.sym.reshape(net, shape=(-1, ngf * 8, 4, 4)) - # 8 x 8 - net = deconv2d_bn_relu( - net, ishape=(ngf * 8, 4, 4), oshape=(ngf * 4, 8, 8), kshape=(4, 4), prefix="g2") - # 16x16 - net = deconv2d_bn_relu( - net, ishape=(ngf * 4, 8, 8), oshape=(ngf * 2, 16, 16), kshape=(4, 4), prefix="g3") - # 32x32 - net = deconv2d_bn_relu( - net, ishape=(ngf * 2, 16, 16), oshape=(ngf, 32, 32), kshape=(4, 4), prefix="g4") - # 64x64 - net = deconv2d( - net, ishape=(ngf, 32, 32), oshape=oshape[-3:], kshape=(4, 4), name="g5_deconv") - net = mx.sym.Activation(net, act_type='tanh') - return net diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/dqn.py b/nnvm/tests/python/frontend/mxnet/model_zoo/dqn.py deleted file mode 100644 index e661e18debcb..000000000000 --- a/nnvm/tests/python/frontend/mxnet/model_zoo/dqn.py +++ /dev/null @@ -1,43 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -The mxnet symbol of Nature DQN - -Reference: -Mnih, Volodymyr, et al. -"Human-level control through deep reinforcement learning." -Nature 518.7540 (2015): 529. -""" - -import mxnet as mx - -def get_symbol(num_action=18): - data = mx.sym.Variable(name='data') - net = mx.sym.Convolution(data, kernel=(8, 8), stride=(4, 4), - num_filter=32, name='conv1') - net = mx.sym.Activation(net, act_type='relu', name='relu1') - net = mx.sym.Convolution(net, kernel=(4, 4), stride=(2, 2), - num_filter=64, name='conv2') - net = mx.sym.Activation(net, act_type='relu', name='relu2') - net = mx.sym.Convolution(net, kernel=(3, 3), stride=(1, 1), - num_filter=64, name='conv3') - net = mx.sym.Activation(net, act_type='relu', name='relu3') - net = mx.sym.FullyConnected(net, num_hidden=512, name='fc4') - net = mx.sym.Activation(net, act_type='relu', name='relu4') - net = mx.sym.FullyConnected(net, num_hidden=num_action, name='fc5', flatten=False) - - return net diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/inception_v3.py b/nnvm/tests/python/frontend/mxnet/model_zoo/inception_v3.py deleted file mode 100644 index 8e8f36a3e644..000000000000 --- a/nnvm/tests/python/frontend/mxnet/model_zoo/inception_v3.py +++ /dev/null @@ -1,186 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Inception V3, suitable for images with around 299 x 299 - -Reference: -Szegedy, Christian, et al. "Rethinking the Inception Architecture for Computer Vision." arXiv preprint arXiv:1512.00567 (2015). 
- -Adopted from https://github.com/apache/incubator-mxnet/blob/ - master/example/image-classification/symbols/inception-v3.py -""" -import mxnet as mx -import numpy as np - -def Conv(data, num_filter, kernel=(1, 1), stride=(1, 1), pad=(0, 0), name=None, suffix=''): - conv = mx.sym.Convolution(data=data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad, no_bias=True, name='%s%s_conv2d' %(name, suffix)) - bn = mx.sym.BatchNorm(data=conv, eps=2e-5, name='%s%s_batchnorm' % (name, suffix)) - act = mx.sym.Activation(data=bn, act_type='relu', name='%s%s_relu' %(name, suffix)) - return act - - -def Inception7A(data, - num_1x1, - num_3x3_red, num_3x3_1, num_3x3_2, - num_5x5_red, num_5x5, - pool, proj, - name): - tower_1x1 = Conv(data, num_1x1, name=('%s_conv' % name)) - tower_5x5 = Conv(data, num_5x5_red, name=('%s_tower' % name), suffix='_conv') - tower_5x5 = Conv(tower_5x5, num_5x5, kernel=(5, 5), pad=(2, 2), name=('%s_tower' % name), suffix='_conv_1') - tower_3x3 = Conv(data, num_3x3_red, name=('%s_tower_1' % name), suffix='_conv') - tower_3x3 = Conv(tower_3x3, num_3x3_1, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1') - tower_3x3 = Conv(tower_3x3, num_3x3_2, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_2') - pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) - cproj = Conv(pooling, proj, name=('%s_tower_2' % name), suffix='_conv') - concat = mx.sym.Concat(*[tower_1x1, tower_5x5, tower_3x3, cproj], name='ch_concat_%s_chconcat' % name) - return concat - -# First Downsample -def Inception7B(data, - num_3x3, - num_d3x3_red, num_d3x3_1, num_d3x3_2, - pool, - name): - tower_3x3 = Conv(data, num_3x3, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_conv' % name)) - tower_d3x3 = Conv(data, num_d3x3_red, name=('%s_tower' % name), suffix='_conv') - tower_d3x3 = Conv(tower_d3x3, num_d3x3_1, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_tower' % name), suffix='_conv_1') - tower_d3x3 = Conv(tower_d3x3, num_d3x3_2, kernel=(3, 3), pad=(0, 0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_2') - pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(0,0), pool_type="max", name=('max_pool_%s_pool' % name)) - concat = mx.sym.Concat(*[tower_3x3, tower_d3x3, pooling], name='ch_concat_%s_chconcat' % name) - return concat - -def Inception7C(data, - num_1x1, - num_d7_red, num_d7_1, num_d7_2, - num_q7_red, num_q7_1, num_q7_2, num_q7_3, num_q7_4, - pool, proj, - name): - tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name)) - tower_d7 = Conv(data=data, num_filter=num_d7_red, name=('%s_tower' % name), suffix='_conv') - tower_d7 = Conv(data=tower_d7, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), name=('%s_tower' % name), suffix='_conv_1') - tower_d7 = Conv(data=tower_d7, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), name=('%s_tower' % name), suffix='_conv_2') - tower_q7 = Conv(data=data, num_filter=num_q7_red, name=('%s_tower_1' % name), suffix='_conv') - tower_q7 = Conv(data=tower_q7, num_filter=num_q7_1, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_1') - tower_q7 = Conv(data=tower_q7, num_filter=num_q7_2, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_2') - tower_q7 = Conv(data=tower_q7, num_filter=num_q7_3, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_3') - tower_q7 = Conv(data=tower_q7, num_filter=num_q7_4, kernel=(1, 7), pad=(0, 3), 
name=('%s_tower_1' % name), suffix='_conv_4') - pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) - cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name), suffix='_conv') - # concat - concat = mx.sym.Concat(*[tower_1x1, tower_d7, tower_q7, cproj], name='ch_concat_%s_chconcat' % name) - return concat - -def Inception7D(data, - num_3x3_red, num_3x3, - num_d7_3x3_red, num_d7_1, num_d7_2, num_d7_3x3, - pool, - name): - tower_3x3 = Conv(data=data, num_filter=num_3x3_red, name=('%s_tower' % name), suffix='_conv') - tower_3x3 = Conv(data=tower_3x3, num_filter=num_3x3, kernel=(3, 3), pad=(0,0), stride=(2, 2), name=('%s_tower' % name), suffix='_conv_1') - tower_d7_3x3 = Conv(data=data, num_filter=num_d7_3x3_red, name=('%s_tower_1' % name), suffix='_conv') - tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_1, kernel=(1, 7), pad=(0, 3), name=('%s_tower_1' % name), suffix='_conv_1') - tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_2, kernel=(7, 1), pad=(3, 0), name=('%s_tower_1' % name), suffix='_conv_2') - tower_d7_3x3 = Conv(data=tower_d7_3x3, num_filter=num_d7_3x3, kernel=(3, 3), stride=(2, 2), name=('%s_tower_1' % name), suffix='_conv_3') - pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) - # concat - concat = mx.sym.Concat(*[tower_3x3, tower_d7_3x3, pooling], name='ch_concat_%s_chconcat' % name) - return concat - -def Inception7E(data, - num_1x1, - num_d3_red, num_d3_1, num_d3_2, - num_3x3_d3_red, num_3x3, num_3x3_d3_1, num_3x3_d3_2, - pool, proj, - name): - tower_1x1 = Conv(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_conv' % name)) - tower_d3 = Conv(data=data, num_filter=num_d3_red, name=('%s_tower' % name), suffix='_conv') - tower_d3_a = Conv(data=tower_d3, num_filter=num_d3_1, kernel=(1, 3), pad=(0, 1), name=('%s_tower' % name), suffix='_mixed_conv') - tower_d3_b = Conv(data=tower_d3, num_filter=num_d3_2, kernel=(3, 1), pad=(1, 0), name=('%s_tower' % name), suffix='_mixed_conv_1') - tower_3x3_d3 = Conv(data=data, num_filter=num_3x3_d3_red, name=('%s_tower_1' % name), suffix='_conv') - tower_3x3_d3 = Conv(data=tower_3x3_d3, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), name=('%s_tower_1' % name), suffix='_conv_1') - tower_3x3_d3_a = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_1, kernel=(1, 3), pad=(0, 1), name=('%s_tower_1' % name), suffix='_mixed_conv') - tower_3x3_d3_b = Conv(data=tower_3x3_d3, num_filter=num_3x3_d3_2, kernel=(3, 1), pad=(1, 0), name=('%s_tower_1' % name), suffix='_mixed_conv_1') - pooling = mx.sym.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) - cproj = Conv(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_tower_2' % name), suffix='_conv') - # concat - concat = mx.sym.Concat(*[tower_1x1, tower_d3_a, tower_d3_b, tower_3x3_d3_a, tower_3x3_d3_b, cproj], name='ch_concat_%s_chconcat' % name) - return concat - -def get_symbol(num_classes=1000, **kwargs): - data = mx.sym.Variable(name="data") - # stage 1 - conv = Conv(data, 32, kernel=(3, 3), stride=(2, 2), name="conv") - conv_1 = Conv(conv, 32, kernel=(3, 3), name="conv_1") - conv_2 = Conv(conv_1, 64, kernel=(3, 3), pad=(1, 1), name="conv_2") - pool = mx.sym.Pooling(data=conv_2, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool") - # stage 2 - conv_3 = Conv(pool, 80, kernel=(1, 1), name="conv_3") - conv_4 = Conv(conv_3, 192, 
kernel=(3, 3), name="conv_4") - pool1 = mx.sym.Pooling(data=conv_4, kernel=(3, 3), stride=(2, 2), pool_type="max", name="pool1") - - # # stage 3 - in3a = Inception7A(pool1, 64, - 64, 96, 96, - 48, 64, - "avg", 32, "mixed") - in3b = Inception7A(in3a, 64, - 64, 96, 96, - 48, 64, - "avg", 64, "mixed_1") - in3c = Inception7A(in3b, 64, - 64, 96, 96, - 48, 64, - "avg", 64, "mixed_2") - in3d = Inception7B(in3c, 384, - 64, 96, 96, - "max", "mixed_3") - # stage 4 - in4a = Inception7C(in3d, 192, - 128, 128, 192, - 128, 128, 128, 128, 192, - "avg", 192, "mixed_4") - in4b = Inception7C(in4a, 192, - 160, 160, 192, - 160, 160, 160, 160, 192, - "avg", 192, "mixed_5") - in4c = Inception7C(in4b, 192, - 160, 160, 192, - 160, 160, 160, 160, 192, - "avg", 192, "mixed_6") - in4d = Inception7C(in4c, 192, - 192, 192, 192, - 192, 192, 192, 192, 192, - "avg", 192, "mixed_7") - in4e = Inception7D(in4d, 192, 320, - 192, 192, 192, 192, - "max", "mixed_8") - # stage 5 - in5a = Inception7E(in4e, 320, - 384, 384, 384, - 448, 384, 384, 384, - "avg", 192, "mixed_9") - in5b = Inception7E(in5a, 320, - 384, 384, 384, - 448, 384, 384, 384, - "max", 192, "mixed_10") - # pool - pool = mx.sym.Pooling(data=in5b, kernel=(8, 8), stride=(1, 1), pool_type="avg", name="global_pool") - flatten = mx.sym.Flatten(data=pool, name="flatten") - fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=num_classes, name='fc1', flatten=False) - softmax = mx.sym.SoftmaxOutput(data=fc1, name='softmax') - return softmax diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/mlp.py b/nnvm/tests/python/frontend/mxnet/model_zoo/mlp.py deleted file mode 100644 index 922b208749bf..000000000000 --- a/nnvm/tests/python/frontend/mxnet/model_zoo/mlp.py +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -""" -a simple multilayer perceptron -""" -import mxnet as mx - -def get_symbol(num_classes=10, **kwargs): - data = mx.symbol.Variable('data') - data = mx.sym.Flatten(data=data) - try: - fc1 = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128, flatten=False) - act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu") - fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64, flatten=False) - act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu") - fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=num_classes, flatten=False) - mlp = mx.symbol.softmax(data = fc3, name = 'softmax') - except: - fc1 = mx.symbol.FullyConnected(data = data, name='fc1', num_hidden=128) - act1 = mx.symbol.Activation(data = fc1, name='relu1', act_type="relu") - fc2 = mx.symbol.FullyConnected(data = act1, name = 'fc2', num_hidden = 64) - act2 = mx.symbol.Activation(data = fc2, name='relu2', act_type="relu") - fc3 = mx.symbol.FullyConnected(data = act2, name='fc3', num_hidden=num_classes) - mlp = mx.symbol.softmax(data = fc3, name = 'softmax') - return mlp diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/resnet.py b/nnvm/tests/python/frontend/mxnet/model_zoo/resnet.py deleted file mode 100644 index 3f9a870d31c0..000000000000 --- a/nnvm/tests/python/frontend/mxnet/model_zoo/resnet.py +++ /dev/null @@ -1,199 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -''' -Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py -Original author Wei Wu - -Implemented the following paper: - -Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. 
"Identity Mappings in Deep Residual Networks" -''' -import mxnet as mx -import numpy as np - -def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True, bn_mom=0.9, workspace=256, memonger=False): - """Return ResNet Unit symbol for building ResNet - Parameters - ---------- - data : str - Input data - num_filter : int - Number of output channels - bnf : int - Bottle neck channels factor with regard to num_filter - stride : tuple - Stride used in convolution - dim_match : Boolean - True means channel number between input and output is the same, otherwise means differ - name : str - Base name of the operators - workspace : int - Workspace used in convolution operator - """ - if bottle_neck: - bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1') - act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1') - conv1 = mx.sym.Convolution(data=act1, num_filter=int(num_filter*0.25), kernel=(1,1), stride=stride, pad=(0,0), - no_bias=True, workspace=workspace, name=name + '_conv1') - bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2') - act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2') - conv2 = mx.sym.Convolution(data=act2, num_filter=int(num_filter*0.25), kernel=(3,3), stride=(1,1), pad=(1,1), - no_bias=True, workspace=workspace, name=name + '_conv2') - bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn3') - act3 = mx.sym.Activation(data=bn3, act_type='relu', name=name + '_relu3') - conv3 = mx.sym.Convolution(data=act3, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0), no_bias=True, - workspace=workspace, name=name + '_conv3') - if dim_match: - shortcut = data - else: - shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True, - workspace=workspace, name=name+'_sc') - if memonger: - shortcut._set_attr(mirror_stage='True') - return conv3 + shortcut - else: - bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn1') - act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1') - conv1 = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(3,3), stride=stride, pad=(1,1), - no_bias=True, workspace=workspace, name=name + '_conv1') - bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2') - act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2') - conv2 = mx.sym.Convolution(data=act2, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1), - no_bias=True, workspace=workspace, name=name + '_conv2') - if dim_match: - shortcut = data - else: - shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True, - workspace=workspace, name=name+'_sc') - if memonger: - shortcut._set_attr(mirror_stage='True') - return conv2 + shortcut - -def resnet(units, num_stages, filter_list, num_classes, image_shape, bottle_neck=True, bn_mom=0.9, workspace=256, dtype='float32', memonger=False): - """Return ResNet symbol of - Parameters - ---------- - units : list - Number of units in each stage - num_stages : int - Number of stage - filter_list : list - Channel size of each stage - num_classes : int - Ouput size of symbol - dataset : str - Dataset type, only cifar10 and imagenet supports - workspace : int - Workspace used in convolution operator - dtype : str - Precision (float32 or float16) - """ - 
num_unit = len(units) - assert(num_unit == num_stages) - data = mx.sym.Variable(name='data') - if dtype == 'float32': - # data = mx.sym.identity(data=data, name='id') - data = data - else: - if dtype == 'float16': - data = mx.sym.Cast(data=data, dtype=np.float16) - data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data') - (nchannel, height, width) = image_shape - if height <= 32: # such as cifar10 - body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(3, 3), stride=(1,1), pad=(1, 1), - no_bias=True, name="conv0", workspace=workspace) - else: # often expected to be 224 such as imagenet - body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(7, 7), stride=(2,2), pad=(3, 3), - no_bias=True, name="conv0", workspace=workspace) - body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0') - body = mx.sym.Activation(data=body, act_type='relu', name='relu0') - body = mx.sym.Pooling(data=body, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max') - - for i in range(num_stages): - body = residual_unit(body, filter_list[i+1], (1 if i==0 else 2, 1 if i==0 else 2), False, - name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck, workspace=workspace, - memonger=memonger) - for j in range(units[i]-1): - body = residual_unit(body, filter_list[i+1], (1,1), True, name='stage%d_unit%d' % (i + 1, j + 2), - bottle_neck=bottle_neck, workspace=workspace, memonger=memonger) - bn1 = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1') - relu1 = mx.sym.Activation(data=bn1, act_type='relu', name='relu1') - # Although kernel is not used here when global_pool=True, we should put one - pool1 = mx.sym.Pooling(data=relu1, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1') - flat = mx.sym.Flatten(data=pool1) - try: - fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name='fc1', flatten=False) - except: - fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name='fc1') - if dtype == 'float16': - fc1 = mx.sym.Cast(data=fc1, dtype=np.float32) - return mx.sym.softmax(data=fc1, name='softmax') - -def get_symbol(num_classes, num_layers, image_shape, conv_workspace=256, dtype='float32', **kwargs): - """ - Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py - Original author Wei Wu - """ - image_shape = [int(l) for l in image_shape.split(',')] - (nchannel, height, width) = image_shape - if height <= 28: - num_stages = 3 - if (num_layers-2) % 9 == 0 and num_layers >= 164: - per_unit = [(num_layers-2)//9] - filter_list = [16, 64, 128, 256] - bottle_neck = True - elif (num_layers-2) % 6 == 0 and num_layers < 164: - per_unit = [(num_layers-2)//6] - filter_list = [16, 16, 32, 64] - bottle_neck = False - else: - raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers)) - units = per_unit * num_stages - else: - if num_layers >= 50: - filter_list = [64, 256, 512, 1024, 2048] - bottle_neck = True - else: - filter_list = [64, 64, 128, 256, 512] - bottle_neck = False - num_stages = 4 - if num_layers == 18: - units = [2, 2, 2, 2] - elif num_layers == 34: - units = [3, 4, 6, 3] - elif num_layers == 50: - units = [3, 4, 6, 3] - elif num_layers == 101: - units = [3, 4, 23, 3] - elif num_layers == 152: - units = [3, 8, 36, 3] - elif num_layers == 200: - units = [3, 24, 36, 3] - elif num_layers == 269: - units = [3, 30, 48, 8] - else: - raise ValueError("no experiments done on num_layers {}, you can 
do it yourself".format(num_layers)) - - return resnet(units = units, - num_stages = num_stages, - filter_list = filter_list, - num_classes = num_classes, - image_shape = image_shape, - bottle_neck = bottle_neck, - workspace = conv_workspace, - dtype = dtype) diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/squeezenet.py b/nnvm/tests/python/frontend/mxnet/model_zoo/squeezenet.py deleted file mode 100644 index 093da51a78a7..000000000000 --- a/nnvm/tests/python/frontend/mxnet/model_zoo/squeezenet.py +++ /dev/null @@ -1,92 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Symbol of SqueezeNet - -Reference: -Iandola, Forrest N., et al. -"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016). -""" - -import mxnet as mx - -# Helpers -def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels): - net = _make_fire_conv(net, squeeze_channels, 1, 0) - - left = _make_fire_conv(net, expand1x1_channels, 1, 0) - right = _make_fire_conv(net, expand3x3_channels, 3, 1) - # NOTE : Assume NCHW layout here - net = mx.sym.concat(left, right, dim=1) - - return net - -def _make_fire_conv(net, channels, kernel_size, padding=0): - net = mx.sym.Convolution(net, num_filter=channels, kernel=(kernel_size, kernel_size), - pad=(padding, padding)) - net = mx.sym.Activation(net, act_type='relu') - return net - -# Net -def get_symbol(num_classes=1000, version='1.0', **kwargs): - """Get symbol of SqueezeNet - - Parameters - ---------- - num_classes: int - The number of classification results - - version : str, optional - "1.0" or "1.1" of SqueezeNet - """ - assert version in ['1.0', '1.1'], ("Unsupported SqueezeNet version {version}:" - "1.0 or 1.1 expected".format(version=version)) - net = mx.sym.Variable("data") - if version == '1.0': - net = mx.sym.Convolution(net, num_filter=96, kernel=(7, 7), stride=(2, 2), pad=(3, 3)) - net = mx.sym.Activation(net, act_type='relu') - net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2)) - net = _make_fire(net, 16, 64, 64) - net = _make_fire(net, 16, 64, 64) - net = _make_fire(net, 32, 128, 128) - net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2)) - net = _make_fire(net, 32, 128, 128) - net = _make_fire(net, 48, 192, 192) - net = _make_fire(net, 48, 192, 192) - net = _make_fire(net, 64, 256, 256) - net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2)) - net = _make_fire(net, 64, 256, 256) - else: - net = mx.sym.Convolution(net, num_filter=64, kernel=(3, 3), stride=(2, 2), pad=(1, 1)) - net = mx.sym.Activation(net, act_type='relu') - net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2)) - net = _make_fire(net, 16, 64, 64) - net = _make_fire(net, 16, 64, 64) - net = 
mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2)) - net = _make_fire(net, 32, 128, 128) - net = _make_fire(net, 32, 128, 128) - net = mx.sym.Pooling(data=net, kernel=(3, 3), pool_type='max', stride=(2, 2)) - net = _make_fire(net, 48, 192, 192) - net = _make_fire(net, 48, 192, 192) - net = _make_fire(net, 64, 256, 256) - net = _make_fire(net, 64, 256, 256) - net = mx.sym.Dropout(net, p=0.5) - net = mx.sym.Convolution(net, num_filter=num_classes, kernel=(1, 1)) - net = mx.sym.Activation(net, act_type='relu') - net = mx.sym.Pooling(data=net, global_pool=True, kernel=(13, 13), pool_type='avg') - net = mx.sym.flatten(net) - return mx.sym.softmax(net) diff --git a/nnvm/tests/python/frontend/mxnet/model_zoo/vgg.py b/nnvm/tests/python/frontend/mxnet/model_zoo/vgg.py deleted file mode 100644 index 68215bb80aaa..000000000000 --- a/nnvm/tests/python/frontend/mxnet/model_zoo/vgg.py +++ /dev/null @@ -1,85 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""References: - -Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for -large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014). 
-""" - -import mxnet as mx -import numpy as np - -def get_feature(internel_layer, layers, filters, batch_norm = False, **kwargs): - for i, num in enumerate(layers): - for j in range(num): - internel_layer = mx.sym.Convolution(data = internel_layer, kernel=(3, 3), pad=(1, 1), num_filter=filters[i], name="conv%s_%s" %(i + 1, j + 1)) - if batch_norm: - internel_layer = mx.symbol.BatchNorm(data=internel_layer, name="bn%s_%s" %(i + 1, j + 1)) - internel_layer = mx.sym.Activation(data=internel_layer, act_type="relu", name="relu%s_%s" %(i + 1, j + 1)) - internel_layer = mx.sym.Pooling(data=internel_layer, pool_type="max", kernel=(2, 2), stride=(2,2), name="pool%s" %(i + 1)) - return internel_layer - -def get_classifier(input_data, num_classes, **kwargs): - flatten = mx.sym.Flatten(data=input_data, name="flatten") - try: - fc6 = mx.sym.FullyConnected(data=flatten, num_hidden=4096, name="fc6", flatten=False) - relu6 = mx.sym.Activation(data=fc6, act_type="relu", name="relu6") - drop6 = mx.sym.Dropout(data=relu6, p=0.5, name="drop6") - fc7 = mx.sym.FullyConnected(data=drop6, num_hidden=4096, name="fc7", flatten=False) - relu7 = mx.sym.Activation(data=fc7, act_type="relu", name="relu7") - drop7 = mx.sym.Dropout(data=relu7, p=0.5, name="drop7") - fc8 = mx.sym.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8", flatten=False) - except: - fc6 = mx.sym.FullyConnected(data=flatten, num_hidden=4096, name="fc6") - relu6 = mx.sym.Activation(data=fc6, act_type="relu", name="relu6") - drop6 = mx.sym.Dropout(data=relu6, p=0.5, name="drop6") - fc7 = mx.sym.FullyConnected(data=drop6, num_hidden=4096, name="fc7") - relu7 = mx.sym.Activation(data=fc7, act_type="relu", name="relu7") - drop7 = mx.sym.Dropout(data=relu7, p=0.5, name="drop7") - fc8 = mx.sym.FullyConnected(data=drop7, num_hidden=num_classes, name="fc8") - return fc8 - -def get_symbol(num_classes, num_layers=11, batch_norm=False, dtype='float32', **kwargs): - """ - Parameters - ---------- - num_classes : int, default 1000 - Number of classification classes. - num_layers : int - Number of layers for the variant of densenet. Options are 11, 13, 16, 19. - batch_norm : bool, default False - Use batch normalization. - dtype: str, float32 or float16 - Data precision. - """ - vgg_spec = {11: ([1, 1, 2, 2, 2], [64, 128, 256, 512, 512]), - 13: ([2, 2, 2, 2, 2], [64, 128, 256, 512, 512]), - 16: ([2, 2, 3, 3, 3], [64, 128, 256, 512, 512]), - 19: ([2, 2, 4, 4, 4], [64, 128, 256, 512, 512])} - if num_layers not in vgg_spec: - raise ValueError("Invalide num_layers {}. Possible choices are 11,13,16,19.".format(num_layers)) - layers, filters = vgg_spec[num_layers] - data = mx.sym.Variable(name="data") - if dtype == 'float16': - data = mx.sym.Cast(data=data, dtype=np.float16) - feature = get_feature(data, layers, filters, batch_norm) - classifier = get_classifier(feature, num_classes) - if dtype == 'float16': - classifier = mx.sym.Cast(data=classifier, dtype=np.float32) - symbol = mx.sym.softmax(data=classifier, name='softmax') - return symbol diff --git a/nnvm/tests/python/frontend/mxnet/test_forward.py b/nnvm/tests/python/frontend/mxnet/test_forward.py deleted file mode 100644 index dd315c6f87b0..000000000000 --- a/nnvm/tests/python/frontend/mxnet/test_forward.py +++ /dev/null @@ -1,333 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np - -import topi -import tvm -from tvm.contrib import graph_runtime -import nnvm.symbol as sym -import nnvm.compiler -from nnvm.testing.config import ctx_list -from nnvm import frontend -import mxnet as mx -from mxnet import gluon -from mxnet.gluon.model_zoo import vision -import model_zoo - - -def verify_mxnet_frontend_impl(mx_symbol, data_shape=(1, 3, 224, 224), out_shape=(1, 1000), - gluon_impl=False, name=None, dtype='float32'): - """Use name different from test to avoid pytest picking it up""" - if gluon_impl: - def get_gluon_output(name, x): - net = vision.get_model(name) - net.collect_params().initialize(mx.init.Xavier()) - net_sym = gluon.nn.SymbolBlock(outputs=net(mx.sym.var('data')), - inputs=mx.sym.var('data'), - params=net.collect_params()) - out = net_sym(mx.nd.array(x.astype(dtype))).asnumpy() - return out, net_sym - else: - def get_mxnet_output(symbol, x, dtype='float32'): - from collections import namedtuple - Batch = namedtuple('Batch', ['data']) - mod = mx.mod.Module(symbol, label_names=None) - mod.bind(data_shapes=[('data', x.shape)], for_training=False) - mod.init_params() - mod.forward(Batch([mx.nd.array(x.astype(dtype))])) - out = mod.get_outputs()[0].asnumpy() - args, auxs = mod.get_params() - return out, args, auxs - - def get_tvm_output(symbol, x, args, auxs, target, ctx, dtype='float32'): - if gluon_impl: - new_sym, params = frontend.from_mxnet(symbol) - else: - new_sym, params = frontend.from_mxnet(symbol, args, auxs) - - dshape = x.shape - shape_dict = {'data': dshape} - with nnvm.compiler.build_config(opt_level=3): - graph, lib, params = nnvm.compiler.build(new_sym, target, shape_dict, params=params) - m = graph_runtime.create(graph, lib, ctx) - # set inputs - m.set_input("data", tvm.nd.array(x.astype(dtype))) - m.set_input(**params) - m.run() - # get outputs - out = m.get_output(0, tvm.nd.empty(out_shape, dtype)) - return out.asnumpy() - - # random input - x = np.random.uniform(size=data_shape) - if gluon_impl: - gluon_out, gluon_sym = get_gluon_output(name, x) - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(gluon_sym, x, None, None, target, ctx, dtype) - tvm.testing.assert_allclose(gluon_out, tvm_out, rtol=1e-5, atol=1e-5) - else: - mx_out, args, auxs = get_mxnet_output(mx_symbol, x, dtype) - assert "data" not in args - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(mx_symbol, x, args, auxs, target, ctx, dtype) - tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5) - -def test_forward_mlp(): - mlp = model_zoo.mx_mlp - verify_mxnet_frontend_impl(mlp) - -def test_forward_vgg(): - for n in [11]: - mx_sym = model_zoo.mx_vgg[n] - verify_mxnet_frontend_impl(mx_sym) - -def test_forward_resnet(): - for n in [18]: - mx_sym = model_zoo.mx_resnet[n] - verify_mxnet_frontend_impl(mx_sym) - -def test_forward_elu(): - data = mx.sym.var('data') - data = mx.sym.concat(data, -data, dim=1) # negative part explicitly - mx_sym = mx.sym.LeakyReLU(data, 
act_type='elu') - verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100)) - -def test_forward_rrelu(): - data = mx.sym.var('data') - data = mx.sym.concat(data, -data, dim=1) # negative part explicitly - mx_sym = mx.sym.LeakyReLU(data, act_type='rrelu', lower_bound=0.3, upper_bound=0.7) - verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100)) - -def test_forward_prelu(): - data = mx.sym.var('data') - data = mx.sym.concat(data, -data, dim=1) # negative part explicitly - mx_sym = mx.sym.LeakyReLU(data, act_type='prelu') - verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100)) - -def test_forward_softrelu(): - data = mx.sym.var('data') - data = mx.sym.concat(data, -data, dim=1) # negative part explicitly - mx_sym = mx.sym.Activation(data, act_type='softrelu') - verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100)) - -def test_forward_fc_flatten(): - # test flatten=True option in mxnet 0.11.1 - data = mx.sym.var('data') - try: - mx_sym = mx.sym.FullyConnected(data, num_hidden=100, flatten=True) - verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 100)) - mx_sym = mx.sym.FullyConnected(mx.sym.Flatten(data), num_hidden=100, flatten=False) - verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 100)) - except: - pass - -def test_forward_clip(): - data = mx.sym.var('data') - data = mx.sym.concat(data, -data, dim=1) # negative part explicitly - mx_sym = mx.sym.clip(data, a_min=0, a_max=1) - verify_mxnet_frontend_impl(mx_sym, (1, 3, 100, 100), (1, 6, 100, 100)) - -def test_forward_split(): - data = mx.sym.var('data') - mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=False) - verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 1, 2, 1)) - -def test_forward_split_squeeze(): - data = mx.sym.var('data') - mx_sym = mx.sym.split(data, axis=1, num_outputs=4, squeeze_axis=True) - verify_mxnet_frontend_impl(mx_sym, (1, 4, 2, 1), (1, 2, 1)) - -def test_forward_expand_dims(): - data = mx.sym.var('data') - mx_sym = mx.sym.expand_dims(data, axis=1) - verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 1, 3, 4)) - -def test_forward_pooling(): - data = mx.sym.var('data') - mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type='avg') - verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 8, 8)) - - mx_sym = mx.sym.Pooling(data, kernel=(3, 3), pad=(1, 1), pool_type='max') - verify_mxnet_frontend_impl(mx_sym, (1, 20, 8, 8), (1, 20, 8, 8)) - -def test_forward_lrn(): - data = mx.sym.var('data') - mx_sym = mx.sym.LRN(data, alpha=2, beta=2, knorm=1, nsize=5) - verify_mxnet_frontend_impl(mx_sym, (1, 10, 24, 24), (1, 10, 24, 24)) - -def test_forward_ones(): - data = mx.sym.var('data') - ones = mx.sym.ones(shape=(2, 3, 4), dtype='float32') - mx_sym = mx.sym.elemwise_add(data, ones) - verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4)) - -def test_forward_zeros(): - data = mx.sym.var('data') - zeros = mx.sym.zeros(shape=(2, 3, 4), dtype='float32') - mx_sym = mx.sym.elemwise_add(data, zeros) - verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4)) - -def test_forward_ones_like(): - data = mx.sym.var('data') - mx_sym = mx.sym.ones_like(data, dtype='float32') - verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4)) - -def test_forward_zeros_like(): - data = mx.sym.var('data') - mx_sym = mx.sym.zeros_like(data, dtype='float32') - verify_mxnet_frontend_impl(mx_sym, (2, 3, 4), (2, 3, 4)) - -def test_forward_argmax(): - data = mx.sym.var('data') - mx_sym = mx.sym.argmax(data, axis=1) - verify_mxnet_frontend_impl(mx_sym, 
(5, 3), (5,)) - -def test_forward_argmin(): - data = mx.sym.var('data') - mx_sym = mx.sym.argmin(data, axis=0) - verify_mxnet_frontend_impl(mx_sym, (5, 4), (4,)) - -def test_forward_where(): - cond = mx.sym.var('cond') - x = mx.sym.var('x') - y = mx.sym.var('y') - dshape = (2, 2) - dtype = 'float32' - mx_sym = mx.sym.where(cond, x, y) - np_cond = np.array([[0, 1], [-1, 0]]).astype(dtype) - np_x = np.random.uniform(size=dshape).astype(dtype) - np_y = np.random.uniform(size=dshape).astype(dtype) - mx_cond = mx.nd.array(np_cond) - mx_x = mx.nd.array(np_x) - mx_y = mx.nd.array(np_y) - mod = mx.mod.Module(mx_sym, label_names=None, data_names=['cond', 'x', 'y']) - mod.bind(data_shapes=[('cond', dshape), ('x', dshape), ('y', dshape)], for_training=False) - mod.init_params() - args, auxs = mod.get_params() - mx_out = mx.nd.where(mx_cond, mx_x, mx_y).asnumpy() - out_shape = dshape - new_sym, params = frontend.from_mxnet(mx_sym, args, auxs) - shape_dict = {'cond': dshape, 'x': dshape, 'y': dshape} - for target, ctx in ctx_list(): - with nnvm.compiler.build_config(opt_level=3): - graph, lib, params = nnvm.compiler.build(new_sym, target, shape_dict, params=params) - m = graph_runtime.create(graph, lib, ctx) - # set inputs - m.set_input("cond", tvm.nd.array(np_cond)) - m.set_input("x", tvm.nd.array(np_x)) - m.set_input("y", tvm.nd.array(np_y)) - m.set_input(**params) - m.run() - # get outputs - tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy() - tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5) - -def test_forward_slice(): - data = mx.sym.var('data') - mx_sym = mx.sym.slice(data, begin=(0, 1), end=(2, 4)) - verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 3)) - mx_sym = mx.sym.slice(data, begin=(-1, 1), end=(-3, 4), step=(-1, 2)) - verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 2)) - -def test_forward_maximum(): - a = mx.sym.var('a') - b = mx.sym.var('b') - dshape = (10, 20) - dtype = 'float32' - mx_sym = mx.sym._internal._maximum(a, b) - np_a = np.random.uniform(size=dshape).astype(dtype) - np_b = np.random.uniform(size=dshape).astype(dtype) - mx_a = mx.nd.array(np_a) - mx_b = mx.nd.array(np_b) - mod = mx.mod.Module(mx_sym, label_names=None, data_names=['a', 'b']) - mod.bind(data_shapes=[('a', dshape), ('b', dshape)], for_training=False) - mod.init_params() - args, auxs = mod.get_params() - mx_out = mx.nd._internal._maximum(mx_a, mx_b).asnumpy() - out_shape = dshape - new_sym, params = frontend.from_mxnet(mx_sym, args, auxs) - shape_dict = {'a': dshape, 'b': dshape} - for target, ctx in ctx_list(): - with nnvm.compiler.build_config(opt_level=3): - graph, lib, params = nnvm.compiler.build(new_sym, target, shape_dict, params=params) - m = graph_runtime.create(graph, lib, ctx) - # set inputs - m.set_input("a", tvm.nd.array(np_a)) - m.set_input("b", tvm.nd.array(np_b)) - m.set_input(**params) - m.run() - # get outputs - tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy() - tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5) - -def test_forward_minimum(): - a = mx.sym.var('a') - b = mx.sym.var('b') - dshape = (10, 20) - dtype = 'float32' - mx_sym = mx.sym._internal._minimum(a, b) - np_a = np.random.uniform(size=dshape).astype(dtype) - np_b = np.random.uniform(size=dshape).astype(dtype) - mx_a = mx.nd.array(np_a) - mx_b = mx.nd.array(np_b) - mod = mx.mod.Module(mx_sym, label_names=None, data_names=['a', 'b']) - mod.bind(data_shapes=[('a', dshape), ('b', dshape)], for_training=False) - mod.init_params() - args, auxs = mod.get_params() - mx_out = 
mx.nd._internal._minimum(mx_a, mx_b).asnumpy() - out_shape = dshape - new_sym, params = frontend.from_mxnet(mx_sym, args, auxs) - shape_dict = {'a': dshape, 'b': dshape} - for target, ctx in ctx_list(): - with nnvm.compiler.build_config(opt_level=3): - graph, lib, params = nnvm.compiler.build(new_sym, target, shape_dict, params=params) - m = graph_runtime.create(graph, lib, ctx) - # set inputs - m.set_input("a", tvm.nd.array(np_a)) - m.set_input("b", tvm.nd.array(np_b)) - m.set_input(**params) - m.run() - # get outputs - tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy() - tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5) - - -if __name__ == '__main__': - test_forward_mlp() - test_forward_vgg() - test_forward_resnet() - test_forward_elu() - test_forward_rrelu() - test_forward_prelu() - test_forward_softrelu() - test_forward_fc_flatten() - test_forward_clip() - test_forward_split() - test_forward_split_squeeze() - test_forward_expand_dims() - test_forward_pooling() - test_forward_lrn() - test_forward_ones() - test_forward_zeros() - test_forward_ones_like() - test_forward_zeros_like() - test_forward_argmax() - test_forward_argmin() - test_forward_where() - test_forward_slice() - test_forward_maximum() - test_forward_minimum() diff --git a/nnvm/tests/python/frontend/mxnet/test_graph.py b/nnvm/tests/python/frontend/mxnet/test_graph.py deleted file mode 100644 index 1bbd0a97e8e1..000000000000 --- a/nnvm/tests/python/frontend/mxnet/test_graph.py +++ /dev/null @@ -1,95 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import mxnet as mx -import nnvm -from nnvm.compiler import graph_util, graph_attr -import model_zoo - -def compare_graph(sym1, sym2, ishape=(2, 3, 224, 224)): - g1 = nnvm.graph.create(sym1) - g2 = nnvm.graph.create(sym2) - graph_attr.set_shape_inputs(g1, {'data':ishape}) - graph_attr.set_shape_inputs(g2, {'data':ishape}) - g1 = g1.apply("InferShape").apply("SimplifyInference") - g2 = g2.apply("InferShape").apply("SimplifyInference") - graph_util.check_graph_equal(g1, g2) - -def test_mlp(): - mx_sym = model_zoo.mx_mlp - from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) - nnvm_sym = model_zoo.nnvm_mlp - compare_graph(from_mx_sym, nnvm_sym) - -def test_vgg(): - for n in [11, 13, 16, 19]: - mx_sym = model_zoo.mx_vgg[n] - from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) - nnvm_sym = model_zoo.nnvm_vgg[n] - compare_graph(from_mx_sym, nnvm_sym) - -def test_resnet(): - for n in [18, 34, 50, 101]: - mx_sym = model_zoo.mx_resnet[n] - from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) - nnvm_sym = model_zoo.nnvm_resnet[n] - compare_graph(from_mx_sym, nnvm_sym) - -def test_squeezenet(): - for version in ['1.0', '1.1']: - mx_sym = model_zoo.mx_squeezenet[version] - from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) - nnvm_sym = model_zoo.nnvm_squeezenet[version] - compare_graph(from_mx_sym, nnvm_sym) - -def test_inception_v3(): - mx_sym = model_zoo.mx_inception_v3 - from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) - nnvm_sym = model_zoo.nnvm_inception_v3 - compare_graph(from_mx_sym, nnvm_sym, ishape=(2, 3, 299, 299)) - -def test_dqn(): - mx_sym = model_zoo.mx_dqn - from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) - nnvm_sym = model_zoo.nnvm_dqn - compare_graph(from_mx_sym, nnvm_sym, ishape=(2, 4, 84, 84)) - -def test_dcgan(): - mx_sym = model_zoo.mx_dcgan - from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) - nnvm_sym = model_zoo.nnvm_dcgan - compare_graph(from_mx_sym, nnvm_sym, ishape=(2, 100)) - -def test_multi_outputs(): - def compose(F, **kwargs): - x = F.sym.Variable('x') - y = F.sym.Variable('y') - z = F.sym.split(x, **kwargs) - return F.sym.broadcast_sub(F.sym.broadcast_add(z[0], z[2]), y) - mx_sym = compose(mx, num_outputs=3, axis=1) - from_mx_sym, _ = nnvm.frontend.from_mxnet(mx_sym) - nnvm_sym = compose(nnvm, indices_or_sections=3, axis=1) - compare_graph(from_mx_sym, nnvm_sym) - -if __name__ == '__main__': - test_mlp() - test_vgg() - test_resnet() - test_multi_outputs() - test_dqn() - test_dcgan() - test_squeezenet() - test_inception_v3() diff --git a/nnvm/tests/python/frontend/onnx/model_zoo/__init__.py b/nnvm/tests/python/frontend/onnx/model_zoo/__init__.py deleted file mode 100644 index f5eb604acfd7..000000000000 --- a/nnvm/tests/python/frontend/onnx/model_zoo/__init__.py +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -"""Store for onnx examples and common models.""" -from __future__ import absolute_import as _abs -import os -import logging -from .super_resolution import get_super_resolution -from tvm.contrib.download import download_testdata - - -URLS = { - 'super_resolution.onnx': 'https://gist.github.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/93672b029103648953c4e5ad3ac3aadf346a4cdc/super_resolution_0.2.onnx', - 'squeezenet1_1.onnx': 'https://gist.github.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/93672b029103648953c4e5ad3ac3aadf346a4cdc/squeezenet1_1_0.2.onnx', - 'lenet.onnx': 'https://gist.github.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/93672b029103648953c4e5ad3ac3aadf346a4cdc/lenet_0.2.onnx', - 'resnet18_1_0.onnx': 'https://gist.github.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/b385b1b242dc89a35dd808235b885ed8a19aedc1/resnet18_1.0.onnx'} - -# download and add paths -for k, v in URLS.items(): - name = k.split('.')[0] - relpath = os.path.join('onnx', k) - abspath = download_testdata(v, relpath, module='onnx') - locals()[name] = abspath - -# symbol for graph comparison -super_resolution_sym = get_super_resolution() diff --git a/nnvm/tests/python/frontend/onnx/model_zoo/squeezenet.py b/nnvm/tests/python/frontend/onnx/model_zoo/squeezenet.py deleted file mode 100644 index 2de2d1075494..000000000000 --- a/nnvm/tests/python/frontend/onnx/model_zoo/squeezenet.py +++ /dev/null @@ -1,118 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# coding: utf-8 -# pylint: disable=unused-argument - -""" -Symbol of SqueezeNet - -Reference: -Iandola, Forrest N., et al. -"Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size." (2016). 
-""" - -from nnvm import symbol as sym -from nnvm.testing.utils import create_workload - -# Helpers -def _make_fire(net, squeeze_channels, expand1x1_channels, expand3x3_channels): - net = _make_fire_conv(net, squeeze_channels, 1, 0) - - left = _make_fire_conv(net, expand1x1_channels, 1, 0) - right = _make_fire_conv(net, expand3x3_channels, 3, 1) - # NOTE : Assume NCHW layout here - net = sym.concatenate(left, right, axis=1) - - return net - -def _make_fire_conv(net, channels, kernel_size, padding=0): - net = sym.conv2d(net, channels=channels, kernel_size=(kernel_size, kernel_size), - padding=(padding, padding)) - net = sym.relu(net) - return net - -# Net -def get_symbol(num_classes, version, **kwargs): - """Get symbol of SqueezeNet - - Parameters - ---------- - num_classes: int - The number of classification results - - version : str, optional - "1.0" or "1.1" of SqueezeNet - """ - assert version == '1.1', ("Unsupported SqueezeNet version {version}:" - "1.1 expected".format(version=version)) - net = sym.Variable("data") - - net = sym.conv2d(net, channels=64, kernel_size=(3, 3), strides=(2, 2)) - net = sym.relu(net) - net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) - net = _make_fire(net, 16, 64, 64) - net = _make_fire(net, 16, 64, 64) - net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) - net = _make_fire(net, 32, 128, 128) - net = _make_fire(net, 32, 128, 128) - net = sym.max_pool2d(net, pool_size=(3, 3), strides=(2, 2)) - net = _make_fire(net, 48, 192, 192) - net = _make_fire(net, 48, 192, 192) - net = _make_fire(net, 64, 256, 256) - net = _make_fire(net, 64, 256, 256) - - net = sym.dropout(net, rate=0.5) - net = sym.conv2d(net, channels=num_classes, kernel_size=(1, 1)) - net = sym.relu(net) - net = sym.global_avg_pool2d(net) - return sym.softmax(net, axis=1) - -def get_workload(batch_size=1, num_classes=1000, version='1.0', - image_shape=(3, 224, 224), dtype="float32", **kwargs): - """Get benchmark workload for SqueezeNet - - Parameters - ---------- - batch_size : int - The batch size used in the model - - num_classes : int, optional - Number of classes - - version : str, optional - "1.0" or "1.1" of SqueezeNet - - image_shape : tuple, optional - The input image shape - - dtype : str, optional - The data type - - kwargs : dict - Extra arguments - - Returns - ------- - net : nnvm.Symbol - The computational graph - - params : dict of str to NDArray - The parameters. - """ - net = get_symbol(num_classes=num_classes, version=version, **kwargs) - return create_workload(net, batch_size, image_shape, dtype) diff --git a/nnvm/tests/python/frontend/onnx/model_zoo/super_resolution.py b/nnvm/tests/python/frontend/onnx/model_zoo/super_resolution.py deleted file mode 100644 index a98478e58307..000000000000 --- a/nnvm/tests/python/frontend/onnx/model_zoo/super_resolution.py +++ /dev/null @@ -1,36 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""NNVM symbol corresponding to super_resolution.onnx example.""" -from nnvm import sym - -def get_super_resolution(): - factor = 3 - size = 224 - data = sym.Variable(name='9') - conv1 = sym.conv2d(data, channels=64, kernel_size=(5, 5), padding=(2, 2), use_bias=False) - relu1 = sym.relu(conv1 + sym.expand_dims(sym.Variable(name='2', shape=(64)), axis=1, num_newaxis=2)) - conv2 = sym.conv2d(relu1, channels=64, kernel_size=(3, 3), padding=(1, 1), use_bias=False) - relu2 = sym.relu(conv2 + sym.expand_dims(sym.Variable(name='4', shape=(64)), axis=1, num_newaxis=2)) - conv3 = sym.conv2d(relu2, channels=32, kernel_size=(3, 3), padding=(1, 1), use_bias=False) - relu3 = sym.relu(conv3 + sym.expand_dims(sym.Variable(name='6', shape=(32)), axis=1, num_newaxis=2)) - conv4 = sym.conv2d(relu3, channels=factor**2, kernel_size=(3, 3), padding=(1, 1), use_bias=False) - conv4 = conv4 + sym.expand_dims(sym.Variable(name='8', shape=(factor**2)), axis=1, num_newaxis=2) - # TODO(zhreshold): allow shape inference for batch size > 1 - r1 = sym.reshape(conv4, shape=(1, 1, factor, factor, size, size)) - t1 = sym.transpose(r1, axes=(0, 1, 4, 2, 5, 3)) - r2 = sym.reshape(t1, shape=(1, 1, size * factor, size * factor)) - return r2 diff --git a/nnvm/tests/python/frontend/onnx/test_forward.py b/nnvm/tests/python/frontend/onnx/test_forward.py deleted file mode 100644 index 8cb6876956c4..000000000000 --- a/nnvm/tests/python/frontend/onnx/test_forward.py +++ /dev/null @@ -1,1099 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import numpy as np -import math -import nnvm -import topi -import topi.testing -import tvm -from tvm.contrib import graph_runtime -from nnvm.testing.config import ctx_list -import onnx -from model_zoo import super_resolution, squeezenet1_1, lenet, resnet18_1_0 -from onnx import helper, TensorProto - -def get_tvm_output(graph_def, input_data, target, ctx, output_shape=None, output_dtype='float32'): - """ Generic function to execute and get tvm output""" - - sym, params = nnvm.frontend.from_onnx(graph_def) - target = 'llvm' - if isinstance(input_data, list): - input_names = {} - shape_dict = {} - dtype_dict = {} - for i, _ in enumerate(input_data): - input_names[i] = graph_def.graph.input[i].name - shape_dict[input_names[i]] = input_data[i].shape - dtype_dict[input_names[i]] = input_data[i].dtype - else: - input_names = graph_def.graph.input[0].name - shape_dict = {input_names: input_data.shape} - dtype_dict = {input_names: input_data.dtype} - - graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, - dtype=dtype_dict, params=params) - - ctx = tvm.cpu(0) - from tvm.contrib import graph_runtime - m = graph_runtime.create(graph, lib, ctx) - # set inputs - if isinstance(input_data, list): - for i, e in enumerate(input_names): - m.set_input(input_names[i], tvm.nd.array(input_data[i].astype(input_data[i].dtype))) - else: - m.set_input(input_names, tvm.nd.array(input_data.astype(input_data.dtype))) - - m.set_input(**params) - # execute - m.run() - # get outputs - if isinstance(output_shape, list) and isinstance(output_dtype, list): - tvm_output_list = [] - for i, _ in enumerate(output_shape): - tvm_output = m.get_output(i) - tvm_output_list.append(tvm_output.asnumpy()) - return tvm_output_list - else: - tvm_output = m.get_output(0) - return tvm_output.asnumpy() - -def get_caffe2_output(model, x, dtype='float32'): - import caffe2.python.onnx.backend - prepared_backend = caffe2.python.onnx.backend.prepare(model) - W = {model.graph.input[0].name: x.astype(dtype)} - c2_out = prepared_backend.run(W)[0] - return c2_out - - -def verify_onnx_forward_impl(graph_file, data_shape, out_shape): - dtype = 'float32' - x = np.random.uniform(size=data_shape) - model = onnx.load_model(graph_file) - c2_out = get_caffe2_output(model, x, dtype) - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, x, target, ctx, out_shape, dtype) - tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5) - -def verify_super_resolution_example(): - verify_onnx_forward_impl(super_resolution, (1, 1, 224, 224), (1, 1, 672, 672)) - -def verify_squeezenet1_1(): - verify_onnx_forward_impl(squeezenet1_1, (1, 3, 224, 224), (1, 1000)) - -def verify_lenet(): - verify_onnx_forward_impl(lenet, (1, 1, 28, 28), (1, 10)) - -def verify_resnet18(): - verify_onnx_forward_impl(resnet18_1_0, (1, 3, 224, 224), (1, 1000)) - - -def test_reshape(): - in_shape = (4, 3, 3, 4) - ref_shape = (3, 4, 4, 3) - - ref_array = np.array(ref_shape) - ref_node = onnx.helper.make_node('Constant', - inputs=[], - outputs=['ref_in'], - value=onnx.helper.make_tensor(name = 'const_tensor', - data_type = onnx.TensorProto.INT32, - dims = ref_array.shape, - vals = ref_array.flatten().astype(int))) - reshape_node = helper.make_node("Reshape", ["in", "ref_in"], ["out"]) - - graph = helper.make_graph([ref_node, reshape_node], - "reshape_test", - inputs = [helper.make_tensor_value_info("in", - TensorProto.FLOAT, list(in_shape))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(ref_shape))]) - - model = 
helper.make_model(graph, producer_name='reshape_test') - - for target, ctx in ctx_list(): - x = np.random.uniform(size=in_shape).astype('int32') - tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'float32') - - tvm.testing.assert_allclose(ref_shape, tvm_out.shape) - -def test_reshape_like(): - in_shape = (4, 3, 3, 4) - ref_shape = (3, 4, 4, 3) - - ref_array = np.random.uniform(size=ref_shape).astype('float32') - ref_node = onnx.helper.make_node('Constant', - inputs=[], - outputs=['ref_in'], - value=onnx.helper.make_tensor(name = 'const_tensor', - data_type = onnx.TensorProto.FLOAT, - dims = ref_array.shape, - vals = ref_array.flatten().astype(float))) - copy_node = helper.make_node("Identity", ["ref_in"], ["copy_in"]) - reshape_node = helper.make_node("Reshape", ["in", "copy_in"], ["out"]) - - graph = helper.make_graph([ref_node, copy_node, reshape_node], - "reshape_like_test", - inputs = [helper.make_tensor_value_info("in", - TensorProto.FLOAT, list(in_shape))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(ref_shape))]) - - model = helper.make_model(graph, producer_name='reshape_like_test') - - for target, ctx in ctx_list(): - x = np.random.uniform(size=in_shape).astype('float32') - tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'float32') - - tvm.testing.assert_allclose(ref_shape, tvm_out.shape) - -def _test_power_iteration(x_shape, y_shape): - if isinstance(y_shape, int): - y_shape = [y_shape] - - x = np.random.uniform(size=x_shape).astype(np.float32) - y = np.random.uniform(size=y_shape).astype(np.float32) - - np_res = np.power(x, y).astype(np.float32) - - res = helper.make_node("Pow", ['x', 'y'], ['out']) - - graph = helper.make_graph([res], - 'power_test', - inputs = [helper.make_tensor_value_info("x", - TensorProto.FLOAT, list(x_shape)), - helper.make_tensor_value_info("y", - TensorProto.FLOAT, list(y_shape))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(np_res.shape))]) - - model = helper.make_model(graph, producer_name='power_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, [x, y], target, ctx, np_res.shape) - tvm.testing.assert_allclose(np_res, tvm_out, rtol=1e-5, atol=1e-5) - -def test_power(): - _test_power_iteration((1, 3), (1)) - _test_power_iteration((2, 3), (2, 3)) - _test_power_iteration((2, 3), (1, 3)) - -def test_squeeze(): - in_shape = (1, 3, 1, 3, 1, 1) - out_shape = (3, 3) - y = helper.make_node("Squeeze", ['in'], ['out'], axes=[0, 2, 4, 5]) - - graph = helper.make_graph([y], - 'squeeze_test', - inputs = [helper.make_tensor_value_info("in", - TensorProto.FLOAT, list(in_shape))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(out_shape))]) - - model = helper.make_model(graph, producer_name='squeeze_test') - - for target, ctx in ctx_list(): - x = np.random.uniform(size=in_shape).astype('float32') - tvm_out = get_tvm_output(model, x, target, ctx, out_shape, 'float32') - - tvm.testing.assert_allclose(out_shape, tvm_out.shape) - -def test_unsqueeze(): - in_shape = (3, 3) - axis = (0, 3, 4) - out_shape = (1, 3, 3, 1, 1) - y = helper.make_node("Unsqueeze", ['in'], ['out'], axes=list(axis)) - - graph = helper.make_graph([y], - 'squeeze_test', - inputs = [helper.make_tensor_value_info("in", - TensorProto.FLOAT, list(in_shape))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(out_shape))]) - - model = helper.make_model(graph, producer_name='squeeze_test') - - for target, ctx in ctx_list(): - x = 
np.random.uniform(size=in_shape).astype('float32') - tvm_out = get_tvm_output(model, x, target, ctx, out_shape, 'float32') - - tvm.testing.assert_allclose(out_shape, tvm_out.shape) - -def verify_gather(in_shape, indices, axis, dtype): - x = np.random.uniform(size=in_shape).astype(dtype) - indices = np.array(indices, dtype="int32") - out_np = np.take(x, indices, axis=axis) - - y = helper.make_node("Gather", ['in', 'indices'], ['out'], axis=axis) - - graph = helper.make_graph([y], - 'gather_test', - inputs = [helper.make_tensor_value_info("in", - TensorProto.FLOAT, list(in_shape)), - helper.make_tensor_value_info("indices", - TensorProto.INT32, list(indices.shape))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(out_np.shape))]) - model = helper.make_model(graph, producer_name='gather_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, [x, indices], target, ctx, out_np.shape) - tvm.testing.assert_allclose(out_np, tvm_out) - -def test_gather(): - verify_gather((4,), [1], 0, 'int32') - verify_gather((1,4), [0], 0, 'int32') - verify_gather((4,), [[[1,0],[0,1]]], 0, 'float32') - verify_gather((2,2), [[[1,0],[0,1]]], 1, 'int32') - verify_gather((3,3,3), [[[1,0]]], -1, 'int32') - verify_gather((4,3,5,6), [[2,1,0,0]], 0, 'float32') - -def _test_slice_iteration(indata, outdata, starts, ends, axes=None): - if axes: - y = helper.make_node("Slice", ['in'], ['out'], axes=axes, starts=starts, ends=ends) - else: - y = helper.make_node("Slice", ['in'], ['out'], starts=starts, ends=ends) - - graph = helper.make_graph([y], - 'slice_test', - inputs = [helper.make_tensor_value_info("in", - TensorProto.FLOAT, list(indata.shape))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(outdata.shape))]) - - model = helper.make_model(graph, producer_name='slice_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, 'float32') - - tvm.testing.assert_allclose(outdata, tvm_out) - -def test_slice(): - x = np.random.randn(20, 10, 5).astype(np.float32) - _test_slice_iteration(x, x[0:3, 0:10], (0, 0), (3, 10), (0, 1)) - _test_slice_iteration(x, x[:, :, 3:4], (0, 0, 3), (20, 10, 4)) - _test_slice_iteration(x, x[:, 1:1000], (1), (1000), (1)) - _test_slice_iteration(x, x[:, 0:-1], (0), (-1), (1)) - -def _test_onnx_op_elementwise(inshape, outfunc, npargs, dtype, opname, kwargs, rtol=1e-7, atol=1e-7): - indata = np.random.uniform(-1, 1, size=inshape).astype(dtype) - outdata = outfunc(indata, **npargs) - - y = helper.make_node(opname, ['in'], ['out'], **kwargs) - - graph = helper.make_graph([y], - opname+'_test', - inputs = [helper.make_tensor_value_info("in", - TensorProto.FLOAT, list(indata.shape))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(outdata.shape))]) - - model = helper.make_model(graph, producer_name=opname+'_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, dtype) - - tvm.testing.assert_allclose(outdata, tvm_out, rtol=rtol, atol=atol) - -def test_floor(): - _test_onnx_op_elementwise((2, 4, 5, 6), np.floor, {}, 'float32', 'Floor', {}) - -def test_ceil(): - _test_onnx_op_elementwise((2, 4, 5, 6), np.ceil, {}, 'float32', 'Ceil', {}) - -def test_clip(): - _test_onnx_op_elementwise((2, 4, 5, 6), - np.clip, - {'a_min': -1.0, 'a_max': 1.0}, - 'float32', - 'Clip', - {'min': -1.0, 'max': 1.0}) - -def test_matmul(): - a_shape = (4, 3) - b_shape = (3, 4) - - a_array = 
np.random.uniform(size=a_shape).astype('float32') - b_array = np.random.uniform(size=b_shape).astype('float32') - out_np = np.matmul(a_array, b_array) - - mul_node = helper.make_node("MatMul", ["a", "b"], ["out"]) - - graph = helper.make_graph([mul_node], - "matmul_test", - inputs = [helper.make_tensor_value_info("a", - TensorProto.FLOAT, list(a_shape)), - helper.make_tensor_value_info("b", - TensorProto.FLOAT, list(b_shape))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(out_np.shape))]) - - model = helper.make_model(graph, producer_name='matmul_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, [a_array, b_array], target, ctx, out_np.shape) - tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5) - -def verify_lrn(shape, nsize, dtype, alpha=None, beta=None, bias=None): - in_array = np.random.uniform(size=shape).astype(dtype) - - if alpha == None and beta == None and bias==None: - alpha = 0.0001 - beta = 0.75 - bias = 1.0 - node = onnx.helper.make_node('LRN', inputs=['in'], outputs=['out'], size=nsize) - else: - node = onnx.helper.make_node('LRN', inputs=['in'], outputs=['out'], alpha=alpha, - beta=beta, bias=bias, size=nsize) - - graph = helper.make_graph([node], - "lrn_test", - inputs = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(shape))], - outputs = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(shape))]) - model = helper.make_model(graph, producer_name='lrn_test') - - def _get_python_lrn(): - square_sum = np.zeros(shape).astype(dtype) - for n, c, h, w in np.ndindex(in_array.shape): - square_sum[n, c, h, w] = sum(in_array[n, - max(0, c - int(math.floor((nsize - 1) / 2))): \ - min(5, c + int(math.ceil((nsize - 1) / 2)) + 1), - h, - w] ** 2) - py_out = in_array / ((bias + (alpha / nsize) * square_sum) ** beta) - return py_out - - for target, ctx in ctx_list(): - new_sym, params = nnvm.frontend.from_onnx(model) - - input_name = model.graph.input[0].name - shape_dict = {input_name: in_array.shape} - dtype_dict = {input_name: dtype} - graph, lib, params = nnvm.compiler.build(new_sym, target, - shape_dict, dtype_dict, params=params) - m = graph_runtime.create(graph, lib, ctx) - # set inputs - m.set_input(input_name, tvm.nd.array(in_array.astype(dtype))) - m.set_input(**params) - m.run() - # get outputs - tvm_out = m.get_output(0, tvm.nd.empty(shape, dtype)) - py_out = _get_python_lrn() - tvm.testing.assert_allclose(py_out, tvm_out.asnumpy(), rtol=1e-5, atol=1e-5) - -def test_lrn(): - verify_lrn((5, 5, 5, 5), 3, 'float32') - verify_lrn((5, 5, 5, 5), 3, 'float32', alpha=0.0002, beta=0.5, bias=2.0) - -def _test_upsample_nearest(): - scale = 2 - in_shape = (1, 1, 3, 3) - out_shape = (1, 1, 3*scale, 3*scale) - y = helper.make_node("Upsample", ['in'], ['out'], mode='nearest', scales=[1.0, 1.0, 2.0, 2.0]) - - in_array = np.random.uniform(size=in_shape).astype(np.float32) - out_array = topi.testing.upsampling_python(in_array, (scale, scale), "NCHW") - - graph = helper.make_graph([y], - 'upsample_nearest_test', - inputs = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))], - outputs = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))]) - - model = helper.make_model(graph, producer_name='upsample_nearest_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, in_array, target, ctx, out_shape, 'float32') - tvm.testing.assert_allclose(out_array, tvm_out) - -def _test_upsample_bilinear(): - scale = 2 - in_shape = (1, 1, 3, 3) - out_shape = 
(1, 1, 3*scale, 3*scale) - y = helper.make_node("Upsample", ['in'], ['out'], mode='linear', scales=[1.0, 1.0, 2.0, 2.0]) - - in_array = np.random.uniform(size=in_shape).astype(np.float32) - out_array = topi.testing.bilinear_resize_python(in_array, (3*scale, 3*scale), "NCHW", align_corners=False) - - graph = helper.make_graph([y], - 'upsample_bilinear_test', - inputs = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))], - outputs = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))]) - - model = helper.make_model(graph, producer_name='upsample_bilinear_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, in_array, target, ctx, out_shape, 'float32') - tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5) - -def _test_upsample_bilinear_opset9(): - scale = 2 - in_shape = (1, 1, 3, 3) - out_shape = (1, 1, 3*scale, 3*scale) - y = helper.make_node("Upsample", ['in','scales'], ['out'], mode='linear') - scales=[1.0, 1.0, 2.0, 2.0] - in_array = np.random.uniform(size=in_shape).astype(np.float32) - out_array = topi.testing.bilinear_resize_python(in_array, (3*scale, 3*scale), "NCHW", align_corners=False) - - ref_array = np.array(scales) - ref_node = helper.make_node('Constant', - inputs=[], - outputs=['scales'], - value=onnx.helper.make_tensor(name = 'const_tensor', - data_type = TensorProto.FLOAT, - dims = ref_array.shape, - vals = ref_array.flatten().astype(float))) - - graph = helper.make_graph([ref_node, y], - 'upsample_bilinear_opset9_test', - inputs = [helper.make_tensor_value_info("in", TensorProto.FLOAT, list(in_shape))], - outputs = [helper.make_tensor_value_info("out", TensorProto.FLOAT, list(out_shape))]) - - model = helper.make_model(graph, producer_name='upsample_bilinear_opset9_test') - inputs = [] - inputs.append(in_array) - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, inputs, target, ctx, out_shape, 'float32') - tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5) - -def test_upsample(): - _test_upsample_nearest() - _test_upsample_bilinear() - _test_upsample_bilinear_opset9() - -def _test_softmax(inshape, axis): - opname = 'Softmax' - indata = np.random.uniform(size=inshape).astype(np.float32) - outshape = inshape - outdata = topi.testing.softmax_python(indata) - if isinstance(axis, int): - y = helper.make_node(opname, ['in'], ['out'], axis = axis) - elif axis is None: - y = helper.make_node(opname, ['in'], ['out']) - - graph = helper.make_graph([y], - opname+'_test', - inputs = [helper.make_tensor_value_info("in", - TensorProto.FLOAT, list(indata.shape))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(outdata.shape))]) - - model = helper.make_model(graph, producer_name=opname+'_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, indata, target, ctx, outshape, 'float32') - tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5) - -def test_softmax(): - _test_softmax((1, 10), None) - _test_softmax((1, 10), 1) - -def verify_min(input_dim): - dtype = 'float32' - - a_np1 = np.random.uniform(size=input_dim).astype(dtype) - a_np2 = np.random.uniform(size=input_dim).astype(dtype) - a_np3 = np.random.uniform(size=input_dim).astype(dtype) - - b_np = np.min((a_np1, a_np2, a_np3), axis=0) - - min_node = helper.make_node("Min", ["a_np1", "a_np2", "a_np3"], ["out"]) - - graph = helper.make_graph([min_node], - "Min_test", - inputs = [helper.make_tensor_value_info("a_np1", - TensorProto.FLOAT, list(input_dim)), - 
helper.make_tensor_value_info("a_np2", - TensorProto.FLOAT, list(input_dim)), - helper.make_tensor_value_info("a_np3", - TensorProto.FLOAT, list(input_dim))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(b_np.shape))]) - - model = helper.make_model(graph, producer_name='Min_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape) - tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5) - -def test_forward_min(): - verify_min((1, 3, 20, 20)) - verify_min((20, 20)) - -def verify_max(input_dim): - dtype = 'float32' - - a_np1 = np.random.uniform(size=input_dim).astype(dtype) - a_np2 = np.random.uniform(size=input_dim).astype(dtype) - a_np3 = np.random.uniform(size=input_dim).astype(dtype) - - b_np = np.max((a_np1, a_np2, a_np3), axis=0) - - max_node = helper.make_node("Max", ["a_np1", "a_np2", "a_np3"], ["out"]) - - graph = helper.make_graph([max_node], - "Max_test", - inputs = [helper.make_tensor_value_info("a_np1", - TensorProto.FLOAT, list(input_dim)), - helper.make_tensor_value_info("a_np2", - TensorProto.FLOAT, list(input_dim)), - helper.make_tensor_value_info("a_np3", - TensorProto.FLOAT, list(input_dim))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(b_np.shape))]) - - model = helper.make_model(graph, producer_name='Max_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape) - tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5) - -def test_forward_max(): - verify_max((1, 3, 20, 20)) - verify_max((20, 20)) - -def verify_mean(input_dim): - dtype = 'float32' - - a_np1 = np.random.uniform(size=input_dim).astype(dtype) - a_np2 = np.random.uniform(size=input_dim).astype(dtype) - a_np3 = np.random.uniform(size=input_dim).astype(dtype) - - b_np = np.mean((a_np1, a_np2, a_np3), axis=0) - - mean_node = helper.make_node("Mean", ["a_np1", "a_np2", "a_np3"], ["out"]) - - graph = helper.make_graph([mean_node], - "Mean_test", - inputs = [helper.make_tensor_value_info("a_np1", - TensorProto.FLOAT, list(input_dim)), - helper.make_tensor_value_info("a_np2", - TensorProto.FLOAT, list(input_dim)), - helper.make_tensor_value_info("a_np3", - TensorProto.FLOAT, list(input_dim))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(b_np.shape))]) - - model = helper.make_model(graph, producer_name='Mean_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape) - tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5) - -def test_forward_mean(): - verify_mean((1, 3, 20, 20)) - verify_mean((20, 20)) - -def verify_hardsigmoid(input_dim, alpha, beta): - dtype = 'float32' - - a_np1 = np.random.uniform(size=input_dim).astype(dtype) - - b_np = np.clip(a_np1 * alpha + beta, 0, 1) - - hardsigmoid_node = helper.make_node("HardSigmoid", ["a_np1"], ["out"], alpha=alpha, beta=beta) - - graph = helper.make_graph([hardsigmoid_node], - "HardSigmoid_test", - inputs = [helper.make_tensor_value_info("a_np1", - TensorProto.FLOAT, list(input_dim))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(b_np.shape))]) - - model = helper.make_model(graph, producer_name='HardSigmoid_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape) - tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5) - -def test_forward_hardsigmoid(): - 
verify_hardsigmoid((1, 3, 20, 20), 0.5, 0.6) - verify_hardsigmoid((20, 20), 0.3, 0.4) - -def verify_argmin(input_dim, axis=None, keepdims=None): - def _argmin_numpy(data, axis=0, keepdims=True): - result = np.argmin(data, axis=axis) - if (keepdims == 1): - result = np.expand_dims(result, axis) - return result.astype(data.dtype) - - a_np1 = np.random.uniform(-10, 10, input_dim).astype(np.int32) - if keepdims is None and axis is None: - b_np = _argmin_numpy(a_np1) - node = onnx.helper.make_node('ArgMin', - inputs=['a_np1'], - outputs=['out']) - elif axis is None: - b_np = _argmin_numpy(a_np1, keepdims=keepdims) - node = onnx.helper.make_node('ArgMin', - inputs=['a_np1'], - outputs=['out'], - keepdims=keepdims) - elif keepdims is None: - b_np = _argmin_numpy(a_np1, axis=axis) - node = onnx.helper.make_node('ArgMin', - inputs=['a_np1'], - outputs=['out'], - axis=axis) - else: - b_np = _argmin_numpy(a_np1, axis=axis, keepdims=keepdims) - node = onnx.helper.make_node('ArgMin', - inputs=['a_np1'], - outputs=['out'], - axis=axis, - keepdims=keepdims) - graph = helper.make_graph([node], - "argmin_test", - inputs = [helper.make_tensor_value_info("a_np1", - TensorProto.INT32, list(a_np1.shape))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.INT32, list(b_np.shape))]) - - model = helper.make_model(graph, producer_name='argmin_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape, b_np.dtype) - tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5) - -def verify_argmax(input_dim, axis=None, keepdims=None): - def _argmax_numpy(data, axis=0, keepdims=True): - result = np.argmax(data, axis=axis) - if (keepdims == 1): - result = np.expand_dims(result, axis) - return result.astype(data.dtype) - - a_np1 = np.random.uniform(-10, 10, input_dim).astype(np.int32) - - if keepdims is None and axis is None: - b_np = _argmax_numpy(a_np1) - node = onnx.helper.make_node('ArgMax', - inputs=['a_np1'], - outputs=['out']) - elif axis is None: - b_np = _argmax_numpy(a_np1, keepdims=keepdims) - node = onnx.helper.make_node('ArgMax', - inputs=['a_np1'], - outputs=['out'], - keepdims=keepdims) - elif keepdims is None: - b_np = _argmax_numpy(a_np1, axis=axis) - node = onnx.helper.make_node('ArgMax', - inputs=['a_np1'], - outputs=['out'], - axis=axis) - else: - b_np = _argmax_numpy(a_np1, axis=axis, keepdims=keepdims) - node = onnx.helper.make_node('ArgMax', - inputs=['a_np1'], - outputs=['out'], - axis=axis, - keepdims=keepdims) - - graph = helper.make_graph([node], - "argmax_test", - inputs = [helper.make_tensor_value_info("a_np1", - TensorProto.INT32, list(a_np1.shape))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.INT32, list(b_np.shape))]) - - model = helper.make_model(graph, producer_name='argmax_test') - - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape, b_np.dtype) - tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5) - -def test_forward_arg_min_max(): - '''Verify argmin and argmax''' - verify_argmin([3,4,4]) - verify_argmax([3,4,4]) - verify_argmin([3,4,4], axis=1) - verify_argmax([3,4,4], axis=0) - verify_argmin([3,4,4], keepdims=0) - verify_argmax([3,4,4], keepdims=1) - for axis in [0,1,2]: - for keepdims in [True,False]: - verify_argmin([3,4,4], axis, keepdims) - verify_argmax([3,4,4], axis, keepdims) - -def verify_constantfill(is_shape, input_dim, out_dim, value, dtype, **kwargs): - input_a = np.random.uniform(size=input_dim).astype(dtype) - out = 
np.empty(shape=out_dim, dtype=dtype) - out.fill(value) - - if is_shape == True: - fill_node = helper.make_node("ConstantFill", [], ["out"], shape=input_dim, value=value, **kwargs) - else: - fill_node = helper.make_node("ConstantFill", ["input_a"], ["out"], value=value, dtype=dtype, **kwargs) - - graph = helper.make_graph([fill_node], - "fill_test", - inputs = [helper.make_tensor_value_info("input_a", - TensorProto.FLOAT, list(input_dim))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(out.shape))]) - - model = helper.make_model(graph, producer_name='fill_test') - - for target, ctx in ctx_list(): - if is_shape == True: - tvm_out = get_tvm_output(model, [], target, ctx, out.shape) - else: - tvm_out = get_tvm_output(model, [input_a], target, ctx, out.shape) - - tvm.testing.assert_allclose(out, tvm_out, rtol=1e-5, atol=1e-5) - -def test_constantfill(): - verify_constantfill(True, (2, 3, 4, 5), (2, 3, 4, 5), 10, 'float32') - verify_constantfill(False, (2, 3, 4, 5), (2, 3, 4, 5), 10, 'float32') - verify_constantfill(True, (2, 3, 4, 5), (2, 3, 4, 5, 4, 5, 6), 10, 'float32', extra_shape=(4, 5, 6)) - - -def verify_pad(indata, pads, value=0.0): - indata = np.array(indata).astype(np.float32) - # numpy expect result - len_dim = len(pads) // 2 - np_pads = [(pads[i], pads[i+len_dim]) for i in range(len_dim)] - outdata = np.pad(indata, pad_width=np_pads, mode='constant', constant_values=value) - # onnx graph - node = helper.make_node( - 'Pad', - inputs=['input'], - outputs=['output'], - mode='constant', - pads=pads, - value=value - ) - graph = helper.make_graph([node], - 'pad_test', - inputs = [helper.make_tensor_value_info("input", - TensorProto.FLOAT, list(indata.shape))], - outputs = [helper.make_tensor_value_info("output", - TensorProto.FLOAT, list(outdata.shape))]) - model = helper.make_model(graph, producer_name='pad_test') - # tvm result - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, 'float32') - tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5) - -def test_pad(): - verify_pad(np.random.randn(2, 2).astype(np.float32), [0, 1, 0, 0], 0.0) - verify_pad(np.random.randn(2, 3).astype(np.float32), [1, 0, 0, 1], 0.0) - verify_pad(np.random.randn(3, 2).astype(np.float32), [0, 0, 1, 0], 5.0) - -def verify_reduce_x(name, indata, axis, keepdims): - indata = np.array(indata).astype(np.float32) - # numpy expect result - if name == 'ReduceMax': - outdata = np.maximum.reduce(indata, axis=axis, keepdims=keepdims == 1) - elif name == 'ReduceMin': - outdata = np.minimum.reduce(indata, axis=axis, keepdims=keepdims == 1) - elif name == 'ReduceSum': - outdata = np.sum(indata, axis=axis, keepdims=keepdims == 1) - elif name == 'ReduceMean': - outdata = np.mean(indata, axis=axis, keepdims=keepdims == 1) - else: - raise Exception('unsupport op: {}'.format(name)) - if len(np.asarray(outdata).shape) == 0: - outdata = np.asarray([outdata]) - # onnx graph - if axis is None: - node = helper.make_node(name, inputs=['input'], outputs=['output'], - keepdims=keepdims) - else: - node = helper.make_node(name, inputs=['input'], outputs=['output'], - axis=axis, keepdims=keepdims) - graph = helper.make_graph([node], - '{}_test'.format(name), - inputs = [helper.make_tensor_value_info("input", - TensorProto.FLOAT, list(indata.shape))], - outputs = [helper.make_tensor_value_info("output", - TensorProto.FLOAT, list(outdata.shape))]) - model = helper.make_model(graph, producer_name='{}_test'.format(name)) - # tvm result - for target, ctx 
in ctx_list(): - tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, 'float32') - tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5) - -def test_reduce_max(): - verify_reduce_x("ReduceMax", - np.random.randn(3, 2, 2).astype(np.float32), - axis=None, keepdims=1) - verify_reduce_x("ReduceMax", - np.random.randn(3, 2, 3).astype(np.float32), - axis=None, keepdims=0) - verify_reduce_x("ReduceMax", - np.random.randn(3, 3, 3).astype(np.float32), - axis=(1,), keepdims=1) - -def test_reduce_min(): - verify_reduce_x("ReduceMin", - np.random.randn(3, 2, 2).astype(np.float32), - axis=None, keepdims=1) - verify_reduce_x("ReduceMin", - np.random.randn(3, 2, 3).astype(np.float32), - axis=None, keepdims=0) - verify_reduce_x("ReduceMin", - np.random.randn(3, 3, 3).astype(np.float32), - axis=(1,), keepdims=1) - -def test_reduce_sum(): - verify_reduce_x("ReduceSum", - np.random.randn(3, 2, 2).astype(np.float32), - axis=None, keepdims=1) - verify_reduce_x("ReduceSum", - np.random.randn(3, 2, 3).astype(np.float32), - axis=None, keepdims=0) - verify_reduce_x("ReduceSum", - np.random.randn(3, 3, 3).astype(np.float32), - axis=(1,), keepdims=1) - -def test_reduce_mean(): - verify_reduce_x("ReduceMean", - np.random.randn(3, 2, 2).astype(np.float32), - axis=None, keepdims=1) - verify_reduce_x("ReduceMean", - np.random.randn(3, 2, 3).astype(np.float32), - axis=None, keepdims=0) - verify_reduce_x("ReduceMean", - np.random.randn(3, 3, 3).astype(np.float32), - axis=(1,), keepdims=1) - -def verify_split(indata, outdatas, split, axis=0): - indata = np.array(indata).astype(np.float32) - outdatas = [np.array(o).astype(np.float32) for o in outdatas] - node = helper.make_node( - 'Split', - inputs=['input'], - outputs=['output_{}'.format(i) for i in range(len(split))], - axis=axis, - split=split - ) - graph = helper.make_graph([node], - 'split_test', - inputs = [helper.make_tensor_value_info("input", - TensorProto.FLOAT, list(indata.shape))], - outputs = [helper.make_tensor_value_info("output_{}".format(i), - TensorProto.FLOAT, list(outdatas[i].shape)) - for i in range(len(split)) - ]) - model = helper.make_model(graph, producer_name='split_test') - - for target, ctx in ctx_list(): - output_shape = [o.shape for o in outdatas] - output_type = ['float32', 'float32', 'float32'] - tvm_out = get_tvm_output(model, indata, target, ctx, output_shape, output_type) - for o, t in zip(outdatas, tvm_out): - tvm.testing.assert_allclose(o, t) - -def test_split(): - # 1D - verify_split([1., 2., 3., 4., 5., 6.], [[1., 2.], [3., 4.], [5., 6.]], [2, 2, 2], 0) - verify_split([1., 2., 3., 4., 5., 6.], [[1., 2.], [3.], [4., 5., 6.]], [2, 1, 3], 0) - # 2D - verify_split([[1., 2., 3., 4.], [7., 8., 9., 10.]], - [[[1., 2.], [7., 8.]], [[3., 4.], [9., 10.]]], [2, 2], 1) - -def test_binary_ops(): - in_shape = (1, 2, 3, 3) - dtype = "float32" - out_shape = in_shape - - def verify_binary_ops(op, x, y, out_np, broadcast=None, rtol=1e-7, atol=1e-7): - if broadcast is None: - z = helper.make_node(op, ['in1', 'in2'], ['out']) - else: - z = helper.make_node(op, ['in1', 'in2'], ['out'], broadcast=1) - graph = helper.make_graph([z], - '_test', - inputs = [helper.make_tensor_value_info("in1", - TensorProto.FLOAT, list(in_shape)), - helper.make_tensor_value_info("in2", - TensorProto.FLOAT, list(in_shape))], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(out_shape))]) - model = helper.make_model(graph, producer_name='_test') - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, [x, y], 
target, ctx) - tvm.testing.assert_allclose(out_np, tvm_out, rtol=rtol, atol=atol) - - x = np.random.uniform(size=in_shape).astype(dtype) - y = np.random.uniform(size=in_shape).astype(dtype) - z = np.random.uniform(size=(3,)).astype(dtype) - verify_binary_ops("Add",x, y, x + y, broadcast=None) - verify_binary_ops("Add", x, z, x + z, broadcast=True) - verify_binary_ops("Sub", x, y, x - y, broadcast=None) - verify_binary_ops("Sub", x, z, x - z, broadcast=True) - verify_binary_ops("Mul",x, y, x * y, broadcast=None) - verify_binary_ops("Mul", x, z, x * z, broadcast=True) - verify_binary_ops("Div", x, y, x / y, broadcast=None, rtol=1e-5, atol=1e-5) - verify_binary_ops("Div", x, z, x / z, broadcast=True, rtol=1e-5, atol=1e-5) - verify_binary_ops("Sum", x, y, x + y, broadcast=None) - -def test_single_ops(): - in_shape = (1, 2, 3, 3) - dtype = "float32" - out_shape = in_shape - - def verify_single_ops(op, x, out_np, rtol=1e-7, atol=1e-7): - z = helper.make_node(op, ['in1'], ['out']) - graph = helper.make_graph([z], - '_test', - inputs = [helper.make_tensor_value_info("in1", - TensorProto.FLOAT, list(in_shape)),], - outputs = [helper.make_tensor_value_info("out", - TensorProto.FLOAT, list(out_shape))]) - model = helper.make_model(graph, producer_name='_test') - for target, ctx in ctx_list(): - tvm_out = get_tvm_output(model, [x], target, ctx) - tvm.testing.assert_allclose(out_np, tvm_out, rtol=rtol, atol=atol) - - x = np.random.uniform(size=in_shape).astype(dtype) - verify_single_ops("Neg",x, -x) - verify_single_ops("Abs",x, np.abs(x)) - verify_single_ops("Reciprocal",x, 1/x, rtol=1e-5, atol=1e-5) - verify_single_ops("Sqrt",x, np.sqrt(x), rtol=1e-5, atol=1e-5) - verify_single_ops("Relu",x, np.maximum(x, 0)) - verify_single_ops("Exp",x, np.exp(x), rtol=1e-5, atol=1e-5) - verify_single_ops("Log",x, np.log(x), rtol=1e-5, atol=1e-5) - verify_single_ops("Log",x, np.log(x), rtol=1e-5, atol=1e-5) - verify_single_ops("Tanh",x, np.tanh(x), rtol=1e-5, atol=1e-5) - verify_single_ops("Sigmoid",x, 1 / (1 + np.exp(-x)), rtol=1e-5, atol=1e-5) - verify_single_ops("Softsign",x, x / (1 + np.abs(x)), rtol=1e-5, atol=1e-5) - verify_single_ops("SoftPlus",x, np.log(1 + np.exp(x)), rtol=1e-5, atol=1e-5) - -def test_leaky_relu(): - def leaky_relu_x(x, alpha): - return np.where(x >= 0, x, x * alpha) - _test_onnx_op_elementwise((2, 4, 5, 6), - leaky_relu_x, - {'alpha': 0.25}, - 'float32', - 'LeakyRelu', - {'alpha': 0.25}) - -def test_elu(): - def elu_x(x, alpha): - return np.where(x > 0, x, alpha * (np.exp(x) - 1.0)) - _test_onnx_op_elementwise((2, 4, 5, 6), - elu_x, - {'alpha': 0.25}, - 'float32', - 'Elu', - {'alpha': 0.25}) - -def test_selu(): - def selu_x(x, alpha, gamma): - return gamma * np.where(x > 0, x, alpha * (np.exp(x) - 1.0)) - _test_onnx_op_elementwise((2, 4, 5, 6), - selu_x, - {'alpha': 0.25, 'gamma': 0.3}, - 'float32', - 'Selu', - {'alpha': 0.25, 'gamma': 0.3}) - -def test_ThresholdedRelu(): - def ThresholdedRelu_x(x, alpha): - out_np = np.clip(x, alpha, np.inf) - out_np[out_np == alpha] = 0 - return out_np - _test_onnx_op_elementwise((2, 4, 5, 6), - ThresholdedRelu_x, - {'alpha': 0.25}, - 'float32', - 'ThresholdedRelu', - {'alpha': 0.25}) - -def test_ScaledTanh(): - def ScaledTanh_x(x, alpha, beta): - return alpha * np.tanh(beta * x) - _test_onnx_op_elementwise((2, 4, 5, 6), - ScaledTanh_x, - {'alpha': 0.25, 'beta': 0.3}, - 'float32', - 'ScaledTanh', - {'alpha': 0.25, 'beta': 0.3}) - -def test_ParametricSoftplus(): - def ParametricSoftplus_x(x, alpha, beta): - return alpha * np.log(np.exp(beta * x) + 1) - 
_test_onnx_op_elementwise((2, 4, 5, 6), - ParametricSoftplus_x, - {'alpha': 0.25, 'beta': 0.3}, - 'float32', - 'ParametricSoftplus', - {'alpha': 0.25, 'beta': 0.3}) - -def test_Scale(): - def Scale_x(x, scale): - return scale * x - _test_onnx_op_elementwise((2, 4, 5, 6), - Scale_x, - {'scale': 0.25}, - 'float32', - 'Scale', - {'scale': 0.25}) - -def test_LogSoftmax(): - _test_onnx_op_elementwise((1, 4), - topi.testing.log_softmax_python, - {}, - 'float32', - 'LogSoftmax', - {'axis': 1}, - rtol=1e-5, - atol=1e-5) - -if __name__ == '__main__': - # verify_super_resolution_example() - # verify_squeezenet1_1() - # verify_lenet() - verify_resnet18() - test_reshape() - test_reshape_like() - test_power() - test_squeeze() - test_unsqueeze() - test_slice() - test_floor() - test_ceil() - test_clip() - test_matmul() - test_gather() - test_lrn() - test_upsample() - test_forward_min() - test_forward_max() - test_forward_mean() - test_forward_hardsigmoid() - test_forward_arg_min_max() - test_softmax() - test_constantfill() - test_pad() - test_reduce_max() - test_reduce_min() - test_reduce_sum() - test_reduce_mean() - test_split() - test_binary_ops() - test_single_ops() - test_leaky_relu() - test_elu() - test_selu() - test_ThresholdedRelu() - test_ScaledTanh() - test_ParametricSoftplus() - test_Scale() - test_LogSoftmax() diff --git a/nnvm/tests/python/frontend/tensorflow/test_forward.py b/nnvm/tests/python/frontend/tensorflow/test_forward.py deleted file mode 100644 index b49d702f26c2..000000000000 --- a/nnvm/tests/python/frontend/tensorflow/test_forward.py +++ /dev/null @@ -1,1299 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=import-self, invalid-name, unused-argument -""" -Tensorflow testcases -==================== -This article is a test script to test tensorflow operator with NNVM. 
-""" -from __future__ import print_function -import numpy as np -import nnvm.compiler -import tvm -import tensorflow as tf -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import graph_util -from tensorflow.python.ops import nn_ops -from tensorflow.python.ops import nn -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables -from tensorflow.python.ops import init_ops -from tensorflow.core.framework import graph_pb2 - -import tvm.relay.testing.tf as tf_testing - -####################################################################### -# Generic run functions for TVM & tensorflow -# ------------------------------------------ -def convert_to_list(x): - if not isinstance(x, list): - x = [x] - return x - -def run_tvm_graph(graph_def, input_data, input_node, num_output=1, target='llvm', out_names=None): - """ Generic function to compile on nnvm and execute on tvm """ - input_data = convert_to_list(input_data) - input_node = convert_to_list(input_node) - - layout = None - if target == "cuda": - layout = "NCHW" - target_host = 'llvm' - - if isinstance(input_data, list): - shape_dict = {} - dtype_dict = {} - for i, e in enumerate(input_node): - shape_dict[e] = input_data[i].shape - dtype_dict[e] = input_data[i].dtype - else: - shape_dict = {input_node: input_data.shape} - dtype_dict = {input_node: input_data.dtype} - - sym, params = nnvm.frontend.from_tensorflow(graph_def, layout=layout, shape=shape_dict, outputs=out_names) - graph, lib, params = nnvm.compiler.build(sym, target=target, target_host=target_host, shape=shape_dict, - dtype=dtype_dict, params=params) - - ctx = tvm.context(target, 0) - from tvm.contrib import graph_runtime - m = graph_runtime.create(graph, lib, ctx) - # set inputs - for i, e in enumerate(input_node): - m.set_input(e, tvm.nd.array(input_data[i].astype(input_data[i].dtype))) - - m.set_input(**params) - # execute - m.run() - # get outputs - assert out_names is None or num_output == len(out_names),"out_names: {} num_output: {}".format( - out_names, num_output) - tvm_output_list = [] - for i in range(0, num_output): - tvm_output = m.get_output(i) - tvm_output_list.append(tvm_output.asnumpy()) - return tvm_output_list - -def run_tf_graph(sess, input_data, input_node, output_node): - """ Generic function to execute tensorflow """ - input_data = convert_to_list(input_data) - input_node = convert_to_list(input_node) - output_node = convert_to_list(output_node) - - tensor = [0] * len(output_node) - for i in range(len(output_node)): - tensor[i] = sess.graph.get_tensor_by_name(output_node[i]) - - input_dict = {} - for i, e in enumerate(input_node): - input_dict[e] = input_data[i] - - output_data = sess.run(tensor, input_dict) - return output_data - - -def compare_tf_with_tvm(in_data, in_name, out_name, init_global_variables=False, no_gpu=False): - """Generic function to generate and compare tensorflow and TVM output""" - - out_name = convert_to_list(out_name) - out_node = [0]*len(out_name) - for i in range(len(out_name)): - out_node[i] = out_name[i].split(':')[0] if ":" in out_name[i] else out_name[i] - - in_data = convert_to_list(in_data) - in_name = convert_to_list(in_name) - in_node = [0]*len(in_name) - for i in range(len(in_name)): - in_node[i] = in_name[i].split(':')[0] if ":" in in_name[i] else in_name[i] - - with tf.Session() as sess: - if init_global_variables: - 
sess.run(variables.global_variables_initializer()) - final_graph_def = tf.graph_util.convert_variables_to_constants( - sess, - sess.graph.as_graph_def(add_shapes=True), - out_node, - ) - tf_output = run_tf_graph(sess, in_data, in_name, out_name) - - for device in ["llvm", "cuda"]: - ctx = tvm.context(device, 0) - if not ctx.exist: - print("Skip because %s is not enabled" % device) - continue - if no_gpu and device == 'cuda': - continue - - tvm_output = run_tvm_graph(final_graph_def, in_data, in_node, - num_output=len(out_node), target=device, out_names=out_name) - # since the names from tensorflow and nnvm runs are not exactly same, - # first len(tf_output) will be compared - for i in range(len(tf_output)): - tvm.testing.assert_allclose(tf_output[i], tvm_output[i], atol=1e-5, rtol=1e-5) - - sess.close() - -def is_gpu_available(): - from tensorflow.python.client import device_lib - local_device_protos = device_lib.list_local_devices() - gpu_list = [x.name for x in local_device_protos if x.device_type == 'GPU'] - if len(gpu_list) > 0: - print("Tensorflow GPU:", gpu_list) - return True - else: - return False - -####################################################################### -# Pooling -# ------- -def _test_pooling_iteration(input_shape, **kwargs): - """ One iteration of pool operation with given shapes and attributes """ - - x = -np.arange( - np.prod(input_shape), dtype=np.float32).reshape(input_shape) - 1 - - with tf.Graph().as_default(): - in_data = array_ops.placeholder(shape=input_shape, dtype='float32') - nn_ops.pool(in_data, **kwargs) - - if kwargs['pooling_type'] == 'MAX': - out_name = 'max_pool:0' - else: - out_name = 'avg_pool:0' - - compare_tf_with_tvm(x, 'Placeholder:0', out_name) - -def _test_pooling(input_shape, **kwargs): - _test_pooling_iteration(input_shape, **kwargs) - - if is_gpu_available(): - input_shape = [input_shape[ii] for ii in (0, 3, 1, 2)] - kwargs['data_format'] = 'NCHW' - _test_pooling_iteration(input_shape, **kwargs) - -def test_forward_pooling(): - """ Pooling """ - - for pool_type in ['AVG', 'MAX']: - _test_pooling(input_shape=[2, 9, 10, 2], - window_shape=[1, 1], - padding='SAME', - pooling_type=pool_type, - dilation_rate=[1, 1], - strides=[1, 1]) - - _test_pooling(input_shape=[2, 10, 9, 2], - window_shape=[1, 1], - padding='SAME', - pooling_type=pool_type, - dilation_rate=[1, 1], - strides=[1, 1]) - - _test_pooling(input_shape=[2, 9, 10, 2], - window_shape=[2, 1], - padding='SAME', - pooling_type=pool_type, - dilation_rate=[1, 1], - strides=[1, 1]) - - _test_pooling(input_shape=[2, 10, 9, 2], - window_shape=[2, 3], - padding='SAME', - pooling_type=pool_type, - dilation_rate=[1, 1], - strides=[2, 1]) - -####################################################################### -# Convolution -# ----------- - -def _test_convolution(tensor_in_sizes, filter_in_sizes, - dilations, strides, padding, data_format): - """ One iteration of convolution with given shapes and attributes """ - - total_size_1 = 1 - total_size_2 = 1 - for s in tensor_in_sizes: - total_size_1 *= s - for s in filter_in_sizes: - total_size_2 *= s - # Initializes the input tensor with array containing incrementing - # numbers from 1. 
- data_array = [f * 1.0 for f in range(1, total_size_1 + 1)] - filter_array = [f * 1.0 for f in range(1, total_size_2 + 1)] - - with tf.Graph().as_default(): - in_data = array_ops.placeholder(shape=tensor_in_sizes, dtype='float32') - in_filter = constant_op.constant(filter_array, shape=filter_in_sizes, dtype='float32') - if data_format == 'NHWC': - strides = [1] + strides + [1] - dilations = [1] + dilations + [1] - else: - strides = [1, 1] + strides - dilations = [1, 1] + dilations - - nn_ops.conv2d(in_data, - in_filter, - strides=strides, - padding=padding, - data_format=data_format) - - compare_tf_with_tvm(np.reshape(data_array, tensor_in_sizes).astype('float32'), - 'Placeholder:0', 'Conv2D:0') - -def test_forward_convolution(): - if is_gpu_available(): - _test_convolution([4, 176, 8, 8], [1, 1, 176, 32], [1, 1], [1, 1], 'SAME', 'NCHW') - _test_convolution([4, 19, 17, 17], [3, 3, 19, 19], [1, 1], [2, 2], 'VALID', 'NCHW') - _test_convolution([4, 124, 17, 17], [1, 1, 124, 19], [1, 1], [1, 1], 'SAME', 'NCHW') - _test_convolution([4, 12, 17, 17], [3, 3, 12, 32], [1, 1], [2, 2], 'VALID', 'NCHW') - - _test_convolution([4, 8, 8, 176], [1, 1, 176, 32], [1, 1], [1, 1], 'SAME', 'NHWC') - _test_convolution([4, 17, 17, 19], [3, 3, 19, 19], [1, 1], [2, 2], 'VALID', 'NHWC') - _test_convolution([4, 17, 17, 124], [1, 1, 124, 19], [1, 1], [1, 1], 'SAME', 'NHWC') - _test_convolution([4, 17, 17, 12], [3, 3, 12, 32], [1, 1], [2, 2], 'VALID', 'NHWC') - -####################################################################### -# Reshape -# ------- - -def _test_reshape(data, out_shape): - """ One iteration of reshape operation with given data and out shape """ - - with tf.Graph().as_default(): - in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) - array_ops.reshape(in_data, out_shape) - - compare_tf_with_tvm(data, 'Placeholder:0', 'Reshape:0') - -def test_forward_reshape(): - _test_reshape(np.arange(6.0), [2, 3]) - _test_reshape(np.arange(6), [-1, 2]) - _test_reshape(np.arange(6), [3, -1]) - _test_reshape(np.arange(6), [-1]) - -####################################################################### -####################################################################### -# Squeeze -# ------- - -def _test_squeeze(data, squeeze_dims=None): - """ One iteration of squeeze """ - - if squeeze_dims is None: - squeeze_dims = [] - - with tf.Graph().as_default(): - in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) - - if squeeze_dims: - array_ops.squeeze(in_data, squeeze_dims) - else: - array_ops.squeeze(in_data) - - compare_tf_with_tvm(data, 'Placeholder:0', 'Squeeze:0') - -def test_forward_squeeze(): - """ Squeeze """ - - # Nothing to squeeze. - _test_squeeze(np.arange(2).reshape((2))) - _test_squeeze(np.arange(6).reshape((2, 3))) - - # Squeeze the middle element away. - _test_squeeze(np.arange(4).reshape((2, 1, 2))) - - # Squeeze on both ends. - _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1))) - - # Positive squeeze dim index. - _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [0]) - _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [2, 4]) - _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [0, 4, 2]) - - # Negative squeeze dim index. 
- _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-1]) - _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-3, -5]) - _test_squeeze(np.arange(6).reshape((1, 2, 1, 3, 1)), [-3, -5, -1]) - -####################################################################### -# ConcatV2 -# -------- - -def _test_concat_v2(data, dim): - """ One iteration of ConcatV2 """ - - with tf.Graph().as_default(): - gen_array_ops._concat_v2(data, dim) - - compare_tf_with_tvm(data, ['ConcatV2/values_0:0', 'ConcatV2/values_1:0'], - 'ConcatV2:0') - -def _test_forward_concat_v2(): - t1 = np.array([]) - t2 = np.array([]) - test_concat_v2([t1, t2], 0) - - t1 = np.array([[1, 2, 3], [4, 5, 6]]) - t2 = np.array([[7, 8, 9], [10, 11, 12]]) - - _test_concat_v2([t1, t2], 1) - -####################################################################### -# Sigmoid -# ------- - -def _test_sigmoid(data): - """ One iteration of sigmoid """ - - with tf.Graph().as_default(): - in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) - sigmoid_out = math_ops.sigmoid(in_data) - - compare_tf_with_tvm(data, 'Placeholder:0', 'Sigmoid:0') - -def test_forward_sigmoid(): - """ Sigmoid """ - - _test_sigmoid(np.random.uniform(size=(3, 4, 4, 3)).astype('float32')) - -####################################################################### -# Argmin/Argmax -# ------------- - -def _test_argx(func, data, **kwargs): - - with tf.Graph().as_default(): - inp = array_ops.placeholder(shape=data.shape, dtype=data.dtype, name="c0") - func(inp, name="argx0", **kwargs, output_type=tf.int32) - - compare_tf_with_tvm(data, 'c0:0', 'argx0:0') - -def test_forward_argminmax(): - for axis in [None,0,1,2]: - data = np.random.uniform(size=(8,4,9)).astype('float32') - _test_argx(tf.argmax, data=data, axis=axis) - _test_argx(tf.argmin, data=data, axis=axis) - -####################################################################### -# Reduce -# ------ - -def _test_reduce(func, data, **kwargs): - """ One iteration of a reduce operation""" - - with tf.Graph().as_default(): - inp = array_ops.placeholder(shape=data.shape, dtype=data.dtype, name="c0") - func(inp, name="reducex0", **kwargs) - - compare_tf_with_tvm(data, 'c0:0', 'reducex0:0') - -def test_forward_reduce(): - data = np.random.uniform(size=(8,4,9)).astype('float32') - _test_reduce(tf.reduce_sum, data=data) - _test_reduce(tf.reduce_sum, data=data, axis=0) - _test_reduce(tf.reduce_sum, data=data, axis=(0,1)) - - -####################################################################### -# Variable -# -------- - -def _test_variable(data): - """ One iteration of a variable """ - - tf.reset_default_graph() - input_op = array_ops.placeholder(shape=data.shape, dtype=data.dtype) - input_tensor = array_ops.reshape(input_op, data.shape) - - size = input_tensor.shape.dims[1] - with variable_scope.variable_scope("linear", reuse=None): - w = variable_scope.get_variable( - "w", shape=[size, size], dtype=input_tensor.dtype) - math_ops.matmul(input_tensor, w) - - compare_tf_with_tvm(data, 'Placeholder:0', 'MatMul:0', init_global_variables=True) - -def test_forward_variable(): - """Variable type op test""" - _test_variable(np.random.uniform(size=(32, 100)).astype('float32')) - - -####################################################################### -# StridedSlice -# ------------ - -def _test_stridedslice(ip_shape, begin, end, stride, dtype, - begin_mask=0, end_mask=0, new_axis_mask=0, - shrink_axis_mask=0, ellipsis_mask=0): - """ One iteration of a Stridedslice """ - - tf.reset_default_graph() - in_data = 
tf.placeholder(dtype, ip_shape, name="in_data") - tf.strided_slice(in_data, begin, end, stride, begin_mask=begin_mask, - end_mask=end_mask, new_axis_mask=new_axis_mask, - shrink_axis_mask=shrink_axis_mask, - ellipsis_mask=ellipsis_mask, name="strided_slice") - np_data = np.random.uniform(size=ip_shape).astype(dtype) - - compare_tf_with_tvm(np_data, 'in_data:0', 'strided_slice:0') - -def test_forward_stridedslice(): - '''test StridedSlice''' - - _test_stridedslice((3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1], 'float32') - _test_stridedslice((3, 4, 3), [1, 0], [4, 3], [2, 1], 'float32', ellipsis_mask=8) - _test_stridedslice((3, 4, 3), [1, 0], [4, 2], [2, 1], 'float32', ellipsis_mask=2) - _test_stridedslice((3, 4, 5, 3), [1, 0], [4, 2], [2, 1], 'float32', ellipsis_mask=2) - _test_stridedslice((3, 4, 5, 3), [1, 0, 1], [4, 2, 2], [2, 1, 1], 'float32', ellipsis_mask=2) - _test_stridedslice((3, 4, 3), [1, 1, 0], [4, 4, 2], [2, 1, 1], 'float32', new_axis_mask=5) - _test_stridedslice((3, 4, 3), [1, 1, 1], [4, 4, 1], [2, 1, 1], 'float32', ellipsis_mask=2, new_axis_mask=4) - _test_stridedslice((6, 4, 5), [1, 1, 1], [6, 3, 4], [2, 1, 1], 'float32', ellipsis_mask=2, new_axis_mask=5) - _test_stridedslice((3, 4, 3), [1, 1, 2], [4, 4, 3], [2, 1, 1], 'float32', ellipsis_mask=4, new_axis_mask=2) - _test_stridedslice((3, 4, 3), [1, 1, 2], [4, 4, 3], [2, 1, 1], 'float32', ellipsis_mask=2, new_axis_mask=3) - _test_stridedslice((3, 4, 3), [1, 1, 0], [4, 4, 1], [2, 1, 1], 'float32', ellipsis_mask=2, new_axis_mask=3) - _test_stridedslice((3, 4, 3), [1, 1, 2], [4, 4, 3], [2, 1, 1], 'float32', ellipsis_mask=2, new_axis_mask=2) - _test_stridedslice((3,4), [1, 0], [4, 4], [1, 1], 'float32', shrink_axis_mask=2) - _test_stridedslice((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], 'float32', shrink_axis_mask=2, new_axis_mask=2) - _test_stridedslice((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], 'float32', shrink_axis_mask=1, new_axis_mask=2) - _test_stridedslice((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], 'float32', shrink_axis_mask=2, new_axis_mask=1) - _test_stridedslice((3, 4, 5, 4, 5, 6), [0, 0], [2, 3], [1, 1], 'float32', shrink_axis_mask=5, new_axis_mask=1) - _test_stridedslice((3, 4, 5, 4, 5, 6), [0, 0, 1, 2, 1], [2, 3, 4, 5, 3], [1, 1, 2, 2, 1], - 'float32', shrink_axis_mask=5, new_axis_mask=1, ellipsis_mask=2, begin_mask=8, end_mask=8) - _test_stridedslice((3, 4, 5, 4, 5, 6), [0, 0, 1, 2, 1], [2, 3, 4, 5, 3], [1, 1, 2, 2, 1], - 'float32', shrink_axis_mask=8, new_axis_mask=1, ellipsis_mask=2, begin_mask=5, end_mask=5) - _test_stridedslice((3, 4, 5, 4, 5, 6), [0, 0, 1, 2, 1], [2, 3, 4, 5, 3], [1, 1, 2, 2, 1], - 'float32', shrink_axis_mask=16, new_axis_mask=1, ellipsis_mask=2, begin_mask=5, end_mask=5) - _test_stridedslice((3, 4, 5, 4, 5, 6), [1, 2, 0, -3], [4, 5, 3, 3], [2, 2, 1, 1], - 'float32', shrink_axis_mask=8, new_axis_mask=1, ellipsis_mask=2, begin_mask=5, - end_mask=8) - _test_stridedslice((1), [0], [1], [1], 'float32', shrink_axis_mask=1) - - -####################################################################### -# Gather -# ------ - -def _test_gather(ip_shape, indice_shape, indice_value, axis, dtype): - """ One iteration of a Gather """ - - tf.reset_default_graph() - in_data = tf.placeholder(dtype, ip_shape, name="in_data") - indices = tf.placeholder("int32", indice_shape, name="indices") - tf.gather(in_data, indices, axis=axis) - np_data = np.random.uniform(size=ip_shape).astype(dtype) - - def _fill_indices(indice_value): - indices = np.array(ip_shape, dtype=dtype) - if isinstance(indice_value, int): - 
indices = np.array([indice_value], dtype='int32') - else: - indices = np.asarray(indice_value, dtype='int32') - return indices - np_indices = _fill_indices(indice_value) - - compare_tf_with_tvm([np_data, np_indices], ['in_data:0', 'indices:0'], 'GatherV2:0') - -def test_forward_gather(): - '''test gather layer''' - _test_gather((4,), (1,), 1, 0, 'int32') - _test_gather((4,), (1,), 1, 0, 'float32') - _test_gather((1,4), (1,), [0], 0, 'int32') - _test_gather((4,), (1,2,2), [[[1,0],[0,1]]], 0, 'float32') - _test_gather((2,2), (1,2,2), [[[1,0],[0,1]]], 0, 'int32') - _test_gather((2,2), (1,2,2), [[[1,0],[0,1]]], 1, 'int32') - _test_gather((2,2), (1,2,2), [[[1,0],[0,1]]], 0, 'float32') - _test_gather((3,3,3), (1,1,2), [[[1,0]]], 0, 'int32') - _test_gather((3,3,3), (1,1,2), [[[1,0]]], 2, 'int32') - _test_gather((4,3,5,6), (1,4), [[2,1,0,0]], 0, 'float32') - - -####################################################################### -# Split -# ----- - -def _test_split(in_shape, axis, num_or_size_splits, dtype): - np_data = np.random.uniform(-5, 5, size=in_shape).astype(dtype) - - """ One iteration of a Split """ - tf.reset_default_graph() - in_data = tf.placeholder(dtype, in_shape, name="in_data") - num_split = len(num_or_size_splits) if isinstance(num_or_size_splits, list) else num_or_size_splits - tf.split(in_data, num_or_size_splits, axis=axis) - - compare_tf_with_tvm([np_data], ['in_data:0'], ['split:{0}'.format(n) for n in range(num_split)]) - - # and now test together with concat - tf.reset_default_graph() - in_data = tf.placeholder(dtype, in_shape, name="in_data") - splitted = tf.split(in_data, num_or_size_splits, axis=axis) - tf.concat(splitted, axis) - - compare_tf_with_tvm([np_data], 'in_data:0', 'concat:0') - -def test_forward_split(): - '''test split layer''' - # rank 1 - _test_split((3,), 0, 1, 'float32') - _test_split((3,), 0, 3, 'float32') - _test_split((6,), 0, 3, 'float32') - # rank 2 - _test_split((6, 2), 0, 3, 'float32') - _test_split((2, 6), 1, 6, 'float32') - # rank 3 - _test_split((6, 2, 4), 0, 2, 'int32') - _test_split((2, 6, 4), 1, 3, 'float32') - _test_split((2, 4, 6), 2, 1, 'float32') - # rank 4 - _test_split((6, 1, 3, 5), 0, 3, 'float32') - _test_split((1, 6, 3, 5), 1, 3, 'float32') - _test_split((1, 3, 6, 5), 2, 3, 'float32') - _test_split((1, 3, 5, 6), 3, 3, 'float32') - # split along negative axis - _test_split((6, 1, 3, 5), -4, 3, 'float32') - _test_split((1, 6, 3, 5), -3, 3, 'float32') - _test_split((1, 3, 6, 5), -2, 3, 'float32') - _test_split((1, 3, 5, 6), -1, 3, 'float32') - # size_splits list - _test_split((6,), 0, [1, 2, 3], 'int32') - _test_split((3, 6, 4), -2, [1, 4, 1], 'float32') - - -####################################################################### -# Unstack -# ------- - -def _test_unstack(ip_shape, axis, dtype): - np_data = np.random.uniform(-5, 5, size=ip_shape).astype(dtype) - - tf.reset_default_graph() - in_data = tf.placeholder(dtype, ip_shape, name="in_data") - tf.unstack(in_data, axis=axis) - - compare_tf_with_tvm([np_data], ['in_data:0'], ['unstack:{0}'.format(n) for n in range(ip_shape[axis])]) - - tf.reset_default_graph() - in_data = tf.placeholder(dtype, ip_shape, name="in_data") - tf.stack(tf.unstack(in_data, axis=axis), axis=axis) - - compare_tf_with_tvm([np_data], ['in_data:0'], 'stack:0') - -def test_forward_unstack(): - '''test unstack layer''' - _test_unstack((6,), 0, 'int32') - _test_unstack((2,6), 1, 'float64') - # negative axis - _test_unstack((1,4), -1, 'int32') - _test_unstack((3,6,4), -2, 'float32') - - 
-####################################################################### -# Multi Input to graph -# -------------------- - -def test_forward_multi_input(): - with tf.Graph().as_default(): - in1 = tf.placeholder(tf.int32, shape=[3, 3], name='in1') - in2 = tf.placeholder(tf.int32, shape=[3, 3], name='in2') - in3 = tf.placeholder(tf.int32, shape=[3, 3], name='in3') - in4 = tf.placeholder(tf.int32, shape=[3, 3], name='in4') - - out1 = tf.add(in1, in2, name='out1') - out2 = tf.subtract(in3, in4, name='out2') - out = tf.multiply(out1, out2, name='out') - in_data = np.arange(9, dtype='int32').reshape([3, 3]) - - compare_tf_with_tvm([in_data, in_data, in_data, in_data], - ['in1:0', 'in2:0', 'in3:0', 'in4:0'], 'out:0') - -####################################################################### -# Multi Output to Graph -# --------------------- - -def test_forward_multi_output(): - with tf.Graph().as_default(): - in1 = tf.placeholder(tf.int32, shape=[3, 3], name='in1') - in2 = tf.placeholder(tf.int32, shape=[3, 3], name='in2') - in3 = tf.placeholder(tf.int32, shape=[3, 3], name='in3') - in4 = tf.placeholder(tf.int32, shape=[3, 3], name='in4') - - out1 = tf.add(in1, in2, name='out1') - out2 = tf.subtract(in3, in4, name='out2') - in_data = np.arange(9, dtype='int32').reshape([3, 3]) - in_data = [in_data] * 4 - in_name = ['in1:0', 'in2:0', 'in3:0', 'in4:0'] - out_name = ['out1:0', 'out2:0'] - out_node = [out.strip(':0') for out in out_name] - in_node = [inp.strip(':0') for inp in in_name] - - with tf.Session() as sess: - final_graph_def = tf.graph_util.convert_variables_to_constants( - sess, sess.graph.as_graph_def(add_shapes=True), out_node,) - tf_output = run_tf_graph(sess, in_data, in_name, out_name) - tvm_output = run_tvm_graph(final_graph_def, in_data, in_node, target='llvm', - out_names=out_node, num_output=2) - for i in range(len(tf_output)): - tvm.testing.assert_allclose(tf_output[i], tvm_output[i], atol=1e-5, rtol=1e-5) - -####################################################################### -# Resize Bilinear -# --------------- - -def _test_resize_bilinear(in_shape, to_shape, align_corners): - """ One iteration of resize bilinear """ - - data = np.random.uniform(size=in_shape).astype('float32') - shape_data = np.array(to_shape).astype('int32') - - with tf.Graph().as_default(): - in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) - shape_data = constant_op.constant(shape_data, shape=shape_data.shape, dtype=shape_data.dtype) - tf.image.resize_bilinear(in_data, shape_data, align_corners=align_corners) - - compare_tf_with_tvm(data, 'Placeholder:0', 'ResizeBilinear:0') - -def test_forward_resize_bilinear(): - """ Resize Bilinear """ - - _test_resize_bilinear((4, 16, 32, 32), [50, 50], False) - _test_resize_bilinear((6, 32, 64, 64), [20, 20], True) - - -####################################################################### -# Crop to bounding box -# -------------------- - -def _test_crop(in_shape, off_h, off_w, tar_h, tar_w): - """ Crop to bounding box """ - data = np.random.uniform(size=in_shape).astype('float32') - with tf.Graph().as_default(): - in_data = array_ops.placeholder(shape=data.shape, dtype=data.dtype) - tf.image.crop_to_bounding_box(in_data, off_h, off_w, tar_h, tar_w) - compare_tf_with_tvm(data, 'Placeholder:0', 'crop_to_bounding_box/Slice:0') - -def test_forward_crop(): - """ Crop to bounding box """ - _test_crop((1, 224, 224, 3), 20, 20, 120, 120) - - -####################################################################### -# LSTM -# ---- - -def 
_test_lstm_cell(batch_size, num_hidden, num_layers, forget_bias, dtype): - """ One iteration of a LSTM cell """ - - tf.reset_default_graph() - input_size = num_hidden - input_data = np.full((batch_size, input_size), 1., dtype=dtype) - in_state_c = np.full((num_layers, batch_size, num_hidden), 0.1, dtype=dtype) - in_state_h = np.full((num_layers, batch_size, num_hidden), 0.1, dtype=dtype) - - def _get_tensorflow_output(): - with tf.Session() as sess: - with variable_scope.variable_scope( - "root", initializer=init_ops.constant_initializer(0.5)): - m0 = array_ops.zeros([batch_size, num_hidden]) - m1 = array_ops.zeros([batch_size, num_hidden]) - x=tf.placeholder(shape=(batch_size, input_size), dtype=dtype) - g, ((out_m0, out_m1)) = \ - tf.contrib.rnn.LSTMBlockCell(num_hidden, - forget_bias=forget_bias)(x, ((m0, m1))) - sess.run([variables.global_variables_initializer()]) - res = sess.run([g, out_m0, out_m1], { - x.name: np.array([[1., 1.]]), - m0.name: 0.1 * np.ones([batch_size, num_hidden]), - m1.name: 0.1 * np.ones([batch_size, num_hidden]), - }) - graph_def = sess.graph.as_graph_def(add_shapes=True) - final_graph_def = graph_util.convert_variables_to_constants( - sess, - graph_def, - ['root/lstm_cell/LSTMBlockCell']) - return final_graph_def, res - - graph_def, tf_out = _get_tensorflow_output() - tvm_output = run_tvm_graph(graph_def, [input_data, in_state_c, in_state_h], - ['root/Placeholder', 'root/lstm_cell/LSTMBlockCell_c', - 'root/lstm_cell/LSTMBlockCell_h'], num_output=2) - assert isinstance(tvm_output, list) - - out = tvm_output[0] - out_state = tvm_output[1] - out_state_tup = np.split(out_state, indices_or_sections=2, axis=1) - out_state_c = np.reshape(out_state_tup[0], (batch_size, num_hidden)) - out_state_h = np.reshape(out_state_tup[1], (batch_size, num_hidden)) - tvm_out = [out, out_state_c, out_state_h] - tvm.testing.assert_allclose(tf_out[0], tvm_out[0], rtol=1e-3, atol=1e-3) - -def test_forward_lstm(): - '''test LSTM block cell''' - _test_lstm_cell(1, 2, 1, 0.0, 'float32') - - - -####################################################################### -# Pack -# --- -def _test_pack(axis, shape, **kwargs): - - a = np.arange(np.prod(shape), dtype=np.float32).reshape(shape) - b = np.arange(np.prod(shape), dtype=np.float32).reshape(shape) - - with tf.Graph().as_default(): - tf_a = array_ops.placeholder(shape=shape, dtype='float32', name='pl_a') - tf_b = array_ops.placeholder(shape=shape, dtype='float32', name='pl_b') - tf_c = tf.stack([tf_a,tf_b], axis=axis, **kwargs) - assert tf_c.op.op_def.name == 'Pack', "tf.stack() is expected to produce 'Pack' operation" - - compare_tf_with_tvm([a,b], ['pl_a:0','pl_b:0'], 'stack:0') - -def test_forward_pack(): - for axis in range(-3,3): - _test_pack(axis, [3,2,1]) - for axis in range(-1,1): - _test_pack(axis, [3]) - _test_pack(0, []) - -####################################################################### -# Pad -# --- -def _test_pad(input_shape, paddings, mode, **kwargs): - """ One iteration of pad operation with given shape""" - - x = np.arange(np.prod(input_shape), dtype=np.float32).reshape(input_shape) - - with tf.Graph().as_default(): - in_data = array_ops.placeholder(shape=input_shape, dtype='float32') - pad_values = constant_op.constant(paddings) - pad = tf.pad(in_data, paddings=pad_values, mode=mode, **kwargs) - - if mode == 'CONSTANT': - if 'constant_values' in kwargs: - out_name = 'PadV2:0' - else: - out_name = 'Pad:0' - - compare_tf_with_tvm(x, 'Placeholder:0', out_name) - -def test_forward_pad(): - """ Pad """ - _test_pad((2, 
3), [[1,1], [2,2]], mode="CONSTANT") - _test_pad((2, 3), [[1,1], [2,2]], mode="CONSTANT", constant_values=1.0) - -####################################################################### -# Logical operators -# -------------------- -def test_logical_and(): - with tf.Graph().as_default(): - in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in1') - in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in2') - out = tf.logical_and(in1, in2, name='out') - in_data1 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool') - in_data2 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool') - compare_tf_with_tvm([in_data1, in_data2], ['in1:0', 'in2:0'], 'out:0') - -def test_logical_or(): - with tf.Graph().as_default(): - in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in1') - in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in2') - out = tf.logical_or(in1, in2, name='out') - in_data1 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool') - in_data2 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool') - compare_tf_with_tvm([in_data1, in_data2], ['in1:0', 'in2:0'], 'out:0') - -def test_logical_xor(): - with tf.Graph().as_default(): - in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in1') - in2 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in2') - out = tf.logical_xor(in1, in2, name='out') - in_data1 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool') - in_data2 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool') - compare_tf_with_tvm([in_data1, in_data2], ['in1:0', 'in2:0'], 'out:0') - -def test_logical_not(): - with tf.Graph().as_default(): - in1 = tf.placeholder(tf.bool, shape=[1, 4, 4, 3], name='in1') - out = tf.logical_not(in1, name='out') - in_data1 = np.random.choice(a=[False, True],size=(1, 4, 4, 3)).astype('bool') - compare_tf_with_tvm(in_data1, 'in1:0', 'out:0') - -def test_forward_logical(): - test_logical_and() - test_logical_or() - test_logical_xor() - test_logical_not() - -####################################################################### -# Inception V3 -# ------------ -def test_forward_inception_v3(): - '''test inception V3 model''' - with tf.Graph().as_default(): - graph_def = tf_testing.get_workload('InceptionV3/inception_v3_2016_08_28_frozen-with_shapes.pb') - # Call the utility to import the graph definition into default graph. - graph_def = tf_testing.ProcessGraphDefParam(graph_def) - - data = np.random.uniform(size=(1, 299, 299, 3)).astype('float32') - - with tf.Session() as sess: - tf_output = run_tf_graph(sess, data, 'input:0', 'InceptionV3/Predictions/Reshape_1:0') - tvm_output = run_tvm_graph(graph_def, data, 'input') - tvm.testing.assert_allclose(tf_output[0], tvm_output[0], rtol=1e-5, atol=1e-5) - -####################################################################### -# Inception V1 -# ------------ -def test_forward_inception_v1(): - '''test inception V1 model''' - with tf.Graph().as_default(): - graph_def = tf_testing.get_workload("InceptionV1/classify_image_graph_def-with_shapes.pb") - # Call the utility to import the graph definition into default graph. - graph_def = tf_testing.ProcessGraphDefParam(graph_def) - - # Build an image from random data. 
- from PIL import Image - from tvm.contrib import util - - img_array = np.random.uniform(size=(1, 600, 600, 3)).astype("uint8") - img = Image.frombuffer('RGB', (600, 600), img_array.tostring(), 'raw', 'RGB', 0, 1) - temp = util.tempdir() - img_path = temp.relpath("tf-test.jpg") - img.save(img_path); - - import os.path - if not tf.gfile.Exists(os.path.join(img_path)): - tf.logging.fatal('File does not exist %s', image) - data = tf.gfile.FastGFile(os.path.join(img_path), 'rb').read() - - temp.remove() - - # Extract tensorflow decoded image frame for tvm input - with tf.Session() as sess: - tvm_data = run_tf_graph(sess, data, 'DecodeJpeg/contents:0', 'DecodeJpeg:0') - - with tf.Session() as sess: - tf_output = run_tf_graph(sess, data, 'DecodeJpeg/contents:0', 'softmax:0') - tvm_output = run_tvm_graph(graph_def, tvm_data, 'DecodeJpeg/contents') - tvm.testing.assert_allclose(tf_output[0], tvm_output[0], rtol=1e-5, atol=1e-5) - -####################################################################### -# Mobilenet -# --------- -def test_forward_mobilenet(): - '''test mobilenet model''' - # MobilenetV2 - with tf.Graph().as_default(): - graph_def = tf_testing.get_workload( - "https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz", - "mobilenet_v2_1.4_224_frozen.pb") - # Call the utility to import the graph definition into default graph. - graph_def = tf_testing.ProcessGraphDefParam(graph_def) - - data = np.random.uniform(size=(1, 224, 224, 3)).astype('float32') - out_node = 'MobilenetV2/Predictions/Reshape_1' - - with tf.Session() as sess: - # Add shapes to the graph. - graph_def = tf_testing.AddShapesToGraphDef(sess, out_node) - tf_output = run_tf_graph(sess, data, 'input:0', out_node + ':0') - tvm_output = run_tvm_graph(graph_def, data, 'input') - tvm.testing.assert_allclose(np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5) - -####################################################################### -# ResnetV2 -# -------- -def test_forward_resnetv2(): - '''test resnet model''' - if is_gpu_available(): - with tf.Graph().as_default(): - graph_def = tf_testing.get_workload("ResnetV2/resnet-20180601_resnet_v2_imagenet-shapes.pb") - # Call the utility to import the graph definition into default graph. - graph_def = tf_testing.ProcessGraphDefParam(graph_def) - - data = np.random.uniform(size=(128, 224, 224, 3)).astype('float32') - out_node = 'ArgMax' - - with tf.Session() as sess: - tf_output = run_tf_graph(sess, data, 'input_tensor:0', out_node + ':0') - for device in ["llvm", "cuda"]: - ctx = tvm.context(device, 0) - if not ctx.exist: - print("Skip because %s is not enabled" % device) - continue - tvm_output = run_tvm_graph(graph_def, data, 'input_tensor', len(tf_output), target=device) - tvm.testing.assert_allclose(np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5) - -####################################################################### -# Placeholder -# ----------- -def test_forward_placeholder(): - '''test a simple pb with Placeholder node in the end of GraphDef''' - with tf.Graph().as_default(): - graph_def = tf_testing.get_workload("Custom/placeholder.pb") - - # Call the utility to import the graph definition into default graph. - graph_def = tf_testing.ProcessGraphDefParam(graph_def) - - - data = np.random.uniform(size=(1, 224, 224, 3)).astype('float32') - out_node = 'mul' - - with tf.Session() as sess: - # Add shapes to the graph. 
- graph_def = tf_testing.AddShapesToGraphDef(sess, out_node) - tf_output = run_tf_graph(sess, data, 'Placeholder:0', out_node + ':0') - tvm_output = run_tvm_graph(graph_def, data, 'Placeholder') - print("tf_output is {}\ntvm_output is {}".format(tf_output, tvm_output)) - tvm.testing.assert_allclose(np.squeeze(tvm_output[0]), np.squeeze(tf_output[0]), rtol=1e-5, atol=1e-5) - -####################################################################### -# PTB -# --- -dir(tf.contrib) -def test_forward_ptb(): - '''test ptb model''' - config = tf_testing.get_config() - num_steps = config.num_steps - num_hidden = config.hidden_size - num_layers = config.num_layers - batch_size = config.batch_size - vocab_size = config.vocab_size - out_sample_shape = (batch_size, vocab_size) - out_state_shape = (num_layers, 2, batch_size, num_hidden) - #Sample input - inpt = "we have no useful information on" - cnt_sample = 20 - - def _pretty_print(items, is_char_model, id2word): - if not is_char_model: - return ' '.join([id2word[x] for x in items]) - else: - return ''.join([id2word[x] for x in items]).replace('_', ' ') - - def _get_tvm_graph_module(graph_def): - sym, params = nnvm.frontend.from_tensorflow(graph_def) - - #Cell inputs 'c and 'h' consist of all layers values - shape_dict = {'Model/Placeholder': (batch_size, num_steps), - 'Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_c':(num_layers, batch_size, num_hidden), - 'Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_h':(num_layers, batch_size, num_hidden)} - dtype_dict = {'Model/Placeholder': 'int32', - 'Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_c':'float32', - 'Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_h':'float32'} - target = 'llvm' - graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, - dtype=dtype_dict, params=params) - from tvm.contrib import graph_runtime - ctx = tvm.cpu(0) - return params, graph_runtime.create(graph, lib, ctx) - - def _do_tvm_sample(model, data, in_states, params, num_samples): - """Sampled from the model""" - samples = [] - state = in_states - sample = None - def _get_sample(data, state): - input_data = np.full((batch_size, num_steps), data, dtype="int32") - in_state_tup = np.split(state, indices_or_sections=2, axis=1) - in_state_c = np.reshape(in_state_tup[0], (num_layers, batch_size, num_hidden)) - in_state_h = np.reshape(in_state_tup[1], (num_layers, batch_size, num_hidden)) - - model.set_input('Model/Placeholder', tvm.nd.array(input_data.astype("int32"))) - model.set_input('Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_c', - tvm.nd.array(in_state_c.astype("float32"))) - model.set_input('Model/RNN/RNN/multi_rnn_cell/cell_0/lstm_cell/LSTMBlockCell_h', - tvm.nd.array(in_state_h.astype("float32"))) - model.set_input(**params) - model.run() - tvm_output = model.get_output(0, tvm.nd.empty(out_sample_shape, - "float32")).asnumpy() - state_output = model.get_output(1, tvm.nd.empty(out_state_shape, - "float32")).asnumpy() - sample = tf_testing.pick_from_weight(tvm_output[0]) - - return sample, state_output - - for x in data: - sample, state = _get_sample(x, state) - - if sample is not None: - samples.append(sample) - else: - samples.append(0) - - k = 1 - while k < num_samples: - sample, state = _get_sample(samples[-1], state) - samples.append(sample) - k += 1 - return samples, state - - with tf.Graph().as_default(): - word_to_id, id_to_word, graph_def = tf_testing.get_workload_ptb() - vocab_size = len(word_to_id) - # Call the utility to import the graph 
definition into default graph. - graph_def = tf_testing.ProcessGraphDefParam(graph_def) - sess = tf.Session() - - #TVM graph module creation - params, m = _get_tvm_graph_module(graph_def) - - # Create 10 predicted statments of 20 words - cnt_stm = 0 - while cnt_stm < 10: - cnt_stm += 1 - in_state = np.full((num_layers, 2, batch_size, num_hidden), 0, dtype="float32") - seed_for_sample = inpt.split() - tvm_samples, tvm_state = _do_tvm_sample(m, [word_to_id[word] \ - for word in seed_for_sample], - in_state, params, cnt_sample) - tvm_sample_str = _pretty_print(tvm_samples, False, id_to_word) - tf_samples, tf_state = tf_testing.do_tf_sample(sess, - [word_to_id[word] for word in seed_for_sample], - in_state, cnt_sample) - tf_sample_str = _pretty_print(tf_samples, False, id_to_word) - inpt = tvm_sample_str - tvm.testing.assert_allclose(tf_samples, tvm_samples, rtol=1e-5, atol=1e-5) - assert(tvm_sample_str == tf_sample_str) - -####################################################################### -# LRN (Local Response Normalization) -# ---------------------------------- - -def _test_lrn(ishape, size, axis, bias, alpha, beta): - """ testing local response normalization """ - lrn_depth_radius = size / 2 - - inp_array = np.random.uniform(size=ishape).astype(np.float32) - - with tf.Graph().as_default(): - in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype, name="lrn0_data") - nn_ops.local_response_normalization(in1, - name="lrn", - depth_radius=lrn_depth_radius, - bias=bias, - alpha=alpha, - beta=beta) - - compare_tf_with_tvm(inp_array, 'lrn0_data:0', 'lrn:0') - -def test_forward_lrn(): - _test_lrn((1, 3, 20, 20), 3, 1, 1.0, 1.0, 0.5) - -####################################################################### -# l2_normalize -# ------------ - -def _test_l2_normalize(ishape, eps, axis): - """ testing l2 normalize (uses max, sum, square, sqrt frontend operators)""" - - inp_array = np.random.uniform(size=ishape).astype(np.float32) - - with tf.Graph().as_default(): - in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) - nn.l2_normalize(in1, - axis=axis, - epsilon=eps, - name=None, - dim=None) - - compare_tf_with_tvm(inp_array, 'Placeholder:0', 'l2_normalize:0') - -def test_forward_l2_normalize(): - _test_l2_normalize((1, 3, 20, 20), 0.001, (0,)) - -####################################################################### -# transpose -# --------- -def _test_forward_transpose(ishape, axes=None): - input = np.random.uniform(size=ishape).astype(np.float32) - - with tf.Graph().as_default(): - in1 = tf.placeholder(shape=input.shape, dtype=input.dtype, name="transpose_data") - - if axes is None: - tf.transpose(in1) - else: - tf.transpose(in1, perm=axes) - - compare_tf_with_tvm(input, 'transpose_data:0', 'transpose:0') - -def test_forward_transpose(): - _test_forward_transpose((2, 3, 4)) - _test_forward_transpose((7, 8, 8, 10)) - _test_forward_transpose((2, 3, 4), (1, 2, 0)) - _test_forward_transpose((2, 3, 4), (0, 1, 2)) - _test_forward_transpose((2, 3, 4, 5), (3, 0, 1, 2)) - - -def test_forward_ceil(): - ishape = (1, 3, 10, 10) - inp_array = np.random.uniform(size=ishape).astype(np.float32) - with tf.Graph().as_default(): - in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) - tf.ceil(in1) - compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Ceil:0') - -def test_forward_floor(): - ishape = (1, 3, 10, 10) - inp_array = np.random.uniform(size=ishape).astype(np.float32) - with tf.Graph().as_default(): - in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) - 
tf.floor(in1) - compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Floor:0') - -def test_forward_relu(): - ishape = (1, 3, 10, 10) - inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) - with tf.Graph().as_default(): - in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) - tf.nn.relu(in1) - compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Relu:0') - -def test_forward_leaky_relu(): - ishape = (1, 3, 10, 10) - inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) - with tf.Graph().as_default(): - in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) - tf.nn.leaky_relu(in1, alpha=0.4) - compare_tf_with_tvm(inp_array, 'Placeholder:0', 'LeakyRelu:0') - -def test_forward_elu(): - ishape = (1, 3, 10, 10) - inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) - with tf.Graph().as_default(): - in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) - tf.nn.elu(in1) - compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Elu:0') - -def test_forward_selu(): - ishape = (1, 3, 10, 10) - inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) - with tf.Graph().as_default(): - in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) - tf.nn.selu(in1) - compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Selu:0') - -def test_forward_tanh(): - ishape = (1, 3, 10, 10) - inp_array = np.random.uniform(-5, 5, size=ishape).astype(np.float32) - with tf.Graph().as_default(): - in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) - tf.nn.tanh(in1) - compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Tanh:0') - -####################################################################### -# Mean -# ---- -def test_forward_mean(): - def check_mean(ishape, **kwargs): - inp_array = np.random.uniform(size=ishape).astype(np.float32) - with tf.Graph().as_default(): - in1 = tf.placeholder(shape=inp_array.shape, dtype=inp_array.dtype) - tf.keras.backend.mean(in1, **kwargs) - compare_tf_with_tvm(inp_array, 'Placeholder:0', 'Mean:0', no_gpu=True) - - check_mean((10, 8, 16, 32)) - check_mean((10, 8, 16, 32), axis=(2,3)) - check_mean((10, 8, 16, 32), axis=(1,2), keepdims=True) - -####################################################################### -# Relational operators -# -------------------- -def _test_forward_rel_op(data, func): - with tf.Graph().as_default(): - in1 = tf.placeholder(shape=data[0].shape, dtype=data[0].dtype, name='in1') - in2 = tf.placeholder(shape=data[1].shape, dtype=data[1].dtype, name='in2') - op = func(in1, in2, name='op') - out = tf.cast(op, tf.int32, name='out1') - compare_tf_with_tvm([data[0], data[1]], ['in1:0', 'in2:0'], 'out1:0') - -def test_forward_rel_ops(): - t1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - t2 = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]]) - _test_forward_rel_op([t1, t2], math_ops.less) - _test_forward_rel_op([t1, t2], math_ops.greater) - _test_forward_rel_op([t1, t2], math_ops.less_equal) - _test_forward_rel_op([t1, t2], math_ops.greater_equal) - _test_forward_rel_op([t1, t2], math_ops.equal) - _test_forward_rel_op([t1, t2], math_ops.not_equal) - - -####################################################################### -# Main -# ---- -if __name__ == '__main__': - # Transforms - test_forward_transpose() - test_forward_reshape() - test_forward_squeeze() - test_forward_pack() - test_forward_resize_bilinear() - test_forward_crop() - test_forward_pad() - test_forward_gather() - test_forward_stridedslice() - test_forward_split() - test_forward_unstack() - - # Activations - 
test_forward_sigmoid() - test_forward_relu() - test_forward_leaky_relu() - test_forward_elu() - test_forward_selu() - test_forward_tanh() - - # Reductions - test_forward_argminmax() - test_forward_reduce() - test_forward_mean() - - # NN - test_forward_convolution() - test_forward_pooling() - if tf.__version__ == '1.4.1': - _test_forward_concat_v2() - test_forward_lrn() - test_forward_l2_normalize() - - # General - test_forward_multi_input() - test_forward_multi_output() - test_forward_variable() - - # End to End - test_forward_inception_v3() - test_forward_inception_v1() - test_forward_mobilenet() - test_forward_resnetv2() - test_forward_placeholder() - test_forward_ptb() - - # RNN - test_forward_lstm() - - # Elementwise - test_forward_ceil() - test_forward_floor() - - # Relational ops - test_forward_rel_ops() - test_forward_logical() diff --git a/nnvm/tests/python/unittest/test_correct_layout.py b/nnvm/tests/python/unittest/test_correct_layout.py deleted file mode 100644 index 5d313fbacb3e..000000000000 --- a/nnvm/tests/python/unittest/test_correct_layout.py +++ /dev/null @@ -1,379 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import nnvm -import nnvm.symbol as sym -import nnvm.graph as graph -from nnvm.compiler import graph_attr - -def correct_layout(g, layout=None): - if isinstance(g, nnvm.symbol.Symbol): - g = graph.create(g) - if layout: - graph_attr.set_layout_inputs(g, layout) - g = g.apply("CorrectLayout") - ldict = {} - vlayout = g.json_attr("layout") - entry_ptr = g.index.entry_ptr - for i, n in enumerate(g.index.nodes): - begin, end = entry_ptr[i], entry_ptr[i + 1] - ldict[n["name"]] = vlayout[begin:end] - return g, ldict - - -# Level 1 -def test_dense(): - x = sym.Variable("data", shape=(10, 20)) - y = sym.dense(x, units=30, name="fc") - g, ldict = correct_layout(y, "HW") - assert(ldict["data"][0] == "HW") - assert(ldict["fc"][0] == "HW") - assert(ldict["fc_bias"][0] == "__undef__") - # second pass will insert layout transform - _, ldict = correct_layout(g, "HW16w") - assert(ldict["data"][0] == "HW16w") - assert(ldict["data_HW"][0] == "HW") - assert(ldict["fc"][0] == "HW") - assert(ldict["fc_bias"][0] == "__undef__") - - -def test_matmul(): - a = sym.Variable("a", shape=(10, 20)) - b = sym.Variable("b", shape=(20, 30)) - c = sym.matmul(a, b, name="matmul") - g, ldict = correct_layout(c, {"a" : "HW", "b" : "WC"}) - assert(ldict["a"][0] == "HW") - assert(ldict["b"][0] == "WC") - assert(ldict["matmul"][0] == "HC") - # second pass will insert layout transform - _, ldict = correct_layout(g, {"a" : "HW16w", "b" : "WC16c"}) - assert(ldict["a"][0] == "HW16w") - assert(ldict["a_HW"][0] == "HW") - assert(ldict["b"][0] == "WC16c") - assert(ldict["b_WC"][0] == "WC") - assert(ldict["matmul"][0] == "HC") - a = sym.Variable("a", shape=(20, 10)) - c = sym.matmul(a, b, name="matmul", transpose_a=True) - g, ldict = correct_layout(c, {"a" : "HW", "b" : "HC"}) - assert(ldict["a"][0] == "HW") - assert(ldict["b"][0] == "HC") - assert(ldict["matmul"][0] == "WC") - b = sym.Variable("b", shape=(30, 20)) - c = sym.matmul(a, b, name="matmul", transpose_b=True) - g, ldict = correct_layout(c, {"a" : "HW", "b" : "CW"}) - assert(ldict["a"][0] == "HW") - assert(ldict["b"][0] == "CW") - assert(ldict["matmul"][0] == "HC") - a = sym.Variable("a", shape=(20, 10)) - b = sym.Variable("b", shape=(30, 20)) - c = sym.matmul(a, b, name="matmul", transpose_a=True, transpose_b=True) - g, ldict = correct_layout(c, {"a" : "HW", "b" : "CH"}) - assert(ldict["a"][0] == "HW") - assert(ldict["b"][0] == "CH") - assert(ldict["matmul"][0] == "WC") - - -def test_concatenate(): - x1 = sym.Variable("x", shape=(10, 20)) - x2 = sym.Variable("y", shape=(10, 30)) - z = sym.concatenate(x1, x2, name="concat") - g, ldict = correct_layout(z, {"x": "HW", "y": "HW"}) - assert(ldict["x"][0] == "HW") - assert(ldict["y"][0] == "HW") - assert(ldict["concat"][0] == "HW") - # second pass will insert layout transform - _, ldict = correct_layout(g, {"x": "HW16w", "y": "HW16w"}) - assert(ldict["x"][0] == "HW16w") - assert(ldict["y"][0] == "HW16w") - assert(ldict["concat"][0] == "HW16w") - - x1 = sym.Variable("x", shape=(10, 20, 60)) - x2 = sym.Variable("y", shape=(10, 20, 40)) - z = sym.concatenate(x1, x2, axis=2, name="concat") - g, ldict = correct_layout(z, {"x": "H20wW", "y": "H20wW"}) - assert(ldict["x"][0] == "H20wW") - assert(ldict["y"][0] == "H20wW") - assert(ldict["concat"][0] == "H20wW") - # second pass will insert layout transform - _, ldict = correct_layout(g, {"x": "HW", "y": "HW"}) - assert(ldict["x_H20wW"][0] == "H20wW") - assert(ldict["x_H20wW"][0] == "H20wW") - assert(ldict["concat"][0] == "H20wW") - - -def test_expand_dims(): - x = sym.Variable("x", 
shape=(10, 20)) - y = sym.expand_dims(x, axis=1, name="y") - g, ldict = correct_layout(y, "HW") - assert(ldict["x"][0] == "HW") - assert(ldict["y"][0] == "__undef__") - # second pass will insert layout transform - _, ldict = correct_layout(g, "HW16w") - assert(ldict["x"][0] == "HW16w") - assert(ldict["x_HW"][0] == "HW") - assert(ldict["y"][0] == "__undef__") - - -def test_split(): - x = sym.Variable("x", shape=(10, 20)) - y = sym.split(x, indices_or_sections=[11], name="y") - g, ldict = correct_layout(y, "HW") - assert(ldict["x"][0] == "HW") - assert(ldict["y"][0] == "__undef__") - # second pass will insert layout transform - _, ldict = correct_layout(g, "HW16w") - assert(ldict["x"][0] == "HW16w") - assert(ldict["x_HW"][0] == "HW") - assert(ldict["y"][0] == "__undef__") - - -def test_batchnorm(): - x = sym.Variable("data", shape=(10, 20, 30, 40)) - y = sym.batch_norm(x, axis=1, epsilon=2e-5, name="bn") - g, ldict = correct_layout(y, "NCHW") - assert(ldict["data"][0] == "NCHW") - assert(ldict["bn"][0] == "NCHW") - assert(ldict["bn"][1] == "C") - assert(ldict["bn"][2] == "C") - assert(ldict["bn_beta"][0] == "C") - assert(ldict["bn_gamma"][0] == "C") - assert(ldict["bn_moving_mean"][0] == "C") - assert(ldict["bn_moving_var"][0] == "C") - # batch_norm can deal with sub-dim of C at the last dim. - g, ldict = correct_layout(g, "NCHW16c") - assert(ldict["data"][0] == "NCHW16c") - assert(ldict["bn"][0] == "NCHW16c") - assert(ldict["bn"][1] == "C16c") - assert(ldict["bn"][2] == "C16c") - assert(ldict["bn_beta"][0] == "C") - assert(ldict["bn_beta_C16c"][0] == "C16c") - assert(ldict["bn_gamma"][0] == "C") - assert(ldict["bn_gamma_C16c"][0] == "C16c") - assert(ldict["bn_moving_mean"][0] == "C") - assert(ldict["bn_moving_mean_C16c"][0] == "C16c") - assert(ldict["bn_moving_var"][0] == "C") - assert(ldict["bn_moving_var_C16c"][0] == "C16c") - # but for other layout, it does a layout transform for data - g, ldict = correct_layout(g, "NCH16cW") - assert(ldict["data"][0] == "NCH16cW") - assert(ldict["data_NCHW16c"][0] == "NCHW16c") - assert(ldict["bn"][0] == "NCHW16c") - assert(ldict["bn"][1] == "C16c") - assert(ldict["bn"][2] == "C16c") - assert(ldict["bn_beta"][0] == "C") - assert(ldict["bn_beta_C16c"][0] == "C16c") - assert(ldict["bn_gamma"][0] == "C") - assert(ldict["bn_gamma_C16c"][0] == "C16c") - assert(ldict["bn_moving_mean"][0] == "C") - assert(ldict["bn_moving_mean_C16c"][0] == "C16c") - assert(ldict["bn_moving_var"][0] == "C") - assert(ldict["bn_moving_var_C16c"][0] == "C16c") - - -def test_flatten(): - x = sym.Variable("x", shape=(10, 20, 10, 10)) - y = sym.flatten(x, name="y") - g, ldict = correct_layout(y, "NCHW") - assert(ldict["x"][0] == "NCHW") - assert(ldict["y"][0] == "__undef__") - # second pass will insert layout transform - _, ldict = correct_layout(g, "NCHW16c") - assert(ldict["x"][0] == "NCHW16c") - assert(ldict["x_NCHW"][0] == "NCHW") - assert(ldict["y"][0] == "__undef__") - - -def test_softmax(): - x = sym.Variable("x", shape=(10, 20, 10, 10)) - y = sym.softmax(x, name="y") - g, ldict = correct_layout(y, "NCHW") - assert(ldict["x"][0] == "NCHW") - assert(ldict["y"][0] == "NCHW") - # second pass will insert layout transform - _, ldict = correct_layout(g, "NCHW16c") - assert(ldict["x"][0] == "NCHW16c") - assert(ldict["x_NCHW"][0] == "NCHW") - assert(ldict["y"][0] == "NCHW") - - -# Level 2 -def test_conv2d(): - x = sym.Variable("data", shape=(1, 32, 512, 512)) - y = sym.conv2d(x, name="conv", channels=12, - kernel_size=(3,3), padding=(1,1), layout="NCHW") - _, ldict = correct_layout(y) 
- assert(ldict["data"][0] == "NCHW") - assert(ldict["conv_weight"][0] == "OIHW") - assert(ldict["conv_bias"][0] == "C") - assert(ldict["conv"][0] == "NCHW") - y = sym.conv2d(x, name="conv", channels=12, - kernel_size=(3,3), padding=(1,1), layout="NCHW16c", - kernel_layout="OIHW16i16o", out_layout="NCHW8c") - _, ldict = correct_layout(y) - assert(ldict["data"][0] == "NCHW16c") - assert(ldict["conv_weight"][0] == "OIHW16i16o") - assert(ldict["conv_bias"][0] == "C8c") - assert(ldict["conv"][0] == "NCHW8c") - y = sym.conv2d(x, name="conv", channels=12, - kernel_size=(3,3), padding=(1,1), layout="N16cHWC") - _, ldict = correct_layout(y) - assert(ldict["data"][0] == "N16cHWC") - assert(ldict["conv_weight"][0] == "OIHW") - assert(ldict["conv_bias"][0] == "16cC") - assert(ldict["conv"][0] == "N16cHWC") - - -def test_conv2d_transpose(): - x = sym.Variable("data", shape=(1, 32, 512, 512)) - y = sym.conv2d_transpose(x, name="conv", channels=12, - kernel_size=(3,3), padding=(1,1), layout="NCHW") - _, ldict = correct_layout(y) - assert(ldict["data"][0] == "NCHW") - assert(ldict["conv_weight"][0] == "OIHW") - assert(ldict["conv_bias"][0] == "C") - assert(ldict["conv"][0] == "NCHW") - - -def test_max_pool2d(): - x = sym.Variable("data", shape=(1, 32, 512, 512)) - y = sym.max_pool2d(x, name="pool", pool_size=(3,3), - padding=(1,1), layout="NCHW") - g, ldict = correct_layout(y) - assert(ldict["data"][0] == "NCHW") - assert(ldict["pool"][0] == "NCHW") - # if index of H and W remain the same, - # pool2d does not convert the layout. - g, ldict = correct_layout(g, "NCHW16c") - assert(ldict["data"][0] == "NCHW16c") - assert(ldict["pool"][0] == "NCHW16c") - # for other layout it requires a layout transform. - g, ldict = correct_layout(g, "NHWC") - assert(ldict["data"][0] == "NHWC") - assert(ldict["data_NCHW"][0] == "NCHW") - assert(ldict["pool"][0] == "NCHW") - - -def test_global_pool2d(): - x = sym.Variable("data", shape=(1, 32, 512, 512)) - y = sym.global_max_pool2d(x, name="pool", layout="NCHW") - g, ldict = correct_layout(y) - assert(ldict["data"][0] == "NCHW") - assert(ldict["pool"][0] == "NCHW") - # if index of H and W remain the same, - # pool2d does not convert the layout. - g, ldict = correct_layout(g, "NCHW16c") - assert(ldict["data"][0] == "NCHW16c") - assert(ldict["pool"][0] == "NCHW16c") - # for other layout it requires a layout transform. 
- g, ldict = correct_layout(g, "NHWC") - assert(ldict["data"][0] == "NHWC") - assert(ldict["data_NCHW"][0] == "NCHW") - assert(ldict["pool"][0] == "NCHW") - - -# Level 3 -def test_reshape(): - x = sym.Variable("x", shape=(4,)) - y = sym.reshape(x, shape=(2,2), name="y") - g, ldict = correct_layout(y, "C") - assert(ldict["x"][0] == "C") - assert(ldict["y"][0] == "__undef__") - # second pass will insert layout transform - g, ldict = correct_layout(g, "C16c") - assert(ldict["x"][0] == "C16c") - assert(ldict["x_C"][0] == "C") - assert(ldict["y"][0] == "__undef__") - - -def test_transpose(): - x = sym.Variable("x", shape=(1, 32, 512, 512)) - y = sym.transpose(x, name="y", axes=(0, 2, 3, 1)) - g, ldict = correct_layout(y, "NCHW") - assert(ldict["x"][0] == "NCHW") - assert(ldict["y"][0] == "NHWC") - # second pass will insert layout transform - g, ldict = correct_layout(g, "NCHW16c") - assert(ldict["x"][0] == "NCHW16c") - assert(ldict["x_NCHW"][0] == "NCHW") - assert(ldict["y"][0] == "NHWC") - - -def test_broadcast_to(): - x = sym.Variable("x", shape=(4, 1)) - y = sym.broadcast_to(x, shape=(0, 4), name="y") - g, ldict = correct_layout(y, "HW") - assert(ldict["x"][0] == "HW") - assert(ldict["y"][0] == "__undef__") - # second pass will insert layout transform - g, ldict = correct_layout(g, "HW16h") - assert(ldict["x"][0] == "HW16h") - assert(ldict["x_HW"][0] == "HW") - assert(ldict["y"][0] == "__undef__") - - -def test_broadcast_binary(): - x = sym.Variable("x", shape=(1, 16, 512, 512)) - y = sym.Variable("y", shape=(16, 512, 512)) - z = sym.broadcast_add(x, y, name="z") - g, ldict = correct_layout(z, {"x": "NCHW", "y": "CHW"}) - assert(ldict["x"][0] == "NCHW") - assert(ldict["y"][0] == "CHW") - assert(ldict["z"][0] == "NCHW") - # prior to keep the left layout if they do not match. - g, ldict = correct_layout(g, {"x": "NCHW16c", "y": "CHW"}) - assert(ldict["x"][0] == "NCHW16c") - assert(ldict["y"][0] == "CHW") - assert(ldict["y_CHW16c"][0] == "CHW16c") - assert(ldict["z"][0] == "NCHW16c") - # broadcast_add(HCW16c, N16nCH16cW) - g, ldict = correct_layout(z, {"x": "HCW16c", "y": "N16nCH16cW"}) - assert(ldict["x"][0] == "HCW16c") - assert(ldict["y"][0] == "N16nCH16cW") - assert(ldict["x_CH16cW"][0] == "CH16cW") - assert(ldict["z"][0] == "N16nCH16cW") - - -def test_reduce(): - x = sym.Variable("x", shape=(1, 16, 512, 512)) - y = sym.sum(x, name="y", axis=1) - g, ldict = correct_layout(y, "NCHW") - assert(ldict["x"][0] == "NCHW") - assert(ldict["y"][0] == "__undef__") - # second pass will insert layout transform - g, ldict = correct_layout(g, "NCHW16c") - assert(ldict["x"][0] == "NCHW16c") - assert(ldict["x_NCHW"][0] == "NCHW") - assert(ldict["y"][0] == "__undef__") - - -if __name__ == "__main__": - test_dense() - test_matmul() - test_concatenate() - test_expand_dims() - test_split() - test_batchnorm() - test_flatten() - test_softmax() - test_conv2d() - test_conv2d_transpose() - test_max_pool2d() - test_global_pool2d() - test_reshape() - test_transpose() - test_broadcast_to() - test_broadcast_binary() - test_reduce() diff --git a/nnvm/tests/python/unittest/test_graph.py b/nnvm/tests/python/unittest/test_graph.py deleted file mode 100644 index 1ba0a2487cee..000000000000 --- a/nnvm/tests/python/unittest/test_graph.py +++ /dev/null @@ -1,160 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import json -import nnvm.symbol as sym -import nnvm.graph as graph -import nnvm.compiler.graph_util as graph_util - -def test_json_pass(): - x = sym.Variable('x') - y = sym.dense(data=x, name='conv', units=30) - g = graph.create(y) - ret = g.apply('SaveJSON') - ret._set_json_attr('json', ret.json_attr('json')) - g2 = ret.apply('LoadJSON') - assert g2.apply('SaveJSON').json_attr('json') == ret.json_attr('json') - json = g.json() - g2 = graph.load_json(json) - assert json == g2.json() - - -def test_json_pass_with_attr(): - x = sym.Variable('x') - y = sym.dense(data=x, name='fc', units=30) - g = graph.create(y) - g._set_json_attr('version', '0.1.0') - ret = g.apply('SaveJSON') - json_str = ret.json_attr('json') - ret._set_json_attr('json', json_str) - g2 = ret.apply('LoadJSON') - assert g2.json_attr('version') == '0.1.0' - - -def test_graph_json_attr(): - x = sym.Variable('x') - y = sym.dense(data=x, name='fc', units=30) - g = graph.create(y) - g._set_json_attr('ilist', [1,2,3], 'list_int') - assert g.json_attr('ilist') == [1,2,3] - -def test_list_args(): - x = sym.Variable('x') - z = sym.Variable('z') - y = sym.dense(data=x, name='fc', units=30) - y = sym.elemwise_add(y, z, name='add1') - -def test_infer_shape(): - x = sym.Variable('x', shape=(2, 4, 2)) - y = sym.elemwise_add(x, x, name='add1') - y = sym.flatten(y, name="flatten") - g = graph.create(y) - g._set_json_attr("shape_attr_key", "shape") - g = g.apply('InferShape') - jgraph = json.loads(g.apply('SaveJSON').json_attr('json')) - jnodes = jgraph['nodes'] - jnode_row_ptr = jgraph['node_row_ptr'] - nindex = {n['name']: i for i, n in enumerate(jnodes)} - assert g.json_attr('shape')[jnode_row_ptr[nindex["flatten"]]] == [2, 8] - assert g.json_attr('shape')[jnode_row_ptr[nindex["add1"]]] == [2, 4, 2] - -def test_infer_shape_known_partial(): - x = sym.Variable('x') - y = sym.elemwise_add(x, x, name='add1') - y = sym.flatten(y, name="flatten1") - g = graph.create(y) - jgraph = json.loads(g.apply('SaveJSON').json_attr('json')) - shape = [[2, 4, 2], [] , []] - g._set_json_attr("shape", shape, 'list_shape') - g = g.apply("InferShape") - jnodes = jgraph['nodes'] - jnode_row_ptr = jgraph['node_row_ptr'] - nindex = {n['name']: i for i, n in enumerate(jnodes)} - assert g.json_attr('shape')[jnode_row_ptr[nindex["flatten1"]]] == [2, 8] - assert g.json_attr('shape')[jnode_row_ptr[nindex["add1"]]] == [2, 4, 2] - -def test_infer_type(): - x = sym.Variable('x', dtype=0) - y = sym.elemwise_add(x, x, name='add1') - y = sym.cast(y, dtype="float64", name="cast1") - g = graph.create(y) - g._set_json_attr("dtype_attr_key", "dtype") - g = g.apply('InferType') - jgraph = json.loads(g.apply('SaveJSON').json_attr('json')) - jnodes = jgraph['nodes'] - jnode_row_ptr = jgraph['node_row_ptr'] - nindex = {n['name']: i for i, n in enumerate(jnodes)} - assert g.json_attr('dtype')[jnode_row_ptr[nindex["cast1"]]] == 1 - assert g.json_attr('dtype')[jnode_row_ptr[nindex["add1"]]] == 0 - -def 
test_plan_memory(): - x = sym.Variable('x', shape=(4, 2)) - x2 = sym.elemwise_add(x, x, name='addk') - y = sym.flatten(x2, name="reshapek") - y = sym.elemwise_add(y, x2, name="add2") - y = sym.elemwise_add(y, y) - g = graph.create(y) - g._set_json_attr("shape_attr_key", "shape") - g = g.apply(["InferShape", "InferType", "PlanMemory"]) - jgraph = json.loads(g.apply('SaveJSON').json_attr('json')) - jnodes = jgraph['nodes'] - jnode_row_ptr = jgraph['node_row_ptr'] - storage_id = g.json_attr('storage_id') - nindex = {n['name']: i for i, n in enumerate(jnodes)} - assert (storage_id[jnode_row_ptr[nindex["addk"]]] != - storage_id[jnode_row_ptr[nindex["reshapek"]]]) - assert (storage_id[jnode_row_ptr[nindex["add2"]]] == - storage_id[jnode_row_ptr[nindex["reshapek"]]]) - -def test_print_graph_ir(): - x = sym.Variable("x", shape=(1, 1, 10, 20)) - y = sym.conv2d(x + 1, name="y", channels=10, kernel_size=(3,3)) - g = graph.create(y) - g = g.apply("InferShape") - ir1 = g.ir() - ir2 = g.ir(join_entry_attrs=["shape"]) - assert("y_bias" in ir1) - assert("shape=" in ir2) - -def test_gradient(): - x = sym.Variable("x") - y = sym.Variable("y") - z1 = sym.elemwise_add(x, sym.sqrt(y)) - z2 = sym.log(x) - gradient = graph_util.gradients([z1, z2], [x, y]) - assert len(gradient) == 2 - - g1 = sym.Variable("g1") - g2 = sym.Variable("g2") - grad_ys = [g1, g2] - gradient = graph_util.gradients(sym.Group([z1, z2]), - sym.Group([x, y]), grad_ys=grad_ys) - g_graph = graph.create(sym.Group(gradient)).ir() - assert len(gradient) == 2 - assert "g1" in g_graph - assert "g2" in g_graph - -if __name__ == "__main__": - test_print_graph_ir() - test_json_pass_with_attr() - test_graph_json_attr() - test_json_pass() - test_infer_shape() - test_infer_shape_known_partial() - test_infer_type() - test_plan_memory() - test_list_args() - test_gradient() diff --git a/nnvm/tests/python/unittest/test_graph_gradient.py b/nnvm/tests/python/unittest/test_graph_gradient.py deleted file mode 100644 index 4ae6053c946f..000000000000 --- a/nnvm/tests/python/unittest/test_graph_gradient.py +++ /dev/null @@ -1,152 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import nnvm.symbol as sym -from nnvm.compiler import graph_util - -def test_cnn_gradients(): - # input data - h = 128 - w = 128 - data_shape = (1000, 3, h, w) - data = sym.Variable('data', shape=data_shape, dtype=0) - - # conv2d - num_channels = 64 - kernel_size = 32 - conv_w_shape = (num_channels, 3, kernel_size, kernel_size) - conv_b_shape = (num_channels,) - conv_w = sym.Variable('conv_w', shape=conv_w_shape) - conv_b = sym.Variable('conv_b', shape=conv_b_shape) - conv1 = sym.conv2d(data=data, weight=conv_w, bias=conv_b, - channels=num_channels, kernel_size=(kernel_size, kernel_size), - name='conv1') - # relu1 - relu1 = sym.relu(data=conv1, name='relu1') - # max pooling - max_pooling1 = sym.max_pool2d(data=relu1, pool_size=(2, 2), name='max_pooling1') - # flatten - flatten1 = sym.flatten(data=max_pooling1) - # shape after flatten - flatten_out_shape = (h - kernel_size) * (w - kernel_size) * num_channels - # dense1 - dense1_hidden_units = 100 - dense1 = sym.dense(data=flatten1, name='dense1', units=dense1_hidden_units) - # relu2 - relu2 = sym.relu(data=dense1, name='relu2') - # dense2 - dense2_hidden_units = 10 - dense2 = sym.dense(data=relu2, name='dense2', units=dense2_hidden_units) - # softmax - mlp = sym.softmax(data=dense2, name='softmax') - # fake non-sparse label - label = sym.full_like(mlp, fill_value=1) - # cross entropy loss - ce_loss = sym.sum( - sym.elemwise_mul(sym.log_softmax(dense2), label), - axis=1, - keepdims=True, - name="ce_loss") - - # input variables: - # print grad_g.symbol.list_input_names() - # >> ['data', 'conv_w', 'conv_b', - # 'dense1_weight', 'dense1_bias', - # 'dense2_weight', 'dense2_bias'] - - # output gradient variables: - # print grad_g.symbol.list_output_names() - # >> ['conv1_grad_data', 'conv1_grad_weight', 'conv1_grad_bias', - # 'dense1_grad_weight', 'dense1_grad_bias', - # 'dense2_grad_weight', 'dense2_grad_bias'] - grad_g = graph_util.get_gradient_graph(ce_loss, ce_loss.list_input_variables()) - - # infer shape - in_shapes, out_shapes = graph_util.infer_shape(grad_g) - - # forward graph shape - assert in_shapes == [list(data_shape), list(conv_w_shape), list(conv_b_shape), - [dense1_hidden_units, flatten_out_shape], [dense1_hidden_units], - [dense2_hidden_units, dense1_hidden_units], [dense2_hidden_units]] - # input grads shape should be equal with input shape - assert in_shapes == out_shapes - - # output grads w.r.t input variables - grads = graph_util.gradients(ce_loss, ce_loss.list_input_variables()) - - # gradients number should be equal with grad_input number - assert len(grads) == len(ce_loss.list_input_variables()) - - # infer type - in_dtypes, out_dtypes = graph_util.infer_dtype(grad_g) - assert out_dtypes == ['float32', 'float32', 'float32', 'float32', 'float32', 'float32', 'float32'] - -def test_multi_loss_graph_gradients(): - # input data - shape1 = (1000, 100) - data1 = sym.Variable('data1', shape=(1000, 100), dtype=0) - - # fake non-sparse label - label = sym.full(fill_value=3) - - # square loss - sub1 = sym.elemwise_sub(data1, label, name="sub1") - square_loss = sym.sum(data=sub1**2, axis=1, name="square_loss") - - # fake loss1 - shape2 = (1000, ) - data2 = sym.Variable('data2', shape=shape2, dtype=0) - loss1 = sym.sqrt(data2, name="loss1") - - # fake loss2 - loss2 = sym.relu(data1, name='loss2') - - # block loss1 - total_loss = sym.elemwise_sum( - sym.block_grad(loss1), - square_loss, - num_args=2, - name="total_loss") - - # grad_g.symbol.list_output_names() - # >> ['loss1_grad_0_output', 'grad_sum_output'] - grad_g = 
graph_util.get_gradient_graph([total_loss, loss2], total_loss.list_input_variables()) - # infer shape - in_shapes, out_shapes = graph_util.infer_shape(grad_g) - assert out_shapes == [list(shape2), list(shape1)] - - # grad_data1 is elemwise_sum of grad_loss2, grad_square_loss - grad_data1 = grad_g.symbol[1] - assert grad_data1.list_attr()['num_args'] == '2' - - # block grad should return zero grad - grad_data2 = grad_g.symbol[0] - assert 'zeros_like' in grad_g.ir() - - # test reverse infer shape for label - assert grad_g.apply('InferShape').json_attr('shape_num_unknown_nodes') == 0 - - # infer type - in_dtypes, out_dtypes = graph_util.infer_dtype(grad_g) - assert out_dtypes == ['float32', 'float32'] - - # test reverse infer type for label - assert grad_g.apply('InferType').json_attr('dtype_num_unknown_nodes') == 0 - - -if __name__ == "__main__": - test_cnn_gradients() - test_multi_loss_graph_gradients() diff --git a/nnvm/tests/python/unittest/test_infer_shape.py b/nnvm/tests/python/unittest/test_infer_shape.py deleted file mode 100644 index c394fab562f2..000000000000 --- a/nnvm/tests/python/unittest/test_infer_shape.py +++ /dev/null @@ -1,415 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import json -import nnvm.symbol as sym -import nnvm.graph as graph - -def infer_shape(sym): - g = graph.create(sym) - g._set_json_attr("shape_attr_key", "shape") - g = g.apply("InferShape") - sdict = {} - vshape = g.json_attr("shape") - entry_ptr = g.index.entry_ptr - for i, n in enumerate(g.index.nodes): - begin, end = entry_ptr[i], entry_ptr[i + 1] - sdict[n["name"]] = vshape[begin:end] - return sdict - -# Level 1 -def test_dense(): - x = sym.Variable("x", shape=(10, 20)) - y = sym.dense(x, units=30, name="fc") - sdict = infer_shape(y) - assert(sdict["fc"][0] == [10, 30]) - assert(sdict["fc_bias"][0] == [30]) - - -def test_matmul(): - a = sym.Variable('a', shape=(10, 20)) - b = sym.Variable('b', shape=(20, 30)) - c = sym.matmul(a, b, name="matmul") - sdict = infer_shape(c) - assert(sdict["matmul"][0] == [10, 30]) - a = sym.Variable('a', shape=(20, 10)) - c = sym.matmul(a, b, name="matmul", transpose_a=True) - sdict = infer_shape(c) - assert(sdict["matmul"][0] == [10, 30]) - b = sym.Variable('b', shape=(30, 20)) - c = sym.matmul(a, b, name="matmul", transpose_a=True, transpose_b=True) - sdict = infer_shape(c) - assert(sdict["matmul"][0] == [10, 30]) - a = sym.Variable('a', shape=(10, 20)) - c = sym.matmul(a, b, name="matmul", transpose_b=True) - sdict = infer_shape(c) - assert(sdict["matmul"][0] == [10, 30]) - a = sym.Variable('a', shape=(10, 20, 30)) - b = sym.Variable('b', shape=(30, 40, 50)) - c = sym.matmul(a, b, name="matmul") - sdict = infer_shape(c) - assert(sdict["matmul"][0] == [10, 20, 40, 50]) - a = sym.Variable('a', shape=(30, 20, 10)) - b = sym.Variable('b', shape=(50, 40, 30)) - c = sym.matmul(a, b, name="matmul", transpose_a=True, transpose_b=True) - sdict = infer_shape(c) - assert(sdict["matmul"][0] == [10, 20, 40, 50]) - - -def test_concatenate(): - x1 = sym.Variable("x", shape=(10, 20)) - x2 = sym.Variable("y", shape=(10, 30)) - z = sym.concatenate(x1, x2, name="concat") - sdict = infer_shape(z) - assert(sdict["concat"][0] == [10, 50]) - z = sym.concatenate(x1, x1, axis=0, name="concat") - sdict = infer_shape(z) - assert(sdict["concat"][0] == [20, 20]) - - -def test_expand_dims(): - x = sym.Variable("x", shape=(10, 20)) - y = sym.expand_dims(x, axis=1, name="y") - sdict = infer_shape(y) - assert(sdict["y"][0] == [10, 1, 20]) - y = sym.expand_dims(x, axis=-1, name="y", num_newaxis=2) - sdict = infer_shape(y) - assert(sdict["y"][0] == [10, 20, 1, 1]) - - -def test_split(): - x1 = sym.Variable("x", shape=(10, 20)) - z = sym.split(x1, indices_or_sections=[11], name="y") - sdict = infer_shape(z) - assert(sdict["y"][0] == [10, 11]) - assert(sdict["y"][1] == [10, 9]) - z = sym.split(x1, indices_or_sections=2, name="y") - sdict = infer_shape(z) - assert(sdict["y"][0] == [10, 10]) - assert(sdict["y"][1] == [10, 10]) - z = sym.split(x1, indices_or_sections=[6], axis=-1, name="y") - sdict = infer_shape(z) - assert(sdict["y"][0] == [10, 6]) - assert(sdict["y"][1] == [10, 14]) - - -def test_batchnorm(): - x = sym.Variable("x", shape=(10, 20)) - y = sym.batch_norm(1 / x, name="bn") - sdict = infer_shape(y) - assert(sdict["bn_gamma"][0] == [20]) - - x = sym.Variable("x", shape=(10, 20, 30, 40)) - y = sym.batch_norm(data=x, axis=0, epsilon=2e-5, name='bn') - sdict = infer_shape(y) - assert(sdict['bn_moving_var'][0] == [10]) - - y = sym.batch_norm(data=x, axis=1, epsilon=2e-5, name='bn') - sdict = infer_shape(y) - assert(sdict['bn_gamma'][0] == [20]) - - y = sym.batch_norm(data=x, axis=2, epsilon=2e-5, name='bn') - sdict = infer_shape(y) - assert(sdict['bn_beta'][0] == [30]) - - y = 
sym.batch_norm(data=x, axis=3, epsilon=2e-5, name='bn') - sdict = infer_shape(y) - assert(sdict['bn_moving_mean'][0] == [40]) - -def test_flatten(): - x = sym.Variable("x", shape=(10, 20, 10)) - y = sym.flatten(x) * 2 - y = sym.exp(y, name="y") - sdict = infer_shape(y) - assert(sdict["y"][0] == [10, 200]) - -def test_squeeze(): - x = sym.Variable("x", shape=(1, 1, 1, 10)) - y = sym.squeeze(x, axis=(1,2), name='squeeze') - sdict = infer_shape(y) - assert(sdict['squeeze'][0] == [1, 10]) - - x = sym.Variable("x", shape=(1, 3, 1)) - y = sym.squeeze(x, name='squeeze') - sdict = infer_shape(y) - assert(sdict['squeeze'][0] == [3]) - - y = sym.squeeze(x, axis=(0), name='squeeze') - sdict = infer_shape(y) - assert(sdict['squeeze'][0] == [3, 1]) - - y = sym.squeeze(x, axis=(0,2), name='squeeze') - sdict = infer_shape(y) - assert(sdict['squeeze'][0] == [3]) - -# Level 2 -def test_conv2d(): - def check(in_shape, out_shape, **kwargs): - x = sym.Variable("x", shape=in_shape) - y = sym.conv2d(x, name="y", **kwargs) - sdict = infer_shape(y) - assert(tuple(sdict["y"][0]) == tuple(out_shape)) - - check((4, 10, 10, 12), - (4, 12, 10, 12), - channels=12, - kernel_size=(3,3), - padding=(1,1)) - check((4, 10, 12, 4), - (4, 8, 8, 5), - channels=5, - kernel_size=(3, 5), - layout="NHWC") - check((4, 10, 12, 4), - (4, 6, 8, 5), - channels=5, - dilation=(2, 2), - kernel_size=(3, 3), - layout="NHWC") - check((4, 10, 12, 4), - (4, 5, 6, 5), - channels=5, - strides=(2, 2), - kernel_size=(3, 3), - padding=(1, 1), - layout="NHWC") - - -def test_conv2d_packed(): - def check(in_shape, - out_shape, - kernel_shape, - **kwargs): - x = sym.Variable("x", shape=in_shape) - y = sym.conv2d(x, name="y", **kwargs) - sdict = infer_shape(y) - assert(tuple(sdict["y"][0]) == tuple(out_shape)) - assert(tuple(sdict["y_weight"][0]) == tuple(kernel_shape)) - - check((4, 10, 10, 12, 1, 8), - (4, 10, 10, 2, 1, 8), - (2, 12, 3, 3, 8, 8), - channels=8 * 2, - kernel_size=(3,3), - padding=(1,1), - layout="NHWC1n8c", - kernel_layout="OIHW8o8i") - - -def test_conv2d_transpose(): - def check(in_shape, out_shape, **kwargs): - x = sym.Variable("x", shape=in_shape) - y = sym.conv2d_transpose(x, name="y", **kwargs) - sdict = infer_shape(y) - assert(tuple(sdict["y"][0]) == tuple(out_shape)) - - check((4, 10, 10, 12), - (4, 15, 10, 12), - channels=15, - kernel_size=(3,3), - padding=(1,1)) - check((4, 10, 10, 12), - (4, 15, 10, 14), - channels=15, - kernel_size=(3, 5), - padding=(1, 1)) - check((4, 10, 10, 12), - (4, 15, 11, 15), - channels=15, - kernel_size=(3, 5), - padding=(1, 1), - output_padding=(1, 1)) - check((4, 10, 10, 12), - (4, 15, 15, 11), - channels=11, - kernel_size=(5, 5), - output_padding=(1, 1), - layout="NHWC") - - -def test_max_pool2d(): - def check(in_shape, out_shape, **kwargs): - x = sym.Variable("x", shape=in_shape) - y = sym.max_pool2d(x, name="y", **kwargs) - sdict = infer_shape(y) - assert(tuple(sdict["y"][0]) == tuple(out_shape)) - - check((4, 10, 12, 12), - (4, 10, 12, 12), - pool_size=(3,3), - padding=(1,1)) - check((4, 10, 12, 12), - (4, 10, 6, 6), - pool_size=(3, 3), - padding=(1, 1), - strides=(2, 2)) - check((4, 10, 12, 12), - (4, 10, 7, 7), - pool_size=(3, 3), - padding=(1, 1), - strides=(2, 2), - ceil_mode=True) - check((4, 12, 14, 10), - (4, 6, 7, 10), - pool_size=(3, 3), - padding=(1, 1), - strides=(2, 2), - layout="NHWC") - - -def test_global_pool2d(): - def check(in_shape, out_shape, **kwargs): - x = sym.Variable("x", shape=in_shape) - y = sym.global_max_pool2d(x, name="y", **kwargs) - sdict = infer_shape(y) - 
assert(tuple(sdict["y"][0]) == tuple(out_shape)) - - check((4, 10, 12, 12), - (4, 10, 1, 1)) - check((4, 10, 12, 12), - (4, 1, 1, 12), - layout="NHWC") - - -# Level 3 -def test_reshape(): - def check(in_shape, tshape, out_shape): - x = sym.Variable("x", shape=in_shape) - y = sym.reshape(x, shape=tshape, name="y") - sdict = infer_shape(y) - assert(tuple(sdict["y"][0]) == tuple(out_shape)) - - check((4,), (2, 2), (2, 2)) - check((2, 3, 4), (4, 0, 2), (4, 3, 2)) - check((2, 3, 4), (2, 0, 0), (2, 3, 4)) - check((2, 3, 4), (6, 1, -1), (6, 1, 4)) - check((2, 3, 4), (3, -1, 8), (3, 1, 8)) - check((2, 3, 4), (-1,), (24,)) - check((2, 3, 4), (-2,), (2, 3, 4)) - check((2, 3, 4), (2, -2), (2, 3, 4)) - check((2, 3, 4), (-2, 1, 1), (2, 3, 4, 1, 1)) - check((2, 3, 4), (-3, 4), (6, 4)) - check((2, 3, 4, 5), (-3, -3), (6, 20)) - check((2, 3, 4), (0, -3), (2, 12)) - check((2, 3, 4), (-3, -2), (6, 4)) - check((2, 3, 4), (-4, 1, 2, -2), (1, 2, 3, 4)) - check((2, 3, 4), (2, -4, -1, 3, -2), (2, 1, 3, 4)) - - -def test_prelu(): - def check(in_shape, axis, out_shape): - x = sym.Variable("x", shape=in_shape) - w = sym.Variable("w") - y = sym.prelu(x, w, axis=axis, name="y") - sdict = infer_shape(y) - assert(tuple(sdict["y"][0]) == tuple(out_shape)) - check((1, 3, 2, 2), 1, (1, 3, 2, 2)) - check((1, 2, 2, 3), 3, (1, 2, 2, 3)) - - -# Level 4 -def test_transpose(): - def check(in_shape, out_shape, **kwargs): - x = sym.Variable("x", shape=in_shape) - y = sym.transpose(x, name="y", **kwargs) - sdict = infer_shape(y) - assert(tuple(sdict["y"][0]) == tuple(out_shape)) - - check((4, 1), (1, 4)) - check((0, 1, 2, 3), (1, 2, 3, 0), axes=(1, 2, 3, 0)) - - -def test_broadcast_to(): - def check(in_shape, tshape, out_shape): - x = sym.Variable("x", shape=in_shape) - y = sym.broadcast_to(x, shape=tshape, name="y") - sdict = infer_shape(y) - assert(tuple(sdict["y"][0]) == tuple(out_shape)) - - check((4, 1), (0, 4), (4, 4)) - check((4, 1, 5), (0, 4, 5), (4, 4, 5)) - - -def test_broadcast_binary(): - def check(lhs_shape, rhs_shape, out_shape): - x = sym.Variable("x", shape=lhs_shape) - y = sym.Variable("y", shape=rhs_shape) - z = sym.broadcast_add(x, y, name="y") - sdict = infer_shape(z) - assert(tuple(sdict["y"][0]) == tuple(out_shape)) - - check((4, 1), (4), (4, 4)) - check((5, 1, 1), (1, 4, 4), (5, 4, 4)) - check((6, 1, 4), (5, 4), (6, 5, 4)) - - -def test_reduce(): - def check(in_shape, out_shape, **kwargs): - x = sym.Variable("x", shape=in_shape) - y = sym.sum(x, name="y", **kwargs) - sdict = infer_shape(y) - assert(tuple(sdict["y"][0]) == tuple(out_shape)) - - check((4, 5), (4,), axis=1) - check((4, 5), (4, 1), axis=1, keepdims=True) - check((4, 5), (1, 5), axis=0, keepdims=True) - check((4, 5), (1, 1), axis=(), keepdims=True) - check((4, 5), (1,), axis=()) - check((4, 5, 10), (5,), axis=(0, 2)) - check((4, 5, 10), (1, 5, 1), axis=(0, 2), keepdims=True) - - -def test_gather_nd(): - def check(data_shape, indices_shape, out_shape): - x = sym.Variable("x", shape=data_shape) - indices = sym.Variable("indices", shape=indices_shape) - y = sym.gather_nd(x, indices, name="y") - sdict = infer_shape(y) - assert(tuple(sdict["y"][0]) == tuple(out_shape)) - - check((4,), (1, 1), (1,)) - check((4,), (1, 3), (3,)) - check((2, 3), (1, 1), (1, 3)) - check((2, 3), (2, 1), (1,)) - check((2, 3), (2, 5, 6), (5, 6)) - check((2, 3, 4), (1, 1), (1, 3, 4)) - check((2, 3, 4), (2, 1), (1, 4)) - check((2, 3, 4), (2, 5), (5, 4)) - check((2, 3, 4), (2, 5, 6), (5, 6, 4)) - check((2, 3, 4, 5), (2, 6, 7), (6, 7, 4, 5)) - - -if __name__ == "__main__": - 
test_conv2d_packed() - test_expand_dims() - test_dense() - test_matmul() - test_concatenate() - test_split() - test_batchnorm() - test_flatten() - test_conv2d() - test_conv2d_transpose() - test_max_pool2d() - test_global_pool2d() - test_reshape() - test_broadcast_to() - test_broadcast_binary() - test_reduce() - test_transpose() - test_prelu() - test_squeeze() - test_gather_nd() diff --git a/nnvm/tests/python/unittest/test_pass_saveload_json.py b/nnvm/tests/python/unittest/test_pass_saveload_json.py deleted file mode 100644 index a8b067c8fe24..000000000000 --- a/nnvm/tests/python/unittest/test_pass_saveload_json.py +++ /dev/null @@ -1,33 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import nnvm -from tvm.contrib import util - - -def test_variable_node_parsed(): - sym = nnvm.sym.Variable('data') - tempdir = util.tempdir() - json_filename = 'test_nnvm_symbol.json' - with open(tempdir.relpath(json_filename), 'w') as fo: - fo.write(nnvm.graph.create(sym).json()) - sym_str = open(tempdir.relpath(json_filename), 'r').read() - sym = nnvm.graph.load_json(sym_str).symbol() - sym = nnvm.sym.relu(sym) - - -if __name__ == '__main__': - test_variable_node_parsed() diff --git a/nnvm/tests/python/unittest/test_symbol.py b/nnvm/tests/python/unittest/test_symbol.py deleted file mode 100644 index a54dec170aae..000000000000 --- a/nnvm/tests/python/unittest/test_symbol.py +++ /dev/null @@ -1,77 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import nnvm.symbol as sym -from nnvm import NNVMError - -def test_dense(): - x = sym.Variable('x') - y = sym.dense(x, units=30, name="fc") - assert y.list_input_names() == ["x", "fc_weight", "fc_bias"] - -def test_batch_norm(): - x = sym.Variable('x') - y = sym.dense(x, units=30, name="fc") - z = sym.batch_norm(x, name='bn') - assert z.list_input_names('aux_state') == ['bn_moving_mean', 'bn_moving_var'] - assert z.list_input_names('read_only') == ['x', 'bn_gamma', 'bn_beta'] - -def test_compose(): - x = sym.Variable('x') - z = sym.Variable('z') - y = sym.exp(sym.elemwise_add(x, x, name='add', gpu=2), - name='exp', gpu=1, attr={"kk": "1"}) - - assert y.list_input_names() == ['x'] - assert y.list_output_names() == ["exp_output"] - assert y.list_attr()['gpu'] == '1' - z = y.get_internals() - assert z['add_output'].list_output_names() == ['add_output'] - assert y.list_attr(recursive=True)['add$gpu'] == '2' - -def test_default_input(): - x = sym.Variable('x') - y = sym.dense(data=x, units=30, name='fc', use_bias=False) - assert y.list_input_names() == ['x', 'fc_weight'] - tname = [z.list_output_names()[0] for z in y.list_input_variables()] - assert tname == y.list_input_names() - try: - z = sym.elemwise_add(x) - assert False - except NNVMError: - pass - -def test_copy(): - x = sym.Variable('x') - z = sym.Variable('z') - y = sym.exp(sym.elemwise_add(x, x, name='add', gpu=2), - name='exp', gpu=1, attr={"kk": "1"}) - assert y.__copy__().debug_str() == y.debug_str() - - -def test_op_name(): - x = sym.Variable('x') - y = sym.exp(x) - op_name = y.attr("op_name") - op_func = sym.__dict__[op_name] - z = op_func(x) - -if __name__ == "__main__": - test_op_name() - test_copy() - test_default_input() - test_compose() - test_batch_norm() diff --git a/nnvm/tests/python/unittest/test_top_level1.py b/nnvm/tests/python/unittest/test_top_level1.py deleted file mode 100644 index 2d646dc16ae4..000000000000 --- a/nnvm/tests/python/unittest/test_top_level1.py +++ /dev/null @@ -1,66 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import nnvm.symbol as sym -import nnvm.graph as graph - -def test_dense(): - x = sym.Variable('x') - x1 = sym.dense(x, units=3, name="dense") - x2 = sym.flatten(x1) - x3 = sym.softmax(x2) - assert x3.list_input_names() == ['x', 'dense_weight', 'dense_bias'] - - -def test_concatenate_split(): - x = sym.Variable('x') - y = sym.Variable('y') - y = sym.concatenate(x, y) - assert y.list_input_names() == ['x', 'y'] - z = sym.split(y, indices_or_sections=10) - assert len(z.list_output_names()) == 10 - z = sym.split(y, indices_or_sections=[10, 20]) - assert len(z.list_output_names()) == 3 - -def test_expand_dims(): - x = sym.Variable('x') - y = sym.expand_dims(x, axis=1, num_newaxis=2) - assert y.list_input_names() == ['x'] - - -def test_unary(): - x = sym.Variable('x') - x = sym.exp(x) - x = sym.log(x) - x = sym.sigmoid(x) - x = sym.tanh(x) - x = sym.relu(x) - assert x.list_input_names() == ['x'] - - -def test_batchnorm(): - x = sym.Variable('x') - x = sym.batch_norm(x, name="bn") - assert x.list_input_names() == [ - "x", "bn_gamma", "bn_beta", "bn_moving_mean", "bn_moving_var"] - - -if __name__ == "__main__": - test_concatenate_split() - test_expand_dims() - test_dense() - test_unary() - test_batchnorm() diff --git a/nnvm/tests/python/unittest/test_top_level2.py b/nnvm/tests/python/unittest/test_top_level2.py deleted file mode 100644 index b327356b5cc0..000000000000 --- a/nnvm/tests/python/unittest/test_top_level2.py +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import nnvm.symbol as sym - -def test_conv2d(): - x = sym.Variable('x') - y = sym.conv2d(x, channels=3, kernel_size=(3, 3), - name="y", use_bias=False) - assert y.list_input_names() == ["x", "y_weight"] - - -def test_max_pool2d(): - x = sym.Variable('x') - y = sym.max_pool2d(x, pool_size=(3, 3), name="y") - y = sym.global_max_pool2d(y) - assert y.list_input_names() == ["x"] - - -if __name__ == "__main__": - test_conv2d() - test_max_pool2d() diff --git a/nnvm/tests/python/unittest/test_top_level3.py b/nnvm/tests/python/unittest/test_top_level3.py deleted file mode 100644 index f19e1fd4376e..000000000000 --- a/nnvm/tests/python/unittest/test_top_level3.py +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import nnvm.symbol as sym - -def test_reshape(): - x = sym.Variable("x") - y = sym.reshape(x, shape=(10, 20), name="y") - assert(y.list_input_names() == ["x"]) - - -def test_scalar_op(): - x = sym.Variable("x") - y = (1 / (x * 2) - 1) ** 2 - assert(y.list_input_names() == ["x"]) - -def test_leaky_relu(): - x = sym.Variable("x") - y = sym.leaky_relu(x, alpha=0.1) - assert(y.list_input_names() == ["x"]) - -def test_prelu(): - x = sym.Variable("x") - w = sym.Variable("w") - y = sym.prelu(x, w) - assert(y.list_input_names()[0] == 'x') - assert(y.list_input_names()[1] == 'w') - -if __name__ == "__main__": - test_scalar_op() - test_reshape() - test_leaky_relu() - test_prelu() diff --git a/nnvm/tests/python/unittest/test_top_level4.py b/nnvm/tests/python/unittest/test_top_level4.py deleted file mode 100644 index ad0829b59283..000000000000 --- a/nnvm/tests/python/unittest/test_top_level4.py +++ /dev/null @@ -1,36 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import nnvm.symbol as sym - -def test_binary_broadcast(): - x = sym.Variable('x') - y = sym.Variable('y') - z = x + y - z = x * y - z = x - y - z = x / y - - -def test_broadcast_to(): - x = sym.Variable('x') - y = sym.broadcast_to(x, shape=(3, 3)) - assert y.list_input_names() == ["x"] - - -if __name__ == "__main__": - test_binary_broadcast() - test_broadcast_to() diff --git a/nnvm/tutorials/.gitignore b/nnvm/tutorials/.gitignore deleted file mode 100644 index 5f8a03c46b89..000000000000 --- a/nnvm/tutorials/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -*.pb -*.mlmodel -*.ttf -*.txt -*synset*txt -*.cfg -ssd_model -*.names -*.jpg -*.pbtxt -*.weights diff --git a/nnvm/tutorials/README.txt b/nnvm/tutorials/README.txt deleted file mode 100644 index 334409cd8a28..000000000000 --- a/nnvm/tutorials/README.txt +++ /dev/null @@ -1,4 +0,0 @@ -.. _tutorial-nnvm: - -NNVM Compiler Tutorials ------------------------ diff --git a/nnvm/tutorials/deploy_model_on_mali_gpu.py b/nnvm/tutorials/deploy_model_on_mali_gpu.py deleted file mode 100644 index d90b0955048c..000000000000 --- a/nnvm/tutorials/deploy_model_on_mali_gpu.py +++ /dev/null @@ -1,229 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -.. _tutorial-deploy-model-on-mali-gpu: - -Deploy the Pretrained Model on ARM Mali GPU -=========================================== -**Author**: `Lianmin Zheng `_, `Ziheng Jiang `_ - -This is an example of using NNVM to compile a ResNet model and -deploy it on Firefly-RK3399 with ARM Mali GPU. We will use the -Mali-T860 MP4 GPU on this board to accelerate the inference. -""" - -import tvm -import nnvm.compiler -import nnvm.testing -from tvm import rpc -from tvm.contrib import util, graph_runtime as runtime -from tvm.contrib.download import download_testdata - -###################################################################### -# Build TVM Runtime on Device -# --------------------------- -# -# The first step is to build tvm runtime on the remote device. -# -# .. note:: -# -# All instructions in both this section and next section should be -# executed on the target device, e.g. Rk3399. And we assume it -# has Linux running. -# -# Since we do compilation on local machine, the remote device is only used -# for running the generated code. We only need to build tvm runtime on -# the remote device. Make sure you have opencl driver in your board. -# You can refer to `tutorial `_ -# to setup OS and opencl driver for rk3399. -# -# .. code-block:: bash -# -# git clone --recursive https://github.com/apache/incubator-tvm tvm -# cd tvm -# cp cmake/config.cmake . -# sed -i "s/USE_OPENCL OFF/USE_OPENCL ON/" config.cmake -# make runtime -j4 -# -# After building runtime successfully, we need to set environment varibles -# in :code:`~/.bashrc` file. We can edit :code:`~/.bashrc` -# using :code:`vi ~/.bashrc` and add the line below (Assuming your TVM -# directory is in :code:`~/tvm`): -# -# .. code-block:: bash -# -# export PYTHONPATH=$PYTHONPATH:~/tvm/python -# -# To update the environment variables, execute :code:`source ~/.bashrc`. - -###################################################################### -# Set Up RPC Server on Device -# --------------------------- -# To start an RPC server, run the following command on your remote device -# (Which is RK3399 in our example). -# -# .. code-block:: bash -# -# python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090 -# -# If you see the line below, it means the RPC server started -# successfully on your device. -# -# .. code-block:: bash -# -# INFO:root:RPCServer: bind to 0.0.0.0:9090 -# - -###################################################################### -# Prepare the Pre-trained Model -# ----------------------------- -# Back to the host machine, which should have a full TVM installed (with LLVM). -# -# We will use pre-trained model from -# `MXNet Gluon model zoo `_. -# You can found more details about this part at tutorial :ref:`tutorial-from-mxnet`. 
- -from mxnet.gluon.model_zoo.vision import get_model -from PIL import Image -import numpy as np - -# only one line to get the model -block = get_model('resnet18_v1', pretrained=True) - -###################################################################### -# In order to test our model, here we download an image of cat and -# transform its format. -img_name = 'cat.png' -img_path = download_testdata('https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true', - img_name, module='data') -image = Image.open(img_path).resize((224, 224)) - -def transform_image(image): - image = np.array(image) - np.array([123., 117., 104.]) - image /= np.array([58.395, 57.12, 57.375]) - image = image.transpose((2, 0, 1)) - image = image[np.newaxis, :] - return image - -x = transform_image(image) - -###################################################################### -# synset is used to transform the label from number of ImageNet class to -# the word human can understand. -synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/', - '4d0b62f3d01426887599d4f7ede23ee5/raw/', - '596b27d23537e5a1b5751d2b0481ef172f58b539/', - 'imagenet1000_clsid_to_human.txt']) - -synset_name = 'imagenet1000_clsid_to_human.txt' -synset_path = download_testdata(synset_url, synset_name, module='data') -with open(synset_path) as f: - synset = eval(f.read()) - -###################################################################### -# Now we would like to port the Gluon model to a portable computational graph. -# It's as easy as several lines. - -# We support MXNet static graph(symbol) and HybridBlock in mxnet.gluon -net, params = nnvm.frontend.from_mxnet(block) -# we want a probability so add a softmax operator -net = nnvm.sym.softmax(net) - -###################################################################### -# Here are some basic data workload configurations. -batch_size = 1 -num_classes = 1000 -image_shape = (3, 224, 224) -data_shape = (batch_size,) + image_shape - -###################################################################### -# Compile The Graph -# ----------------- -# To compile the graph, we call the :any:`nnvm.compiler.build` function -# with the graph configuration and parameters. As we use OpenCL for -# GPU computing, the tvm will generate both OpenCL kernel code and ARM -# CPU host code. The CPU host code is used for calling OpenCL kernels. -# In order to generate correct CPU code, we need to specify the target -# triplet for host ARM device by setting the parameter :code:`target_host`. - -###################################################################### -# If we run the example on our x86 server for demonstration, we can simply -# set it as :code:`llvm`. If running it on the RK3399, we need to -# specify its instruction set. Set :code:`local_demo` to False if you -# want to run this tutorial with a real device. - -local_demo = True - -if local_demo: - target_host = "llvm" - target = "llvm" -else: - # Here is the setting for my rk3399 board - # If you don't use rk3399, you can query your target triple by - # execute `gcc -v` on your board. 
- target_host = "llvm -target=aarch64-linux-gnu" - - # set target as `tvm.target.mali` instead of 'opencl' to enable - # optimization for mali - target = tvm.target.mali() - -with nnvm.compiler.build_config(opt_level=3): - graph, lib, params = nnvm.compiler.build(net, target=target, - shape={"data": data_shape}, params=params, target_host=target_host) - -# After `nnvm.compiler.build`, you will get three return values: graph, -# library and the new parameter, since we do some optimization that will -# change the parameters but keep the result of model as the same. - -# Save the library at local temporary directory. -tmp = util.tempdir() -lib_fname = tmp.relpath('net.tar') -lib.export_library(lib_fname) - -###################################################################### -# Deploy the Model Remotely by RPC -# -------------------------------- -# With RPC, you can deploy the model remotely from your host machine -# to the remote device. - -# obtain an RPC session from remote device. -if local_demo: - remote = rpc.LocalSession() -else: - # The following is my environment, change this to the IP address of your target device - host = '10.77.1.145' - port = 9090 - remote = rpc.connect(host, port) - -# upload the library to remote device and load it -remote.upload(lib_fname) -rlib = remote.load_module('net.tar') - -# create the remote runtime module -ctx = remote.cl(0) if not local_demo else remote.cpu(0) -module = runtime.create(graph, rlib, ctx) -# set parameter (upload params to the remote device. This may take a while) -module.set_input(**params) -# set input data -module.set_input('data', tvm.nd.array(x.astype('float32'))) -# run -module.run() -# get output -out = module.get_output(0) -# get top1 result -top1 = np.argmax(out.asnumpy()) -print('TVM prediction top-1: {}'.format(synset[top1])) diff --git a/nnvm/tutorials/deploy_model_on_rasp.py b/nnvm/tutorials/deploy_model_on_rasp.py deleted file mode 100644 index 576b517f3aa5..000000000000 --- a/nnvm/tutorials/deploy_model_on_rasp.py +++ /dev/null @@ -1,220 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -.. _tutorial-deploy-model-on-rasp: - -Deploy the Pretrained Model on Raspberry Pi -=========================================== -**Author**: `Ziheng Jiang `_ - -This is an example of using NNVM to compile a ResNet model and deploy -it on Raspberry Pi. -""" - -import tvm -import nnvm.compiler -import nnvm.testing -from tvm import rpc -from tvm.contrib import util, graph_runtime as runtime -from tvm.contrib.download import download_testdata - -###################################################################### -# .. _build-tvm-runtime-on-device: -# -# Build TVM Runtime on Device -# --------------------------- -# -# The first step is to build tvm runtime on the remote device. -# -# .. 
note:: -# -# All instructions in both this section and next section should be -# executed on the target device, e.g. Raspberry Pi. And we assume it -# has Linux running. -# -# Since we do compilation on local machine, the remote device is only used -# for running the generated code. We only need to build tvm runtime on -# the remote device. -# -# .. code-block:: bash -# -# git clone --recursive https://github.com/apache/incubator-tvm tvm -# cd tvm -# make runtime -j4 -# -# After building runtime successfully, we need to set environment varibles -# in :code:`~/.bashrc` file. We can edit :code:`~/.bashrc` -# using :code:`vi ~/.bashrc` and add the line below (Assuming your TVM -# directory is in :code:`~/tvm`): -# -# .. code-block:: bash -# -# export PYTHONPATH=$PYTHONPATH:~/tvm/python -# -# To update the environment variables, execute :code:`source ~/.bashrc`. - -###################################################################### -# Set Up RPC Server on Device -# --------------------------- -# To start an RPC server, run the following command on your remote device -# (Which is Raspberry Pi in our example). -# -# .. code-block:: bash -# -# python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090 -# -# If you see the line below, it means the RPC server started -# successfully on your device. -# -# .. code-block:: bash -# -# INFO:root:RPCServer: bind to 0.0.0.0:9090 -# - -###################################################################### -# Prepare the Pre-trained Model -# ----------------------------- -# Back to the host machine, which should have a full TVM installed (with LLVM). -# -# We will use pre-trained model from -# `MXNet Gluon model zoo `_. -# You can found more details about this part at tutorial :ref:`tutorial-from-mxnet`. - -from mxnet.gluon.model_zoo.vision import get_model -from PIL import Image -import numpy as np - -# one line to get the model -block = get_model('resnet18_v1', pretrained=True) - -###################################################################### -# In order to test our model, here we download an image of cat and -# transform its format. -img_name = 'cat.png' -img_path = download_testdata('https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true', - img_name, module='data') -image = Image.open(img_path).resize((224, 224)) - -def transform_image(image): - image = np.array(image) - np.array([123., 117., 104.]) - image /= np.array([58.395, 57.12, 57.375]) - image = image.transpose((2, 0, 1)) - image = image[np.newaxis, :] - return image - -x = transform_image(image) - -###################################################################### -# synset is used to transform the label from number of ImageNet class to -# the word human can understand. -synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/', - '4d0b62f3d01426887599d4f7ede23ee5/raw/', - '596b27d23537e5a1b5751d2b0481ef172f58b539/', - 'imagenet1000_clsid_to_human.txt']) -synset_name = 'imagenet1000_clsid_to_human.txt' -synset_path = download_testdata(synset_url, synset_name, module='data') -with open(synset_path) as f: - synset = eval(f.read()) - -###################################################################### -# Now we would like to port the Gluon model to a portable computational graph. -# It's as easy as several lines. 
- -# We support MXNet static graph(symbol) and HybridBlock in mxnet.gluon -net, params = nnvm.frontend.from_mxnet(block) -# we want a probability so add a softmax operator -net = nnvm.sym.softmax(net) - -###################################################################### -# Here are some basic data workload configurations. -batch_size = 1 -num_classes = 1000 -image_shape = (3, 224, 224) -data_shape = (batch_size,) + image_shape - -###################################################################### -# Compile The Graph -# ----------------- -# To compile the graph, we call the :any:`nnvm.compiler.build` function -# with the graph configuration and parameters. However, You cannot to -# deploy a x86 program on a device with ARM instruction set. It means -# NNVM also needs to know the compilation option of target device, -# apart from arguments :code:`net` and :code:`params` to specify the -# deep learning workload. Actually, the option matters, different option -# will lead to very different performance. - -###################################################################### -# If we run the example on our x86 server for demonstration, we can simply -# set it as :code:`llvm`. If running it on the Raspberry Pi, we need to -# specify its instruction set. Set :code:`local_demo` to False if you want -# to run this tutorial with a real device. - -local_demo = True - -if local_demo: - target = tvm.target.create('llvm') -else: - target = tvm.target.arm_cpu('rasp3b') - # The above line is a simple form of - # target = tvm.target.create('llvm -device=arm_cpu -model=bcm2837 -target=armv7l-linux-gnueabihf -mattr=+neon') - -with nnvm.compiler.build_config(opt_level=3): - graph, lib, params = nnvm.compiler.build( - net, target, shape={"data": data_shape}, params=params) - -# After `nnvm.compiler.build`, you will get three return values: graph, -# library and the new parameter, since we do some optimization that will -# change the parameters but keep the result of model as the same. - -# Save the library at local temporary directory. -tmp = util.tempdir() -lib_fname = tmp.relpath('net.tar') -lib.export_library(lib_fname) - -###################################################################### -# Deploy the Model Remotely by RPC -# -------------------------------- -# With RPC, you can deploy the model remotely from your host machine -# to the remote device. - -# obtain an RPC session from remote device. -if local_demo: - remote = rpc.LocalSession() -else: - # The following is my environment, change this to the IP address of your target device - host = '10.77.1.162' - port = 9090 - remote = rpc.connect(host, port) - -# upload the library to remote device and load it -remote.upload(lib_fname) -rlib = remote.load_module('net.tar') - -# create the remote runtime module -ctx = remote.cpu(0) -module = runtime.create(graph, rlib, ctx) -# set parameter (upload params to the remote device. This may take a while) -module.set_input(**params) -# set input data -module.set_input('data', tvm.nd.array(x.astype('float32'))) -# run -module.run() -# get output -out = module.get_output(0) -# get top1 result -top1 = np.argmax(out.asnumpy()) -print('TVM prediction top-1: {}'.format(synset[top1])) diff --git a/nnvm/tutorials/deploy_ssd_mxnet.py b/nnvm/tutorials/deploy_ssd_mxnet.py deleted file mode 100644 index c88c61984293..000000000000 --- a/nnvm/tutorials/deploy_ssd_mxnet.py +++ /dev/null @@ -1,180 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Deploy Single Shot Multibox Detector(SSD) model -=============================================== -**Author**: `Yao Wang `_, \ -`Leyuan Wang `_ - -This article is an introductory tutorial to deploy SSD models with TVM. -We will use mxnet pretrained SSD model with Resnet50 as body network and -convert it to NNVM graph; -""" -import os -import zipfile -import tvm -import mxnet as mx -import cv2 -import numpy as np - -from nnvm import compiler -from nnvm.frontend import from_mxnet -from tvm import relay -from tvm.contrib.download import download_testdata -from tvm.contrib import graph_runtime -from mxnet.model import load_checkpoint - - -###################################################################### -# Preliminary and Set parameters -# ------------------------------ -# We should build TVM with sort support, in TVM root directory -# -# .. code-block:: bash -# -# echo "set(USE_SORT ON)" > config.mk -# make -j8 -# - -model_name = "ssd_resnet50_512" -model_file = "%s.zip" % model_name -test_image = "dog.jpg" -dshape = (1, 3, 512, 512) -dtype = "float32" - -# Target settings -# Use these commented settings to build for cuda. -#target = 'cuda' -#ctx = tvm.gpu(0) -# Use these commented settings to build for opencl. -#target = 'opencl' -#ctx = tvm.opencl(0) -target = "llvm" -ctx = tvm.cpu() - -###################################################################### -# Download MXNet SSD pre-trained model and demo image -# --------------------------------------------------- -# Pre-trained model available at -# https://github.com/apache/incubator-\mxnet/tree/master/example/ssd - -model_url = "https://github.com/zhreshold/mxnet-ssd/releases/download/v0.6/" \ - "resnet50_ssd_512_voc0712_trainval.zip" -image_url = "https://cloud.githubusercontent.com/assets/3307514/20012567/" \ - "cbb60336-a27d-11e6-93ff-cbc3f09f5c9e.jpg" -inference_symbol_folder = \ - "c1904e900848df4548ce5dfb18c719c7-a28c4856c827fe766aa3da0e35bad41d44f0fb26" -inference_symbol_url = "https://gist.github.com/kevinthesun/c1904e900848df4548ce5dfb18c719c7/" \ - "archive/a28c4856c827fe766aa3da0e35bad41d44f0fb26.zip" - -model_file_path = download_testdata(model_url, model_file, module=["mxnet", "ssd_model"]) -inference_symbol_path = download_testdata(inference_symbol_url, "inference_model.zip", - module=["mxnet", "ssd_model"]) -test_image_path = download_testdata(image_url, test_image, module="data") -model_dir = os.path.dirname(model_file_path) - -zip_ref = zipfile.ZipFile(model_file_path, 'r') -zip_ref.extractall(model_dir) -zip_ref.close() -zip_ref = zipfile.ZipFile(inference_symbol_path) -zip_ref.extractall(model_dir) -zip_ref.close() - -###################################################################### -# Convert and compile model with NNVM or Relay for CPU. 
- -sym = mx.sym.load("%s/%s/ssd_resnet50_inference.json" % (model_dir, inference_symbol_folder)) -_, arg_params, aux_params = load_checkpoint("%s/%s" % (model_dir, model_name), 0) - -import argparse -parser = argparse.ArgumentParser() -parser.add_argument( - "-f", "--frontend", - help="Frontend for compilation, nnvm or relay", - type=str, - default="nnvm") -args = parser.parse_args() -if args.frontend == "relay": - net, params = relay.frontend.from_mxnet(sym, {"data": dshape}, arg_params=arg_params, \ - aux_params=aux_params) - with relay.build_config(opt_level=3): - graph, lib, params = relay.build(net, target, params=params) -elif args.frontend == "nnvm": - net, params = from_mxnet(sym, arg_params, aux_params) - with compiler.build_config(opt_level=3): - graph, lib, params = compiler.build( - net, target, {"data": dshape}, params=params) -else: - parser.print_help() - parser.exit() - -###################################################################### -# Create TVM runtime and do inference - -# Preprocess image -image = cv2.imread(test_image_path) -img_data = cv2.resize(image, (dshape[2], dshape[3])) -img_data = img_data[:, :, (2, 1, 0)].astype(np.float32) -img_data -= np.array([123, 117, 104]) -img_data = np.transpose(np.array(img_data), (2, 0, 1)) -img_data = np.expand_dims(img_data, axis=0) -# Build TVM runtime -m = graph_runtime.create(graph, lib, ctx) -m.set_input('data', tvm.nd.array(img_data.astype(dtype))) -m.set_input(**params) -# execute -m.run() -# get outputs -tvm_output = m.get_output(0) - - -###################################################################### -# Display result - -class_names = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", - "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", - "sheep", "sofa", "train", "tvmonitor"] -def display(img, out, thresh=0.5): - import random - import matplotlib as mpl - import matplotlib.pyplot as plt - mpl.rcParams['figure.figsize'] = (10, 10) - pens = dict() - plt.clf() - plt.imshow(img) - for det in out: - cid = int(det[0]) - if cid < 0: - continue - score = det[1] - if score < thresh: - continue - if cid not in pens: - pens[cid] = (random.random(), random.random(), random.random()) - scales = [img.shape[1], img.shape[0]] * 2 - xmin, ymin, xmax, ymax = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)] - rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, - edgecolor=pens[cid], linewidth=3) - plt.gca().add_patch(rect) - text = class_names[cid] - plt.gca().text(xmin, ymin-2, '{:s} {:.3f}'.format(text, score), - bbox=dict(facecolor=pens[cid], alpha=0.5), - fontsize=12, color='white') - plt.show() - -image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) -display(image, tvm_output.asnumpy()[0], thresh=0.45) diff --git a/nnvm/tutorials/from_coreml.py b/nnvm/tutorials/from_coreml.py deleted file mode 100644 index 3eaced18728e..000000000000 --- a/nnvm/tutorials/from_coreml.py +++ /dev/null @@ -1,106 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Compile CoreML Models -===================== -**Author**: `Joshua Z. Zhang `_ - -This article is an introductory tutorial to deploy CoreML models with NNVM. - -For us to begin with, coremltools module is required to be installed. - -A quick solution is to install via pip - -.. code-block:: bash - - pip install -U coremltools --user - -or please refer to official site -https://github.com/apple/coremltools -""" -import nnvm -import tvm -import coremltools as cm -import numpy as np -from PIL import Image -from tvm.contrib.download import download_testdata - -###################################################################### -# Load pretrained CoreML model -# ---------------------------- -# We will download and load a pretrained mobilenet classification network -# provided by apple in this example -model_url = 'https://docs-assets.developer.apple.com/coreml/models/MobileNet.mlmodel' -model_file = 'mobilenet.mlmodel' -model_path = download_testdata(model_url, model_file, module='coreml') -# now you mobilenet.mlmodel on disk -mlmodel = cm.models.MLModel(model_path) -# we can load the graph as NNVM compatible model -sym, params = nnvm.frontend.from_coreml(mlmodel) - -###################################################################### -# Load a test image -# ------------------ -# A single cat dominates the examples! -from PIL import Image -img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true' -img_path = download_testdata(img_url, 'cat.png', module='data') -img = Image.open(img_path).resize((224, 224)) -#x = np.transpose(img, (2, 0, 1))[np.newaxis, :] -image = np.asarray(img) -image = image.transpose((2, 0, 1)) -x = image[np.newaxis, :] -###################################################################### -# Compile the model on NNVM -# --------------------------- -# We should be familiar with the process right now. -import nnvm.compiler -target = 'cuda' -shape_dict = {'image': x.shape} -with nnvm.compiler.build_config(opt_level=2, add_pass=['AlterOpLayout']): - graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params) - -###################################################################### -# Execute on TVM -# ------------------- -# The process is no different from other example -from tvm.contrib import graph_runtime -ctx = tvm.gpu(0) -dtype = 'float32' -m = graph_runtime.create(graph, lib, ctx) -# set inputs -m.set_input('image', tvm.nd.array(x.astype(dtype))) -m.set_input(**params) -# execute -m.run() -# get outputs -tvm_output = m.get_output(0) -top1 = np.argmax(tvm_output.asnumpy()[0]) - -##################################################################### -# Look up synset name -# ------------------- -# Look up prediction top 1 index in 1000 class synset. 
-synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/', - '4d0b62f3d01426887599d4f7ede23ee5/raw/', - '596b27d23537e5a1b5751d2b0481ef172f58b539/', - 'imagenet1000_clsid_to_human.txt']) -synset_name = 'imagenet1000_clsid_to_human.txt' -synset_path = download_testdata(synset_url, synset_name, module='data') -with open(synset_path) as f: - synset = eval(f.read()) -print('Top-1 id', top1, 'class name', synset[top1]) diff --git a/nnvm/tutorials/from_darknet.py b/nnvm/tutorials/from_darknet.py deleted file mode 100644 index d2ab647da1b3..000000000000 --- a/nnvm/tutorials/from_darknet.py +++ /dev/null @@ -1,177 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Compile YOLO-V2 and YOLO-V3 in DarkNet Models -================================= -**Author**: `Siju Samuel `_ - -This article is an introductory tutorial to deploy darknet models with NNVM. -All the required models and libraries will be downloaded from the internet by the script. -This script runs the YOLO-V2 and YOLO-V3 Model with the bounding boxes -Darknet parsing have dependancy with CFFI and CV2 library -Please install CFFI and CV2 before executing this script - -.. code-block:: bash - - pip install cffi - pip install opencv-python -""" - -import nnvm -import nnvm.frontend.darknet -import tvm.relay.testing.yolo_detection -import tvm.relay.testing.darknet -import matplotlib.pyplot as plt -import numpy as np -import tvm -import sys - -from ctypes import * -from tvm.contrib.download import download_testdata -from tvm.relay.testing.darknet import __darknetffi__ - -# Model name -MODEL_NAME = 'yolov3' - -###################################################################### -# Download required files -# ----------------------- -# Download cfg and weights file if first time. 
-CFG_NAME = MODEL_NAME + '.cfg' -WEIGHTS_NAME = MODEL_NAME + '.weights' -REPO_URL = 'https://github.com/siju-samuel/darknet/blob/master/' -CFG_URL = REPO_URL + 'cfg/' + CFG_NAME + '?raw=true' -WEIGHTS_URL = 'https://pjreddie.com/media/files/' + WEIGHTS_NAME - -cfg_path = download_testdata(CFG_URL, CFG_NAME, module="darknet") -weights_path = download_testdata(WEIGHTS_URL, WEIGHTS_NAME, module="darknet") - -# Download and Load darknet library -if sys.platform in ['linux', 'linux2']: - DARKNET_LIB = 'libdarknet2.0.so' - DARKNET_URL = REPO_URL + 'lib/' + DARKNET_LIB + '?raw=true' -elif sys.platform == 'darwin': - DARKNET_LIB = 'libdarknet_mac2.0.so' - DARKNET_URL = REPO_URL + 'lib_osx/' + DARKNET_LIB + '?raw=true' -else: - err = "Darknet lib is not supported on {} platform".format(sys.platform) - raise NotImplementedError(err) - -lib_path = download_testdata(DARKNET_URL, DARKNET_LIB, module="darknet") - -DARKNET_LIB = __darknetffi__.dlopen(lib_path) -net = DARKNET_LIB.load_network(cfg_path.encode('utf-8'), weights_path.encode('utf-8'), 0) -dtype = 'float32' -batch_size = 1 - -print("Converting darknet to nnvm symbols...") -sym, params = nnvm.frontend.darknet.from_darknet(net, dtype) - -###################################################################### -# Compile the model on NNVM -# ------------------------- -# compile the model -target = 'llvm' -ctx = tvm.cpu(0) -data = np.empty([batch_size, net.c, net.h, net.w], dtype) -shape = {'data': data.shape} -print("Compiling the model...") -dtype_dict = {} -with nnvm.compiler.build_config(opt_level=2): - graph, lib, params = nnvm.compiler.build(sym, target, shape, dtype_dict, params) - -[neth, netw] = shape['data'][2:] # Current image shape is 608x608 -###################################################################### -# Load a test image -# -------------------------------------------------------------------- -test_image = 'dog.jpg' -print("Loading the test image...") -img_url = 'https://github.com/siju-samuel/darknet/blob/master/data/' + \ - test_image + '?raw=true' -img_path = download_testdata(img_url, test_image, "data") - -data = tvm.relay.testing.darknet.load_image(img_path, netw, neth) -###################################################################### -# Execute on TVM Runtime -# ---------------------- -# The process is no different from other examples. 
-from tvm.contrib import graph_runtime - -m = graph_runtime.create(graph, lib, ctx) - -# set inputs -m.set_input('data', tvm.nd.array(data.astype(dtype))) -m.set_input(**params) -# execute -print("Running the test image...") - -m.run() -# get outputs -tvm_out = [] -if MODEL_NAME == 'yolov2': - layer_out = {} - layer_out['type'] = 'Region' - # Get the region layer attributes (n, out_c, out_h, out_w, classes, coords, background) - layer_attr = m.get_output(2).asnumpy() - layer_out['biases'] = m.get_output(1).asnumpy() - out_shape = (layer_attr[0], layer_attr[1]//layer_attr[0], - layer_attr[2], layer_attr[3]) - layer_out['output'] = m.get_output(0).asnumpy().reshape(out_shape) - layer_out['classes'] = layer_attr[4] - layer_out['coords'] = layer_attr[5] - layer_out['background'] = layer_attr[6] - tvm_out.append(layer_out) - -elif MODEL_NAME == 'yolov3': - for i in range(3): - layer_out = {} - layer_out['type'] = 'Yolo' - # Get the yolo layer attributes (n, out_c, out_h, out_w, classes, total) - layer_attr = m.get_output(i*4+3).asnumpy() - layer_out['biases'] = m.get_output(i*4+2).asnumpy() - layer_out['mask'] = m.get_output(i*4+1).asnumpy() - out_shape = (layer_attr[0], layer_attr[1]//layer_attr[0], - layer_attr[2], layer_attr[3]) - layer_out['output'] = m.get_output(i*4).asnumpy().reshape(out_shape) - layer_out['classes'] = layer_attr[4] - tvm_out.append(layer_out) - -# do the detection and bring up the bounding boxes -thresh = 0.5 -nms_thresh = 0.45 -img = tvm.relay.testing.darknet.load_image_color(img_path) -_, im_h, im_w = img.shape -dets = tvm.relay.testing.yolo_detection.fill_network_boxes((netw, neth), (im_w, im_h), thresh, - 1, tvm_out) -last_layer = net.layers[net.n - 1] -tvm.relay.testing.yolo_detection.do_nms_sort(dets, last_layer.classes, nms_thresh) - -coco_name = 'coco.names' -coco_url = 'https://github.com/siju-samuel/darknet/blob/master/data/' + coco_name + '?raw=true' -font_name = 'arial.ttf' -font_url = 'https://github.com/siju-samuel/darknet/blob/master/data/' + font_name + '?raw=true' -coco_path = download_testdata(coco_url, coco_name, module='data') -font_path = download_testdata(font_url, font_name, module='data') - -with open(coco_path) as f: - content = f.readlines() - -names = [x.strip() for x in content] - -tvm.relay.testing.yolo_detection.draw_detections(font_path, img, dets, thresh, names, last_layer.classes) -plt.imshow(img.transpose(1, 2, 0)) -plt.show() diff --git a/nnvm/tutorials/from_mxnet.py b/nnvm/tutorials/from_mxnet.py deleted file mode 100644 index e4a30aa2c0e0..000000000000 --- a/nnvm/tutorials/from_mxnet.py +++ /dev/null @@ -1,136 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -.. _tutorial-from-mxnet: - -Compile MXNet Models -==================== -**Author**: `Joshua Z. 
Zhang `_ - -This article is an introductory tutorial to deploy mxnet models with NNVM. - -For us to begin with, mxnet module is required to be installed. - -A quick solution is - -.. code-block:: bash - - pip install mxnet --user - -or please refer to offical installation guide. -https://mxnet.incubator.apache.org/versions/master/install/index.html -""" -# some standard imports -import mxnet as mx -import numpy as np -import nnvm -import tvm -from tvm.contrib.download import download_testdata - -###################################################################### -# Download Resnet18 model from Gluon Model Zoo -# --------------------------------------------- -# In this section, we download a pretrained imagenet model and classify an image. -from mxnet.gluon.model_zoo.vision import get_model -from PIL import Image -from matplotlib import pyplot as plt -block = get_model('resnet18_v1', pretrained=True) -img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true' -img_name = 'cat.png' -synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/', - '4d0b62f3d01426887599d4f7ede23ee5/raw/', - '596b27d23537e5a1b5751d2b0481ef172f58b539/', - 'imagenet1000_clsid_to_human.txt']) -synset_name = 'imagenet1000_clsid_to_human.txt' -img_path = download_testdata(img_url, img_name, module='data') -synset_path = download_testdata(synset_url, synset_name, module='data') -with open(synset_path) as f: - synset = eval(f.read()) -image = Image.open(img_path).resize((224, 224)) -plt.imshow(image) -plt.show() - -def transform_image(image): - image = np.array(image) - np.array([123., 117., 104.]) - image /= np.array([58.395, 57.12, 57.375]) - image = image.transpose((2, 0, 1)) - image = image[np.newaxis, :] - return image - -x = transform_image(image) -print('x', x.shape) - -###################################################################### -# Compile the Graph -# ----------------- -# Now we would like to port the Gluon model to a portable computational graph. -# It's as easy as several lines. -# We support MXNet static graph(symbol) and HybridBlock in mxnet.gluon -sym, params = nnvm.frontend.from_mxnet(block) -# we want a probability so add a softmax operator -sym = nnvm.sym.softmax(sym) - -###################################################################### -# now compile the graph -import nnvm.compiler -target = 'cuda' -shape_dict = {'data': x.shape} -with nnvm.compiler.build_config(opt_level=3): - graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params) - -###################################################################### -# Execute the portable graph on TVM -# --------------------------------- -# Now, we would like to reproduce the same forward computation using TVM. 
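This tutorial (and several others in this diff) parses the downloaded synset file with eval. Since the file is just a Python dict literal mapping class ids to names, ast.literal_eval from the standard library is a drop-in that does not execute arbitrary code; a minimal sketch, reusing the synset_path variable from above:

.. code-block:: python

    import ast

    with open(synset_path) as f:
        # literal_eval only accepts Python literals (dicts, lists, strings, numbers),
        # so a tampered synset file cannot run code the way eval() would allow.
        synset = ast.literal_eval(f.read())

    print(len(synset), "classes; id 0 is", synset[0])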
-from tvm.contrib import graph_runtime -ctx = tvm.gpu(0) -dtype = 'float32' -m = graph_runtime.create(graph, lib, ctx) -# set inputs -m.set_input('data', tvm.nd.array(x.astype(dtype))) -m.set_input(**params) -# execute -m.run() -# get outputs -tvm_output = m.get_output(0) -top1 = np.argmax(tvm_output.asnumpy()[0]) -print('TVM prediction top-1:', top1, synset[top1]) - -###################################################################### -# Use MXNet symbol with pretrained weights -# ---------------------------------------- -# MXNet often use `arg_params` and `aux_params` to store network parameters -# separately, here we show how to use these weights with existing API -def block2symbol(block): - data = mx.sym.Variable('data') - sym = block(data) - args = {} - auxs = {} - for k, v in block.collect_params().items(): - args[k] = mx.nd.array(v.data().asnumpy()) - return sym, args, auxs -mx_sym, args, auxs = block2symbol(block) -# usually we would save/load it as checkpoint -mx.model.save_checkpoint('resnet18_v1', 0, mx_sym, args, auxs) -# there are 'resnet18_v1-0000.params' and 'resnet18_v1-symbol.json' on disk - -###################################################################### -# for a normal mxnet model, we start from here -mx_sym, args, auxs = mx.model.load_checkpoint('resnet18_v1', 0) -# now we use the same API to get NNVM compatible symbol -nnvm_sym, nnvm_params = nnvm.frontend.from_mxnet(mx_sym, args, auxs) -# repeat the same steps to run this model using TVM diff --git a/nnvm/tutorials/from_mxnet_to_webgl.py b/nnvm/tutorials/from_mxnet_to_webgl.py deleted file mode 100644 index a54704cca381..000000000000 --- a/nnvm/tutorials/from_mxnet_to_webgl.py +++ /dev/null @@ -1,515 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Deploy Deep Learning Models to OpenGL and WebGL -=============================================== -**Author**: `Zhixun Tan `_ - -This example shows how to build a neural network with NNVM python frontend and -generate runtime library for WebGL running in a browser with TVM. -To run this notebook, you need to install tvm and nnvm. -Notice that you need to build tvm with OpenGL. -""" - -###################################################################### -# Overview -# -------- -# In this tutorial, we will download a pre-trained resnet18 model from Gluon -# Model Zoo, and run image classification in 3 different ways: -# -# - Run locally: -# We will compile the model into a TVM library with OpenGL device code and -# directly run it locally. -# -# - Run in a browser through RPC: -# We will compile the model into a JavaScript TVM library with WebGL device -# code, and upload it to an RPC server that is hosting JavaScript TVM runtime -# to run it. 
-# -# - Export a JavaScript library and run in a browser: -# We will compile the model into a JavaScript TVM library with WebGL device -# code, combine it with JavaScript TVM runtime, and pack everything together. -# Then we will run it directly in a browser. -# -from __future__ import print_function - -import numpy as np -import tvm -from tvm.contrib.download import download_testdata -import nnvm.compiler -import nnvm.testing - -# This tutorial must be run with OpenGL backend enabled in TVM. -# The NNVM CI does not enable OpenGL yet. But the user can run this script. -opengl_enabled = tvm.module.enabled("opengl") - -# To run the local demo, set this flag to True. -run_deploy_local = False - -# To run the RPC demo, set this flag to True. -run_deploy_rpc = False - -# To run the WebGL deploy demo, set this flag to True. -run_deploy_web = False - -###################################################################### -# Download a Pre-trained Resnet18 Model -# ------------------------------------- -# Here we define 2 functions: -# -# - A function that downloads a pre-trained resnet18 model from Gluon Model Zoo. -# The model that we download is in MXNet format, we then transform it into an -# NNVM computation graph. -# -# - A function that downloads a file that contains the name of all the image -# classes in this model. -# -def load_mxnet_resnet(): - """Load a pretrained resnet model from MXNet and transform that into NNVM - format. - - Returns - ------- - net : nnvm.Symbol - The loaded resnet computation graph. - - params : dict[str -> NDArray] - The pretrained model parameters. - - data_shape: tuple - The shape of the input tensor (an image). - - out_shape: tuple - The shape of the output tensor (probability of all classes). - """ - - print("Loading pretrained resnet model from MXNet...") - - # Download a pre-trained mxnet resnet18_v1 model. - from mxnet.gluon.model_zoo.vision import get_model - block = get_model('resnet18_v1', pretrained=True) - - # Transform the mxnet model into NNVM. - # We want a probability so add a softmax operator. - sym, params = nnvm.frontend.from_mxnet(block) - sym = nnvm.sym.softmax(sym) - - print("- Model loaded!") - return sym, params, (1, 3, 224, 224), (1, 1000) - -def download_synset(): - """Download a dictionary from class index to name. - This lets us know what our prediction actually is. - - Returns - ------- - synset : dict[int -> str] - The loaded synset. - """ - - print("Downloading synset...") - - url = "https://gist.githubusercontent.com/zhreshold/" + \ - "4d0b62f3d01426887599d4f7ede23ee5/raw/" + \ - "596b27d23537e5a1b5751d2b0481ef172f58b539/" + \ - "imagenet1000_clsid_to_human.txt" - file_name = "imagenet1000_clsid_to_human.txt" - - file_path = download_testdata(url, file_name, module='data') - with open(file_path) as f: - synset = eval(f.read()) - - print("- Synset downloaded!") - return synset - -###################################################################### -# Download Input Image -# -------------------- -# Here we define 2 functions that prepare an image that we want to perform -# classification on. -# -# - A function that downloads a cat image. -# -# - A function that performs preprocessing to an image so that it fits the -# format required by the resnet18 model. -# -def download_image(): - """Download a cat image and resize it to 224x224 which fits resnet. - - Returns - ------- - image : PIL.Image.Image - The loaded and resized image. 
- """ - - print("Downloading cat image...") - - from matplotlib import pyplot as plt - from PIL import Image - - url = "https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true" - img_name = "cat.png" - - img_path = download_testdata(url, img_name, module='data') - image = Image.open(img_path).resize((224, 224)) - - print("- Cat image downloaded!") - - plt.imshow(image) - plt.show() - - return image - -def transform_image(image): - """Perform necessary preprocessing to input image. - - Parameters - ---------- - image : numpy.ndarray - The raw image. - - Returns - ------- - image : numpy.ndarray - The preprocessed image. - """ - - image = np.array(image) - np.array([123., 117., 104.]) - image /= np.array([58.395, 57.12, 57.375]) - image = image.transpose((2, 0, 1)) - image = image[np.newaxis, :] - return image - -###################################################################### -# Compile the Model -# ----------------- -# Here we define a function that invokes the NNVM compiler. -# -def compile_net(net, target_host, target, data_shape, params): - """Compiles an NNVM computation graph. - - Parameters - ---------- - net : nnvm.Graph - The NNVM computation graph. - - target_host : str - The target to compile the host portion of the library. - - target : str - The target to compile the device portion of the library. - - data_shape : tuple - The shape of the input data (image). - - params : dict[str -> NDArray] - Model parameters. - - Returns - ------- - graph : Graph - The final execution graph. - - libmod : tvm.Module - The module that comes with the execution graph - - params : dict[str -> NDArray] - The updated parameters of graph if params is passed. - This can be different from the params passed in. - """ - - print("Compiling the neural network...") - - with nnvm.compiler.build_config(opt_level=0): - deploy_graph, lib, deploy_params = nnvm.compiler.build( - net, - target_host=target_host, - target=target, - shape={"data": data_shape}, - params=params) - - print("- Complilation completed!") - return deploy_graph, lib, deploy_params - -###################################################################### -# Demo 1: Deploy Locally -# ---------------------- -# In this demo, we will compile the model targetting the local machine. -# -# Then we will demonstrate how to save the compiled model as a shared library -# and load it back. -# -# Finally, we will run the model. -# -def deploy_local(): - """Runs the demo that deploys a model locally. - """ - - # Load resnet model. - net, params, data_shape, out_shape = load_mxnet_resnet() - - # Compile the model. - # Note that we specify the the host target as "llvm". - deploy_graph, lib, deploy_params = compile_net( - net, - target_host="llvm", - target="opengl", - data_shape=data_shape, - params=params) - - # Save the compiled module. - # Note we need to save all three files returned from the NNVM compiler. - print("Saving the compiled module...") - from tvm.contrib import util - temp = util.tempdir() - - path_lib = temp.relpath("deploy_lib.so") - path_graph_json = temp.relpath("deploy_graph.json") - path_params = temp.relpath("deploy_param.params") - - lib.export_library(path_lib) - with open(path_graph_json, "w") as fo: - fo.write(deploy_graph.json()) - with open(path_params, "wb") as fo: - fo.write(nnvm.compiler.save_param_dict(deploy_params)) - - print("- Saved files:", temp.listdir()) - - # Load the module back. 
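The transform_image helper above subtracts approximate per-channel means and divides by the familiar ImageNet channel standard deviations expressed on a 0-255 scale, then reorders the array from HWC to NCHW with a batch dimension. A quick numpy-only check of the expected shapes and value range, using a dummy 224x224 RGB array instead of the downloaded cat image:

.. code-block:: python

    import numpy as np

    def transform_image(image):
        image = np.array(image) - np.array([123., 117., 104.])
        image /= np.array([58.395, 57.12, 57.375])
        image = image.transpose((2, 0, 1))
        return image[np.newaxis, :]

    dummy = np.random.randint(0, 256, size=(224, 224, 3), dtype=np.uint8)
    x = transform_image(dummy)
    print(x.shape)           # (1, 3, 224, 224), i.e. NCHW with a batch dimension
    print(x.min(), x.max())  # roughly the -2.1 .. 2.6 range typical of normalized inputs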
- print("Loading the module back...") - loaded_lib = tvm.module.load(path_lib) - with open(path_graph_json) as fi: - loaded_graph_json = fi.read() - with open(path_params, "rb") as fi: - loaded_params = bytearray(fi.read()) - print("- Module loaded!") - - # Run the model! We will perform prediction on an image. - print("Running the graph...") - from tvm.contrib import graph_runtime - - module = graph_runtime.create(loaded_graph_json, loaded_lib, tvm.opengl(0)) - module.load_params(loaded_params) - - image = transform_image(download_image()) - input_data = tvm.nd.array(image.astype("float32"), ctx=tvm.opengl(0)) - - module.set_input("data", input_data) - module.run() - - # Retrieve the output. - out = module.get_output(0, tvm.nd.empty(out_shape, ctx=tvm.opengl(0))) - top1 = np.argmax(out.asnumpy()) - synset = download_synset() - print('TVM prediction top-1:', top1, synset[top1]) - -if run_deploy_local and opengl_enabled: - deploy_local() - -###################################################################### -# Demo 2: Deploy the Model to WebGL Remotely with RPC -# ------------------------------------------------------- -# Following the steps above, we can also compile the model for WebGL. -# TVM provides rpc module to help with remote deploying. -# -# When we deploy a model locally to OpenGL, the model consists of two parts: -# the host LLVM part and the device GLSL part. Now that we want to deploy to -# WebGL, we need to leverage Emscripten to transform LLVM into JavaScript. In -# order to do that, we will need to specify the host target as -# 'llvm -target=asmjs-unknown-emscripten -system-lib`. Then call Emscripten to -# compile the LLVM binary output into a JavaScript file. -# -# First, we need to manually start an RPC server. Please follow the instructions -# in `tvm/web/README.md`. After following the steps, you should have a web page -# opened in a browser, and a Python script running a proxy. -# -def deploy_rpc(): - """Runs the demo that deploys a model remotely through RPC. - """ - from tvm import rpc - from tvm.contrib import util, emscripten - - # As usual, load the resnet18 model. - net, params, data_shape, out_shape = load_mxnet_resnet() - - # Compile the model. - # Note that this time we are changing the target. - # This is because we want to translate the host library into JavaScript - # through Emscripten. - graph, lib, params = compile_net( - net, - target_host="llvm -target=asmjs-unknown-emscripten -system-lib", - target="opengl", - data_shape=data_shape, - params=params) - - # Now we want to deploy our model through RPC. - # First we ned to prepare the module files locally. - print("Saving the compiled module...") - - temp = util.tempdir() - path_obj = temp.relpath("deploy.bc") # host LLVM part - path_dso = temp.relpath("deploy.js") # host JavaScript part - path_gl = temp.relpath("deploy.gl") # device GLSL part - path_json = temp.relpath("deploy.tvm_meta.json") - - lib.save(path_obj) - emscripten.create_js(path_dso, path_obj, side_module=True) - lib.imported_modules[0].save(path_gl) - - print("- Saved files:", temp.listdir()) - - # Connect to the RPC server. - print("Connecting to RPC server...") - proxy_host = 'localhost' - proxy_port = 9090 - remote = rpc.connect(proxy_host, proxy_port, key="js") - print("- Connected to RPC server!") - - # Upload module to RPC server. - print("Uploading module to RPC server...") - remote.upload(path_dso, "deploy.dso") - remote.upload(path_gl) - remote.upload(path_json) - print("- Upload completed!") - - # Load remote library. 
- print("Loading remote library...") - fdev = remote.load_module("deploy.gl") - fhost = remote.load_module("deploy.dso") - fhost.import_module(fdev) - rlib = fhost - print("- Remote library loaded!") - - ctx = remote.opengl(0) - - # Upload the parameters. - print("Uploading parameters...") - rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()} - print("- Parameters uploaded!") - - # Create the remote runtime module. - print("Running remote module...") - from tvm.contrib import graph_runtime - module = graph_runtime.create(graph, rlib, ctx) - - # Set parameter. - module.set_input(**rparams) - - # Set input data. - input_data = np.random.uniform(size=data_shape) - module.set_input('data', tvm.nd.array(input_data.astype('float32'))) - - # Run. - module.run() - print("- Remote module execution completed!") - - out = module.get_output(0, out=tvm.nd.empty(out_shape, ctx=ctx)) - # Print first 10 elements of output. - print(out.asnumpy()[0][0:10]) - -if run_deploy_rpc and opengl_enabled: - deploy_rpc() - -###################################################################### -# Demo 3: Deploy the Model to WebGL SystemLib -# ----------------------------------------------- -# This time we are not using RPC. Instead, we will compile the model and link it -# with the entire tvm runtime into a single giant JavaScript file. Then we will -# run the model using JavaScript. -# -def deploy_web(): - """Runs the demo that deploys to web. - """ - - import base64 - import json - import os - import shutil - import SimpleHTTPServer, SocketServer - - from tvm.contrib import emscripten - - curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(os.getcwd()))) - working_dir = os.getcwd() - output_dir = os.path.join(working_dir, "resnet") - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - # As usual, load the resnet18 model. - net, params, data_shape, out_shape = load_mxnet_resnet() - - # As usual, compile the model. - graph, lib, params = compile_net( - net, - target_host="llvm -target=asmjs-unknown-emscripten -system-lib", - target="opengl", - data_shape=data_shape, - params=params) - - # Now we save the model and link it with the TVM web runtime. - path_lib = os.path.join(output_dir, "resnet.js") - path_graph = os.path.join(output_dir, "resnet.json") - path_params = os.path.join(output_dir, "resnet.params") - path_data_shape = os.path.join(output_dir, "data_shape.json") - path_out_shape = os.path.join(output_dir, "out_shape.json") - - lib.export_library(path_lib, emscripten.create_js, options=[ - "-s", "USE_GLFW=3", - "-s", "USE_WEBGL2=1", - "-lglfw", - "-s", "TOTAL_MEMORY=1073741824", - ]) - with open(path_graph, "w") as fo: - fo.write(graph.json()) - with open(path_params, "w") as fo: - fo.write(base64.b64encode(nnvm.compiler.save_param_dict(params))) - - shutil.copyfile(os.path.join(curr_path, "../tvm/web/tvm_runtime.js"), - os.path.join(output_dir, "tvm_runtime.js")) - shutil.copyfile(os.path.join(curr_path, "web/resnet.html"), - os.path.join(output_dir, "resnet.html")) - - # Now we want to save some extra files so that we can execute the model from - # JavaScript. 
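deploy_web above ships the serialized parameter blob as base64 text so it can sit next to the JavaScript runtime files. The round trip below shows the same binary-to-text packaging idea with np.savez standing in for nnvm.compiler.save_param_dict (the actual on-disk format used by NNVM is different); it is only meant to illustrate the step.

.. code-block:: python

    import base64
    import io

    import numpy as np

    params = {"fc_weight": np.random.rand(4, 4).astype("float32"),
              "fc_bias": np.zeros(4, dtype="float32")}

    # Serialize the dict of arrays to bytes, then to printable text.
    buf = io.BytesIO()
    np.savez(buf, **params)
    encoded = base64.b64encode(buf.getvalue()).decode("ascii")

    # Later (for example in another process) decode and restore the arrays.
    restored = np.load(io.BytesIO(base64.b64decode(encoded)))
    assert np.allclose(restored["fc_weight"], params["fc_weight"])
    print("encoded blob length:", len(encoded))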
- # - data shape - with open(path_data_shape, "w") as fo: - json.dump(list(data_shape), fo) - # - out shape - with open(path_out_shape, "w") as fo: - json.dump(list(out_shape), fo) - # - input image - image = download_image() - image.save(os.path.join(output_dir, "data.png")) - # - synset - synset = download_synset() - with open(os.path.join(output_dir, "synset.json"), "w") as fo: - json.dump(synset, fo) - - print("Output files are in", output_dir) - - # Finally, we fire up a simple web server to serve all the exported files. - print("Now running a simple server to serve the files...") - os.chdir(output_dir) - port = 8080 - handler = SimpleHTTPServer.SimpleHTTPRequestHandler - httpd = SocketServer.TCPServer(("", port), handler) - print("Please open http://localhost:" + str(port) + "/resnet.html") - httpd.serve_forever() - -if run_deploy_web and opengl_enabled: - deploy_web() diff --git a/nnvm/tutorials/from_onnx.py b/nnvm/tutorials/from_onnx.py deleted file mode 100644 index 97d154615e67..000000000000 --- a/nnvm/tutorials/from_onnx.py +++ /dev/null @@ -1,111 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Compile ONNX Models -=================== -**Author**: `Joshua Z. Zhang `_ - -This article is an introductory tutorial to deploy ONNX models with NNVM. - -For us to begin with, onnx module is required to be installed. - -A quick solution is to install protobuf compiler, and - -.. code-block:: bash - - pip install onnx --user - -or please refer to offical site. -https://github.com/onnx/onnx -""" -import nnvm -import tvm -from tvm.contrib.download import download_testdata -import onnx -import numpy as np - -###################################################################### -# Load pretrained ONNX model -# --------------------------------------------- -# The example super resolution model used here is exactly the same model in onnx tutorial -# http://pytorch.org/tutorials/advanced/super_resolution_with_caffe2.html -# we skip the pytorch model construction part, and download the saved onnx model -model_url = ''.join(['https://gist.github.com/zhreshold/', - 'bcda4716699ac97ea44f791c24310193/raw/', - '93672b029103648953c4e5ad3ac3aadf346a4cdc/', - 'super_resolution_0.2.onnx']) -model_path = download_testdata(model_url, 'super_resolution.onnx', module='onnx') -# now you have super_resolution.onnx on disk -onnx_model = onnx.load_model(model_path) -# we can load the graph as NNVM compatible model -sym, params = nnvm.frontend.from_onnx(onnx_model) - -###################################################################### -# Load a test image -# --------------------------------------------- -# A single cat dominates the examples! 
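The file-serving step in deploy_web uses Python 2's SimpleHTTPServer and SocketServer modules, which no longer exist under those names. A Python 3 equivalent of that final step would look roughly like this, serving the exported "resnet" directory on port 8080:

.. code-block:: python

    import functools
    import http.server
    import socketserver

    port = 8080
    # Serve files from the export directory; directory= avoids having to os.chdir().
    handler = functools.partial(http.server.SimpleHTTPRequestHandler, directory="resnet")
    with socketserver.TCPServer(("", port), handler) as httpd:
        print("Please open http://localhost:%d/resnet.html" % port)
        httpd.serve_forever()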
-from PIL import Image -img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true' -img_path = download_testdata(img_url, 'cat.png', module='data') -img = Image.open(img_path).resize((224, 224)) -img_ycbcr = img.convert("YCbCr") # convert to YCbCr -img_y, img_cb, img_cr = img_ycbcr.split() -x = np.array(img_y)[np.newaxis, np.newaxis, :, :] - -###################################################################### -# Compile the model on NNVM -# --------------------------------------------- -# We should be familiar with the process right now. -import nnvm.compiler -target = 'cuda' -# assume first input name is data -input_name = sym.list_input_names()[0] -shape_dict = {input_name: x.shape} -with nnvm.compiler.build_config(opt_level=3): - graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params) - -###################################################################### -# Execute on TVM -# --------------------------------------------- -# The process is no different from other example -from tvm.contrib import graph_runtime -ctx = tvm.gpu(0) -dtype = 'float32' -m = graph_runtime.create(graph, lib, ctx) -# set inputs -m.set_input(input_name, tvm.nd.array(x.astype(dtype))) -m.set_input(**params) -# execute -m.run() -# get outputs -output_shape = (1, 1, 672, 672) -tvm_output = m.get_output(0, tvm.nd.empty(output_shape, dtype)).asnumpy() - -###################################################################### -# Display results -# --------------------------------------------- -# We put input and output image neck to neck -from matplotlib import pyplot as plt -out_y = Image.fromarray(np.uint8((tvm_output[0, 0]).clip(0, 255)), mode='L') -out_cb = img_cb.resize(out_y.size, Image.BICUBIC) -out_cr = img_cr.resize(out_y.size, Image.BICUBIC) -result = Image.merge('YCbCr', [out_y, out_cb, out_cr]).convert('RGB') -canvas = np.full((672, 672*2, 3), 255) -canvas[0:224, 0:224, :] = np.asarray(img) -canvas[:, 672:, :] = np.asarray(result) -plt.imshow(canvas.astype(np.uint8)) -plt.show() diff --git a/nnvm/tutorials/from_tensorflow.py b/nnvm/tutorials/from_tensorflow.py deleted file mode 100644 index 6a30443dba60..000000000000 --- a/nnvm/tutorials/from_tensorflow.py +++ /dev/null @@ -1,239 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Compile Tensorflow Models -========================= -This article is an introductory tutorial to deploy tensorflow models with TVM. - -For us to begin with, tensorflow python module is required to be installed. 
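The super-resolution tutorial above runs the network only on the Y (luma) channel and rebuilds a color image by bicubic-resizing Cb and Cr to the new size. The PIL-only sketch below reproduces that recombination step, with a plain bicubic upscale of Y standing in for the model output; it assumes some RGB image file named cat.png is available on disk.

.. code-block:: python

    import numpy as np
    from PIL import Image

    img = Image.open("cat.png").resize((224, 224))
    img_y, img_cb, img_cr = img.convert("YCbCr").split()

    # Pretend the network upscaled Y by 3x; the real tutorial gets this from TVM.
    out_y = img_y.resize((672, 672), Image.BICUBIC)

    # Chroma channels are simply interpolated to the new size and merged back.
    out_cb = img_cb.resize(out_y.size, Image.BICUBIC)
    out_cr = img_cr.resize(out_y.size, Image.BICUBIC)
    result = Image.merge("YCbCr", [out_y, out_cb, out_cr]).convert("RGB")
    print(result.size, np.asarray(result).shape)  # (672, 672) and (672, 672, 3)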
- -Please refer to https://www.tensorflow.org/install -""" - -# tvm and nnvm -import nnvm -import tvm - -# os and numpy -import numpy as np -import os.path - -# Tensorflow imports -import tensorflow as tf -from tensorflow.core.framework import graph_pb2 -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_util - -# Tensorflow utility functions -import tvm.relay.testing.tf as tf_testing - -# Base location for model related files. -repo_base = 'https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/' - -# Test image -img_name = 'elephant-299.jpg' -image_url = os.path.join(repo_base, img_name) - -###################################################################### -# Tutorials -# --------- -# .. note:: -# -# protobuf should be exported with :any:`add_shapes=True` option. -# Could use https://github.com/dmlc/web-data/tree/master/tensorflow/scripts/tf-to-nnvm.py -# to add shapes for existing models. -# -# Please refer docs/frontend/tensorflow.md for more details for various models -# from tensorflow. - -model_name = 'classify_image_graph_def-with_shapes.pb' -model_url = os.path.join(repo_base, model_name) - -# Image label map -map_proto = 'imagenet_2012_challenge_label_map_proto.pbtxt' -map_proto_url = os.path.join(repo_base, map_proto) - -# Human readable text for labels -label_map = 'imagenet_synset_to_human_label_map.txt' -label_map_url = os.path.join(repo_base, label_map) - -# Target settings -# Use these commented settings to build for cuda. -#target = 'cuda' -#target_host = 'llvm' -#layout = "NCHW" -#ctx = tvm.gpu(0) -target = 'llvm' -target_host = 'llvm' -layout = None -ctx = tvm.cpu(0) - -###################################################################### -# Download required files -# ----------------------- -# Download files listed above. -from tvm.contrib.download import download_testdata - -img_path = download_testdata(image_url, img_name, module='data') -model_path = download_testdata(model_url, model_name, module=['tf', 'InceptionV1']) -map_proto_path = download_testdata(map_proto_url, map_proto, module='data') -label_path = download_testdata(label_map_url, label_map, module='data') - -###################################################################### -# Import model -# ------------ -# Creates tensorflow graph definition from protobuf file. - -with tf.gfile.FastGFile(model_path, 'rb') as f: - graph_def = tf.GraphDef() - graph_def.ParseFromString(f.read()) - graph = tf.import_graph_def(graph_def, name='') - # Call the utility to import the graph definition into default graph. - graph_def = tf_testing.ProcessGraphDefParam(graph_def) - # Add shapes to the graph. - with tf.Session() as sess: - graph_def = tf_testing.AddShapesToGraphDef(sess, 'softmax') - -###################################################################### -# Decode image -# ------------ -# .. note:: -# -# tensorflow frontend import doesn't support preprocessing ops like JpegDecode. -# JpegDecode is bypassed (just return source node). -# Hence we supply decoded frame to TVM instead. -# - -from PIL import Image -image = Image.open(img_path).resize((299, 299)) - -x = np.array(image) - -###################################################################### -# Import the graph to NNVM -# ------------------------ -# Import tensorflow graph definition to nnvm. -# -# Results: -# sym: nnvm graph for given tensorflow protobuf. -# params: params converted from tensorflow params (tensor protobuf). 
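The target settings above switch layout to "NCHW" when building for CUDA, while the decoded test image is a plain height x width x channel array. The snippet below spells out the layout conversion that distinction refers to, using numpy only:

.. code-block:: python

    import numpy as np

    hwc = np.random.randint(0, 256, size=(299, 299, 3), dtype=np.uint8)  # decoded image
    nhwc = hwc[np.newaxis, ...]        # add a batch dimension: (1, 299, 299, 3)
    nchw = nhwc.transpose(0, 3, 1, 2)  # channels-first layout:  (1, 3, 299, 299)
    print(nhwc.shape, nchw.shape)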
-sym, params = nnvm.frontend.from_tensorflow(graph_def, layout=layout) - -print("Tensorflow protobuf imported as nnvm graph") -###################################################################### -# NNVM Compilation -# ---------------- -# Compile the graph to llvm target with given input specification. -# -# Results: -# graph: Final graph after compilation. -# params: final params after compilation. -# lib: target library which can be deployed on target with tvm runtime. - -import nnvm.compiler -shape_dict = {'DecodeJpeg/contents': x.shape} -dtype_dict = {'DecodeJpeg/contents': 'uint8'} -graph, lib, params = nnvm.compiler.build(sym, shape=shape_dict, target=target, target_host=target_host, dtype=dtype_dict, params=params) - -###################################################################### -# Execute the portable graph on TVM -# --------------------------------- -# Now we can try deploying the NNVM compiled model on target. - -from tvm.contrib import graph_runtime -dtype = 'uint8' -m = graph_runtime.create(graph, lib, ctx) -# set inputs -m.set_input('DecodeJpeg/contents', tvm.nd.array(x.astype(dtype))) -m.set_input(**params) -# execute -m.run() -# get outputs -tvm_output = m.get_output(0, tvm.nd.empty(((1, 1008)), 'float32')) - -###################################################################### -# Process the output -# ------------------ -# Process the model output to human readable text for InceptionV1. -predictions = tvm_output.asnumpy() -predictions = np.squeeze(predictions) - -# Creates node ID --> English string lookup. -node_lookup = tf_testing.NodeLookup(label_lookup_path=map_proto_path, - uid_lookup_path=label_path) - -# Print top 5 predictions from TVM output. -top_k = predictions.argsort()[-5:][::-1] -for node_id in top_k: - human_string = node_lookup.id_to_string(node_id) - score = predictions[node_id] - print('%s (score = %.5f)' % (human_string, score)) - -###################################################################### -# Inference on tensorflow -# ----------------------- -# Run the corresponding model on tensorflow - -def create_graph(): - """Creates a graph from saved GraphDef file and returns a saver.""" - # Creates graph from saved graph_def.pb. - with tf.gfile.FastGFile(model_path, 'rb') as f: - graph_def = tf.GraphDef() - graph_def.ParseFromString(f.read()) - graph = tf.import_graph_def(graph_def, name='') - # Call the utility to import the graph definition into default graph. - graph_def = tf_testing.ProcessGraphDefParam(graph_def) - -def run_inference_on_image(image): - """Runs inference on an image. - - Parameters - ---------- - image: String - Image file name. - - Returns - ------- - Nothing - """ - if not tf.gfile.Exists(image): - tf.logging.fatal('File does not exist %s', image) - image_data = tf.gfile.FastGFile(image, 'rb').read() - - # Creates graph from saved GraphDef. - create_graph() - - with tf.Session() as sess: - softmax_tensor = sess.graph.get_tensor_by_name('softmax:0') - predictions = sess.run(softmax_tensor, - {'DecodeJpeg/contents:0': image_data}) - - predictions = np.squeeze(predictions) - - # Creates node ID --> English string lookup. - node_lookup = tf_testing.NodeLookup(label_lookup_path=map_proto_path, - uid_lookup_path=label_path) - - # Print top 5 predictions from tensorflow. 
- top_k = predictions.argsort()[-5:][::-1] - print ("===== TENSORFLOW RESULTS =======") - for node_id in top_k: - human_string = node_lookup.id_to_string(node_id) - score = predictions[node_id] - print('%s (score = %.5f)' % (human_string, score)) - -run_inference_on_image(img_path) diff --git a/nnvm/tutorials/get_started.py b/nnvm/tutorials/get_started.py deleted file mode 100644 index 46f711e7d347..000000000000 --- a/nnvm/tutorials/get_started.py +++ /dev/null @@ -1,190 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Get Started with NNVM -===================== -**Author**: `Tianqi Chen `_ - -This article is an introductory tutorial to workflow in NNVM. -""" -import nnvm.compiler -import nnvm.symbol as sym - -###################################################################### -# Declare Computation -# ------------------- -# We start by describing our need using computational graph. -# Most deep learning frameworks use computation graph to describe -# their computation. In this example, we directly use -# NNVM's API to construct the computational graph. -# -# .. note:: -# -# In a typical deep learning compilation workflow, -# we can get the models from :any:`nnvm.frontend` -# -# The following code snippet describes :math:`z = x + \sqrt{y}` -# and creates a nnvm graph from the description. -# We can print out the graph ir to check the graph content. - -x = sym.Variable("x") -y = sym.Variable("y") -z = sym.elemwise_add(x, sym.sqrt(y)) -compute_graph = nnvm.graph.create(z) -print("-------compute graph-------") -print(compute_graph.ir()) - -###################################################################### -# Compile -# ------- -# We can call :any:`nnvm.compiler.build` to compile the graph. -# The build function takes a shape parameter which specifies the -# input shape requirement. Here we only need to pass in shape of ``x`` -# and the other one will be inferred automatically by NNVM. -# -# The function returns three values. ``deploy_graph`` contains -# the final compiled graph structure. ``lib`` is a :any:`tvm.module.Module` -# that contains compiled CUDA functions. We do not need the ``params`` -# in this case. -shape = (4,) -deploy_graph, lib, params = nnvm.compiler.build( - compute_graph, target="cuda", shape={"x": shape}, dtype="float32") - -###################################################################### -# We can print out the IR of ``deploy_graph`` to understand what just -# happened under the hood. We can find that ``deploy_graph`` only -# contains a single operator ``tvm_op``. This is because NNVM -# automatically fused the operator together into one operator. 
-# -print("-------deploy graph-------") -print(deploy_graph.ir()) - -###################################################################### -# Let us also peek into content of ``lib``. -# Typically a compiled TVM CUDA module contains a host module(lib) -# and a device module(``lib.imported_modules[0]``) that contains the CUDA code. -# We print out the the generated device code here. -# This is exactly a fused CUDA version of kernel that the graph points to. -# -print("-------deploy library-------") -print(lib.imported_modules[0].get_source()) - -###################################################################### -# Deploy and Run -# -------------- -# Now that we have have compiled module, let us run it. -# We can use :any:`graph_runtime ` -# in tvm to create a deployable :any:`GraphModule `. -# We can use the :any:`set_input `, -# :any:`run ` and -# :any:`get_output ` function -# to set the input, execute the graph and get the output we need. -# -import tvm -import numpy as np -from tvm.contrib import graph_runtime, util - -module = graph_runtime.create(deploy_graph, lib, tvm.gpu(0)) -x_np = np.array([1, 2, 3, 4]).astype("float32") -y_np = np.array([4, 4, 4, 4]).astype("float32") -# set input to the graph module -module.set_input(x=x_np, y=y_np) -# run forward computation -module.run() -# get the first output -out = module.get_output(0, out=tvm.nd.empty(shape)) -print(out.asnumpy()) - -###################################################################### -# Provide Model Parameters -# ------------------------ -# Most deep learning models contains two types of inputs: parameters -# that remains fixed during inference and data input that need to -# change for each inference task. It is helpful to provide these -# information to NNVM. Let us assume that ``y`` is the parameter -# in our example. We can provide the model parameter information -# by the params argument to :any:`nnvm.compiler.build`. -# -deploy_graph, lib, params = nnvm.compiler.build( - compute_graph, target="cuda", shape={"x": shape}, params={"y": y_np}) - -###################################################################### -# This time we will need params value returned by :any:`nnvm.compiler.build`. -# NNVM applys optimization to pre-compute the intermediate values in -# the graph that can be determined by parameters. In this case -# :math:`\sqrt{y}` can be pre-computed. The pre-computed values -# are returned as new params. We can print out the new compiled library -# to confirm that the fused kernel only now contains add. -# -print("-----optimized params-----") -print(params) -print("-------deploy library-------") -print(lib.imported_modules[0].get_source()) - -###################################################################### -# Save the Deployed Module -# ------------------------ -# We can save the ``deploy_graph``, ``lib`` and ``params`` separately -# and load them back later. We can use :any:`tvm.module.Module` to export -# the compiled library. ``deploy_graph`` is saved in json format and ``params`` -# is serialized into a bytearray. -# -temp = util.tempdir() -path_lib = temp.relpath("deploy.so") -lib.export_library(path_lib) -with open(temp.relpath("deploy.json"), "w") as fo: - fo.write(deploy_graph.json()) -with open(temp.relpath("deploy.params"), "wb") as fo: - fo.write(nnvm.compiler.save_param_dict(params)) -print(temp.listdir()) - -###################################################################### -# We can load the module back. 
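Since the whole get_started graph is just z = x + sqrt(y), the run above can be checked against numpy directly, and the pre-computed parameter discussed in this step is simply sqrt(y):

.. code-block:: python

    import numpy as np

    x_np = np.array([1, 2, 3, 4], dtype="float32")
    y_np = np.array([4, 4, 4, 4], dtype="float32")

    print(x_np + np.sqrt(y_np))  # expected module output: [3. 4. 5. 6.]
    print(np.sqrt(y_np))         # what NNVM pre-computes from params: [2. 2. 2. 2.]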
-loaded_lib = tvm.module.load(path_lib) -loaded_json = open(temp.relpath("deploy.json")).read() -loaded_params = bytearray(open(temp.relpath("deploy.params"), "rb").read()) -module = graph_runtime.create(loaded_json, loaded_lib, tvm.gpu(0)) -params = nnvm.compiler.load_param_dict(loaded_params) -# directly load from byte array -module.load_params(loaded_params) -module.run(x=x_np) -# get the first output -out = module.get_output(0, out=tvm.nd.empty(shape)) -print(out.asnumpy()) - -###################################################################### -# Deploy using Another Language -# ----------------------------- -# We use python in this example for demonstration. -# We can also deploy the compiled modules with other languages -# supported by TVM such as c++, java, javascript. -# The graph module itself is fully embedded in TVM runtime. -# -# The following block demonstrates how we can directly use TVM's -# runtime API to execute the compiled module. -# You can find similar runtime API in TVMRuntime of other languages. -# -fcreate = tvm.get_global_func("tvm.graph_runtime.create") -ctx = tvm.gpu(0) -gmodule = fcreate(loaded_json, loaded_lib, ctx.device_type, ctx.device_id) -set_input, get_output, run = gmodule["set_input"], gmodule["get_output"], gmodule["run"] -set_input("x", tvm.nd.array(x_np)) -gmodule["load_params"](loaded_params) -run() -out = tvm.nd.empty(shape) -get_output(0, out) -print(out.asnumpy()) diff --git a/nnvm/tutorials/nlp/from_darknet_rnn.py b/nnvm/tutorials/nlp/from_darknet_rnn.py deleted file mode 100644 index 1bc9627dd62f..000000000000 --- a/nnvm/tutorials/nlp/from_darknet_rnn.py +++ /dev/null @@ -1,198 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Compile Darknet Models for RNN -============================== -**Author**: `Siju Samuel `_ - -This article is an introductory tutorial to deploy darknet rnn models with NNVM. - -This script will run a character prediction model -Each module consists of 3 fully-connected layers. The input layer propagates information from the -input to the current state. The recurrent layer propagates information through time from the -previous state to the current one. - -The input to the network is a 1-hot encoding of ASCII characters. We train the network to predict -the next character in a stream of characters. The output is constrained to be a probability -distribution using a softmax layer. - -Since each recurrent layer contains information about the current character and the past -characters, it can use this context to predict the future characters in a word or phrase. - -All the required models and libraries will be downloaded from the internet -by the script. 
-""" -import random -import numpy as np -import tvm -from tvm.contrib import graph_runtime -from tvm.contrib.download import download_testdata -from nnvm.testing.darknet import __darknetffi__ -import nnvm -import nnvm.frontend.darknet - -# Set the parameters -# ----------------------- -# Set the seed value and the number of characters to predict - -#Model name -MODEL_NAME = 'rnn' -#Seed value -seed = 'Thus' -#Number of characters to predict -num = 1000 - -# Download required files -# ----------------------- -# Download cfg and weights file if first time. -CFG_NAME = MODEL_NAME + '.cfg' -WEIGHTS_NAME = MODEL_NAME + '.weights' -REPO_URL = 'https://github.com/dmlc/web-data/blob/master/darknet/' -CFG_URL = REPO_URL + 'cfg/' + CFG_NAME + '?raw=true' -WEIGHTS_URL = REPO_URL + 'weights/' + WEIGHTS_NAME + '?raw=true' - -cfg_path = download_testdata(CFG_URL, CFG_NAME, module='darknet') -weights_path = download_testdata(WEIGHTS_URL, WEIGHTS_NAME, module='darknet') - -# Download and Load darknet library -DARKNET_LIB = 'libdarknet.so' -DARKNET_URL = REPO_URL + 'lib/' + DARKNET_LIB + '?raw=true' -lib_path = download_testdata(DARKNET_URL, DARKNET_LIB, module='darknet') -DARKNET_LIB = __darknetffi__.dlopen(lib_path) -net = DARKNET_LIB.load_network(cfg_path.encode('utf-8'), weights_path.encode('utf-8'), 0) -dtype = 'float32' -batch_size = 1 - -# Import the graph to NNVM -# ------------------------ -# Import darknet graph definition to nnvm. -# -# Results: -# sym: nnvm graph for rnn model -# params: params converted from darknet weights -print("Converting darknet rnn model to nnvm symbols...") -sym, params = nnvm.frontend.darknet.from_darknet(net, dtype) - -# Compile the model on NNVM -data = np.empty([1, net.inputs], dtype)#net.inputs - -target = 'llvm' -shape = {'data': data.shape} -print("Compiling the model...") - -shape_dict = {'data': data.shape} -dtype_dict = {'data': data.dtype} - -with nnvm.compiler.build_config(opt_level=2): - graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, dtype_dict, params) - -# Execute the portable graph on TVM -# --------------------------------- -# Now we can try deploying the NNVM compiled model on cpu target. 
- -# Set the cpu context -ctx = tvm.cpu(0) -# Create graph runtime -m = graph_runtime.create(graph, lib, ctx) -# Set the params to runtime -m.set_input(**params) - -def _init_state_memory(rnn_cells_count, dtype): - '''Initialize memory for states''' - states = {} - state_shape = (1024,) - for i in range(rnn_cells_count): - k = 'rnn' + str(i) + '_state' - states[k] = tvm.nd.array(np.zeros(state_shape, dtype).astype(dtype)) - return states - -def _set_state_input(runtime, states): - '''Set the state inputs''' - for state in states: - runtime.set_input(state, states[state]) - -def _get_state_output(runtime, states): - '''Get the state outputs and save''' - i = 1 - for state in states: - data = states[state] - states[state] = runtime.get_output((i), tvm.nd.empty(data.shape, data.dtype)) - i += 1 - -def _proc_rnn_output(out_data): - '''Generate the characters from the output array''' - sum_array = 0 - n = out_data.size - r = random.uniform(0, 1) - for j in range(n): - if out_data[j] < 0.0001: - out_data[j] = 0 - sum_array += out_data[j] - - for j in range(n): - out_data[j] *= float(1.0) / sum_array - r = r - out_data[j] - if r <= 0: - return j - return n-1 - -print("RNN generaring text...") - -out_shape = (net.outputs,) -rnn_cells_count = 3 - -# Initialize state memory -# ----------------------- -states = _init_state_memory(rnn_cells_count, dtype) - -len_seed = len(seed) -count = len_seed + num -out_txt = "" - -#Initialize random seed -random.seed(0) -c = ord(seed[0]) -inp_data = np.zeros([net.inputs], dtype) - -# Run the model -# ------------- - -# Predict character by character till `num` -for i in range(count): - inp_data[c] = 1 - - # Set the input data - m.set_input('data', tvm.nd.array(inp_data.astype(dtype))) - inp_data[c] = 0 - - # Set the state inputs - _set_state_input(m, states) - - # Run the model - m.run() - - # Get the output - tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy() - - # Get the state outputs - _get_state_output(m, states) - - # Get the predicted character and keep buffering it - c = ord(seed[i]) if i < len_seed else _proc_rnn_output(tvm_out) - out_txt += chr(c) - -print("Predicted Text =", out_txt) diff --git a/nnvm/tutorials/nlp/keras_s2s_translate.py b/nnvm/tutorials/nlp/keras_s2s_translate.py deleted file mode 100644 index 16c737418c6f..000000000000 --- a/nnvm/tutorials/nlp/keras_s2s_translate.py +++ /dev/null @@ -1,254 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Keras LSTM Sequence to Sequence Model for Translation -================================= -**Author**: `Siju Samuel `_ - -This script demonstrates how to implement a basic character-level sequence-to-sequence model. -We apply it to translating short English sentences into short French sentences, -character-by-character. 
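The _proc_rnn_output helper in the RNN tutorial above draws the next character from the network's probability vector after zeroing very small entries and renormalizing. A vectorized numpy equivalent of that sampling step (not byte-for-byte identical, since the original walks the cumulative sum with a single uniform draw, and this sketch assumes at least one probability exceeds the floor) could look like:

.. code-block:: python

    import numpy as np

    def sample_char_index(probs, floor=1e-4):
        """Sample an index from a probability vector, ignoring tiny probabilities."""
        p = np.where(probs < floor, 0.0, probs)
        p = p / p.sum()
        return int(np.random.choice(len(p), p=p))

    probs = np.random.dirichlet(np.ones(128))
    print(sample_char_index(probs))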
- -# Summary of the algorithm - -- We start with input sequences from a domain (e.g. English sentences) - and corresponding target sequences from another domain - (e.g. French sentences). -- An encoder LSTM turns input sequences to 2 state vectors - (we keep the last LSTM state and discard the outputs). -- A decoder LSTM is trained to turn the target sequences into - the same sequence but offset by one timestep in the future, - a training process called "teacher forcing" in this context. - Is uses as initial state the state vectors from the encoder. - Effectively, the decoder learns to generate `targets[t+1...]` - given `targets[...t]`, conditioned on the input sequence. - -This script loads the s2s.h5 model saved in repository -https://github.com/dmlc/web-data/raw/master/keras/models/s2s_translate/lstm_seq2seq.py -and generates sequences from it. It assumes that no changes have been made (for example: -latent_dim is unchanged, and the input data and model architecture are unchanged). - -# References - -- Sequence to Sequence Learning with Neural Networks - https://arxiv.org/abs/1409.3215 -- Learning Phrase Representations using - RNN Encoder-Decoder for Statistical Machine Translation - https://arxiv.org/abs/1406.1078 - -See lstm_seq2seq.py for more details on the model architecture and how it is trained. -""" - -from keras.models import Model, load_model -from keras.layers import Input -import random -import os -import numpy as np -import keras -import tvm -import nnvm - -###################################################################### -# Download required files -# ----------------------- -# Download files listed below from dmlc web-data repo. -model_file = "s2s_translate.h5" -data_file = "fra-eng.txt" - -# Base location for model related files. -repo_base = 'https://github.com/dmlc/web-data/raw/master/keras/models/s2s_translate/' -model_url = os.path.join(repo_base, model_file) -data_url = os.path.join(repo_base, data_file) - -# Download files listed below. -from tvm.contrib.download import download_testdata -model_path = download_testdata(model_url, model_file, module='keras') -data_path = download_testdata(data_url, data_file, module='data') - -latent_dim = 256 # Latent dimensionality of the encoding space. -test_samples = 10000 # Number of samples used for testing. - -###################################################################### -# Process the data file -# --------------------- -# Vectorize the data. We use the same approach as the training script. -# NOTE: the data must be identical, in order for the character -> integer -# mappings to be consistent. -input_texts = [] -target_texts = [] -input_characters = set() -target_characters = set() -with open(data_path, 'r', encoding='utf-8') as f: - lines = f.read().split('\n') -test_samples = min(test_samples, len(lines)) -max_encoder_seq_length = 0 -max_decoder_seq_length = 0 -for line in lines[:test_samples]: - input_text, target_text = line.split('\t') - # We use "tab" as the "start sequence" character - # for the targets, and "\n" as "end sequence" character. 
- target_text = '\t' + target_text + '\n' - max_encoder_seq_length = max(max_encoder_seq_length, len(input_text)) - max_decoder_seq_length = max(max_decoder_seq_length, len(target_text)) - for char in input_text: - if char not in input_characters: - input_characters.add(char) - for char in target_text: - if char not in target_characters: - target_characters.add(char) - -input_characters = sorted(list(input_characters)) -target_characters = sorted(list(target_characters)) -num_encoder_tokens = len(input_characters) -num_decoder_tokens = len(target_characters) -input_token_index = dict( - [(char, i) for i, char in enumerate(input_characters)]) -target_token_index = dict( - [(char, i) for i, char in enumerate(target_characters)]) - -# Reverse-lookup token index to decode sequences back to something readable. -reverse_target_char_index = dict( - (i, char) for char, i in target_token_index.items()) - -###################################################################### -# Load Keras Model -# ---------------- -# Restore the model and construct the encoder and decoder. -model = load_model(model_path) -encoder_inputs = model.input[0] # input_1 - -encoder_outputs, state_h_enc, state_c_enc = model.layers[2].output # lstm_1 -encoder_states = [state_h_enc, state_c_enc] -encoder_model = Model(encoder_inputs, encoder_states) - -decoder_inputs = model.input[1] # input_2 -decoder_state_input_h = Input(shape=(latent_dim,), name='input_3') -decoder_state_input_c = Input(shape=(latent_dim,), name='input_4') -decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c] -decoder_lstm = model.layers[3] -decoder_outputs, state_h_dec, state_c_dec = decoder_lstm( - decoder_inputs, initial_state=decoder_states_inputs) -decoder_states = [state_h_dec, state_c_dec] -decoder_dense = model.layers[4] -decoder_outputs = decoder_dense(decoder_outputs) -decoder_model = Model( - [decoder_inputs] + decoder_states_inputs, - [decoder_outputs] + decoder_states) - -###################################################################### -# Compile both encoder and decoder model on NNVM -# ---------------------------------------------- -# Creates NNVM graph definition from keras model file. -from tvm.contrib import graph_runtime -target = 'llvm' -ctx = tvm.cpu(0) - -# Parse Encoder model -sym, params = nnvm.frontend.from_keras(encoder_model) -inp_enc_shape = (1, max_encoder_seq_length, num_encoder_tokens) -shape_dict = {'input_1': inp_enc_shape} - -# Build Encoder model -with nnvm.compiler.build_config(opt_level=2): - enc_graph, enc_lib, enc_params = nnvm.compiler.build(sym, target, shape_dict, params=params) -print("Encoder build ok.") - -# Create graph runtime for encoder model -tvm_enc = graph_runtime.create(enc_graph, enc_lib, ctx) -tvm_enc.set_input(**enc_params) - -# Parse Decoder model -inp_dec_shape = (1, 1, num_decoder_tokens) -shape_dict = {'input_2': inp_dec_shape, - 'input_3': (1, latent_dim), - 'input_4': (1, latent_dim)} - -# Build Decoder model -sym, params = nnvm.frontend.from_keras(decoder_model) -with nnvm.compiler.build_config(opt_level=2): - dec_graph, dec_lib, dec_params = nnvm.compiler.build(sym, target, shape_dict, params=params) -print("Decoder build ok.") - -# Create graph runtime for decoder model -tvm_dec = graph_runtime.create(dec_graph, dec_lib, ctx) -tvm_dec.set_input(**dec_params) - -# Decodes an input sequence. -def decode_sequence(input_seq): - # Set the input for encoder model. 
- tvm_enc.set_input('input_1', input_seq) - - # Run encoder model - tvm_enc.run() - - # Get states from encoder network - h = tvm_enc.get_output(0).asnumpy() - c = tvm_enc.get_output(1).asnumpy() - - # Populate the first character of target sequence with the start character. - sampled_token_index = target_token_index['\t'] - - # Sampling loop for a batch of sequences - decoded_sentence = '' - while True: - # Generate empty target sequence of length 1. - target_seq = np.zeros((1, 1, num_decoder_tokens), dtype='float32') - # Update the target sequence (of length 1). - target_seq[0, 0, sampled_token_index] = 1. - - # Set the input and states for decoder model. - tvm_dec.set_input('input_2', target_seq) - tvm_dec.set_input('input_3', h) - tvm_dec.set_input('input_4', c) - # Run decoder model - tvm_dec.run() - - output_tokens = tvm_dec.get_output(0).asnumpy() - h = tvm_dec.get_output(1).asnumpy() - c = tvm_dec.get_output(2).asnumpy() - - # Sample a token - sampled_token_index = np.argmax(output_tokens[0, -1, :]) - sampled_char = reverse_target_char_index[sampled_token_index] - - # Exit condition: either hit max length or find stop character. - if sampled_char == '\n': - break - - # Update the sentence - decoded_sentence += sampled_char - if len(decoded_sentence) > max_decoder_seq_length: - break - return decoded_sentence - -def generate_input_seq(input_text): - input_seq = np.zeros((1, max_encoder_seq_length, num_encoder_tokens), dtype='float32') - for t, char in enumerate(input_text): - input_seq[0, t, input_token_index[char]] = 1. - return input_seq - -###################################################################### -# Run the model -# ------------- -# Randonly take some text from test samples and translate -for seq_index in range(100): - # Take one sentence randomly and try to decode. - index = random.randint(1, test_samples) - input_text, _ = lines[index].split('\t') - input_seq = generate_input_seq(input_text) - decoded_sentence = decode_sequence(input_seq) - print((seq_index + 1), ": ", input_text, "==>", decoded_sentence) diff --git a/nnvm/tutorials/tune_nnvm_arm.py b/nnvm/tutorials/tune_nnvm_arm.py deleted file mode 100644 index d61130b852cc..000000000000 --- a/nnvm/tutorials/tune_nnvm_arm.py +++ /dev/null @@ -1,427 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Auto-tuning a convolutional network for ARM CPU (NNVM) -====================================================== -**Author**: `Lianmin Zheng `_, `Zhao Wu `_ - -Auto-tuning for a specific ARM device is critical for getting the best -performance. This is a tutorial about how to tune a whole convolutional -network. - -The operator implementation for ARM CPU in TVM is written in template form. -The template has many tunable knobs (tile factor, vectorization, unrolling, etc). 
-We will tune all convolution and depthwise convolution operators -in the neural network. After tuning, we produce a log file which stores -the best knob values for all required operators. When the tvm compiler compiles -these operators, it will query this log file to get the best knob values. - -We also released pre-tuned parameters for some arm devices. You can go to -`ARM CPU Benchmark `_ -to see the results. -""" - -###################################################################### -# Install dependencies -# -------------------- -# To use the autotvm package in tvm, we need to install some extra dependencies. -# (change "3" to "2" if you use python2): -# -# .. code-block:: bash -# -# pip3 install --user psutil xgboost tornado -# -# To make tvm run faster during tuning, it is recommended to use cython -# as FFI of tvm. In the root directory of tvm, execute -# (change "3" to "2" if you use python2): -# -# .. code-block:: bash -# -# pip3 install --user cython -# sudo make cython3 -# -# Now return to python code. Import packages. - -import os - -import numpy as np - -import nnvm.testing -import nnvm.compiler -import tvm -from tvm import autotvm -from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner -from tvm.contrib.util import tempdir -import tvm.contrib.graph_runtime as runtime - -################################################################# -# Define network -# -------------- -# First we need to define the network in nnvm symbol API. -# We can load some pre-defined network from :code:`nnvm.testing`. -# We can also load models from MXNet, ONNX and TensorFlow (see NNVM -# tutorials :ref:`tutorial-nnvm` for more details). - -def get_network(name, batch_size): - """Get the symbol definition and random weight of a network""" - input_shape = (batch_size, 3, 224, 224) - output_shape = (batch_size, 1000) - - if "resnet" in name: - n_layer = int(name.split('-')[1]) - net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size) - elif "vgg" in name: - n_layer = int(name.split('-')[1]) - net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size) - elif name == 'mobilenet': - net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size) - elif name == 'squeezenet_v1.1': - net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1') - elif name == 'inception_v3': - input_shape = (1, 3, 299, 299) - net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size) - elif name == 'custom': - # an example for custom network - from nnvm.testing import utils - net = nnvm.sym.Variable('data') - net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1)) - net = nnvm.sym.flatten(net) - net = nnvm.sym.dense(net, units=1000) - net, params = utils.create_workload(net, batch_size, (3, 224, 224)) - elif name == 'mxnet': - # an example for mxnet model - from mxnet.gluon.model_zoo.vision import get_model - block = get_model('resnet18_v1', pretrained=True) - net, params = nnvm.frontend.from_mxnet(block) - net = nnvm.sym.softmax(net) - else: - raise ValueError("Unsupported network: " + name) - - return net, params, input_shape, output_shape - - -################################################################# -# Start RPC Tracker -# ----------------- -# TVM uses RPC session to communicate with ARM boards. -# During tuning, the tuner will send the generated code to the board and -# measure the speed of code on the board. 
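-#
-# One simple way to sanity-check this RPC path from Python, once the tracker and the
-# device registration described below are in place, is a short sketch along these lines.
-# The port 9190 and the 'rk3399' key simply mirror the settings used later in this
-# script; adjust them to your own setup.
-#
-# .. code-block:: python
-#
-#     remote = autotvm.measure.request_remote('rk3399', 'localhost', 9190, timeout=60)
-#     ctx = remote.cpu(0)
-#     x = tvm.nd.array(np.zeros(8, dtype='float32'), ctx)
-#     print(x.asnumpy())  # a successful round trip confirms the board is reachable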
-# -# To scale up the tuning, TVM uses RPC Tracker to manage distributed devices. -# The RPC Tracker is a centralized master node. We can register all devices to -# the tracker. For example, if we have 10 phones, we can register all of them -# to the tracker, and run 10 measurements in parallel, accelerating the tuning process. -# -# To start an RPC tracker, run this command on the host machine. The tracker is -# required during the whole tuning process, so we need to open a new terminal for -# this command: -# -# .. code-block:: bash -# -# python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190 -# -# The expected output is -# -# .. code-block:: bash -# -# INFO:RPCTracker:bind to 0.0.0.0:9190 - -################################################################# -# Register devices to RPC Tracker -# ----------------------------------- -# Now we can register our devices to the tracker. The first step is to -# build tvm runtime for the ARM devices. -# -# * For Linux: -# Follow this section :ref:`build-tvm-runtime-on-device` to build -# tvm runtime on the device. Then register the device to tracker by -# -# .. code-block:: bash -# -# python -m tvm.exec.rpc_server --tracker=[HOST_IP]:9190 --key=rk3399 -# -# (replace :code:`[HOST_IP]` with the IP address of your host machine) -# -# * For Android: -# Follow this `readme page `_ to -# install tvm rpc apk on the android device. Make sure you can pass the android rpc test. -# Then you have already registred your device. During tuning, you have to go to developer option -# and enable "Keep screen awake during changing" and charge your phone to make it stable. -# -# After registering devices, we can confirm it by querying rpc_tracker -# -# .. code-block:: bash -# -# python -m tvm.exec.query_rpc_tracker --host=0.0.0.0 --port=9190 -# -# For example, if we have 2 Huawei mate10 pro, 11 Raspberry Pi 3B and 2 rk3399, -# the output can be -# -# .. code-block:: bash -# -# Queue Status -# ---------------------------------- -# key total free pending -# ---------------------------------- -# mate10pro 2 2 0 -# rk3399 2 2 0 -# rpi3b 11 11 0 -# ---------------------------------- -# -# You can register multiple devices to the tracker to accelerate the measurement in tuning. - -########################################### -# Set Tuning Options -# ------------------ -# Before tuning, we should apply some configurations. Here I use an RK3399 board -# as example. In your setting, you should modify the target and device_key accordingly. -# set :code:`use_android` to True if you use android phone. - -#### DEVICE CONFIG #### - -# Replace "aarch64-linux-gnu" with the correct target of your board. -# This target is used for cross compilation. You can query it by :code:`gcc -v` on your device. -target = tvm.target.create('llvm -device=arm_cpu -target=aarch64-linux-gnu') - -# Also replace this with the device key in your tracker -device_key = 'rk3399' - -# Set this to True if you use android phone -use_android = False - -#### TUNING OPTION #### -network = 'resnet-18' -log_file = "%s.%s.log" % (device_key, network) -dtype = 'float32' - -tuning_option = { - 'log_filename': log_file, - - 'tuner': 'xgb', - 'n_trial': 2000, - 'early_stopping': 800, - - 'measure_option': autotvm.measure_option( - builder=autotvm.LocalBuilder( - build_func='ndk' if use_android else 'default'), - runner=autotvm.RPCRunner( - device_key, host='localhost', port=9190, - number=5, - timeout=4, - ), - ), -} - -#################################################################### -# -# .. 
note:: How to set tuning options -# -# In general, the default values provided here work well. -# If you have enough time budget, you can set :code:`n_trial`, :code:`early_stopping` larger, -# which makes the tuning run longer. -# If your device runs very slowly or your conv2d operators have many GFLOPs, consider -# setting the timeout larger. -# -# If your model has depthwise convolution, you could consider setting -# :code:`try_spatial_pack_depthwise` to :code:`True`, which generally performs better than the -# default optimization. For example, on an ARM A53 CPU at 2.0GHz, we found it could boost the -# performance of depthwise convolution on the Mobilenet V1 model by about 1.6x. - -################################################################### -# Begin Tuning -# ------------ -# Now we can extract tuning tasks from the network and begin tuning. -# Here, we provide a simple utility function to tune a list of tasks. -# This function is just an initial implementation which tunes them in sequential order. -# We will introduce a more sophisticated tuning scheduler in the future. - -# You can skip the implementation of this function for this tutorial. -def tune_tasks(tasks, - measure_option, - tuner='xgb', - n_trial=1000, - early_stopping=None, - log_filename='tuning.log', - use_transfer_learning=True, - try_winograd=True, - try_spatial_pack_depthwise=False): - if try_winograd: - for i in range(len(tasks)): - try: # try winograd template - tsk = autotvm.task.create(tasks[i].name, tasks[i].args, - tasks[i].target, tasks[i].target_host, 'winograd') - input_channel = tsk.workload[1][1] - if input_channel >= 64: - tasks[i] = tsk - except Exception: - pass - - # if we want to use spatial pack for depthwise convolution - if try_spatial_pack_depthwise: - tuner = 'xgb_knob' - for i in range(len(tasks)): - if tasks[i].name == 'topi_nn_depthwise_conv2d_nchw': - tsk = autotvm.task.create(tasks[i].name, tasks[i].args, - tasks[i].target, tasks[i].target_host, - 'contrib_spatial_pack') - tasks[i] = tsk - - # create tmp log file - tmp_log_file = log_filename + ".tmp" - if os.path.exists(tmp_log_file): - os.remove(tmp_log_file) - - for i, tsk in enumerate(reversed(tasks)): - prefix = "[Task %2d/%2d] " % (i+1, len(tasks)) - - # create tuner - if tuner == 'xgb' or tuner == 'xgb-rank': - tuner_obj = XGBTuner(tsk, loss_type='rank') - elif tuner == 'xgb_knob': - tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob') - elif tuner == 'ga': - tuner_obj = GATuner(tsk, pop_size=50) - elif tuner == 'random': - tuner_obj = RandomTuner(tsk) - elif tuner == 'gridsearch': - tuner_obj = GridSearchTuner(tsk) - else: - raise ValueError("Invalid tuner: " + tuner) - - if use_transfer_learning: - if os.path.isfile(tmp_log_file): - tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file)) - - # do tuning - n_trial = min(n_trial, len(tsk.config_space)) - tuner_obj.tune(n_trial=n_trial, - early_stopping=early_stopping, - measure_option=measure_option, - callbacks=[ - autotvm.callback.progress_bar(n_trial, prefix=prefix), - autotvm.callback.log_to_file(tmp_log_file)]) - - # pick best records to a cache file - autotvm.record.pick_best(tmp_log_file, log_filename) - os.remove(tmp_log_file) - - -######################################################################## -# Finally, we launch tuning jobs and evaluate the end-to-end performance.
- -def tune_and_evaluate(tuning_opt): - # extract workloads from nnvm graph - print("Extract tasks...") - net, params, input_shape, out_shape = get_network(network, batch_size=1) - tasks = autotvm.task.extract_from_graph(net, target=target, - shape={'data': input_shape}, dtype=dtype, - symbols=(nnvm.sym.conv2d,)) - - # run tuning tasks - print("Tuning...") - tune_tasks(tasks, **tuning_opt) - - # compile kernels with history best records - with autotvm.apply_history_best(log_file): - print("Compile...") - with nnvm.compiler.build_config(opt_level=3): - graph, lib, params = nnvm.compiler.build( - net, target=target, shape={'data': input_shape}, params=params, dtype=dtype) - - # export library - tmp = tempdir() - if use_android: - from tvm.contrib import ndk - filename = "net.so" - lib.export_library(tmp.relpath(filename), ndk.create_shared) - else: - filename = "net.tar" - lib.export_library(tmp.relpath(filename)) - - # upload module to device - print("Upload...") - remote = autotvm.measure.request_remote(device_key, 'localhost', 9190, - timeout=10000) - remote.upload(tmp.relpath(filename)) - rlib = remote.load_module(filename) - - # upload parameters to device - ctx = remote.context(str(target), 0) - module = runtime.create(graph, rlib, ctx) - data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype)) - module.set_input('data', data_tvm) - module.set_input(**params) - - # evaluate - print("Evaluate inference time cost...") - ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=10) - prof_res = np.array(ftimer().results) * 1000 # convert to millisecond - print("Mean inference time (std dev): %.2f ms (%.2f ms)" % - (np.mean(prof_res), np.std(prof_res))) - -# We do not run the tuning in our webpage server since it takes too long. -# Uncomment the following line to run it by yourself. - -# tune_and_evaluate(tuning_option) - -###################################################################### -# Sample Output -# ------------- -# The tuning needs to compile many programs and extract feature from them. -# So a high performance CPU is recommended. -# One sample output is listed below. -# It takes about 2 hours on a 32T AMD Ryzen Threadripper. -# -# .. code-block:: bash -# -# Extract tasks... -# Tuning... -# [Task 1/12] Current/Best: 22.37/ 52.19 GFLOPS | Progress: (544/1000) | 406.59 s Done. -# [Task 2/12] Current/Best: 6.51/ 18.77 GFLOPS | Progress: (608/1000) | 325.05 s Done. -# [Task 3/12] Current/Best: 4.67/ 24.87 GFLOPS | Progress: (480/1000) | 372.31 s Done. -# [Task 4/12] Current/Best: 11.35/ 46.83 GFLOPS | Progress: (736/1000) | 602.39 s Done. -# [Task 5/12] Current/Best: 1.01/ 19.80 GFLOPS | Progress: (448/1000) | 262.16 s Done. -# [Task 6/12] Current/Best: 2.47/ 23.76 GFLOPS | Progress: (672/1000) | 563.85 s Done. -# [Task 7/12] Current/Best: 14.57/ 33.97 GFLOPS | Progress: (544/1000) | 465.15 s Done. -# [Task 8/12] Current/Best: 1.13/ 17.65 GFLOPS | Progress: (576/1000) | 365.08 s Done. -# [Task 9/12] Current/Best: 14.45/ 22.66 GFLOPS | Progress: (928/1000) | 724.25 s Done. -# [Task 10/12] Current/Best: 3.22/ 15.36 GFLOPS | Progress: (864/1000) | 564.27 s Done. -# [Task 11/12] Current/Best: 11.03/ 32.23 GFLOPS | Progress: (736/1000) | 635.15 s Done. -# [Task 12/12] Current/Best: 8.00/ 21.65 GFLOPS | Progress: (1000/1000) | 1111.81 s Done. -# Compile... -# Upload... -# Evaluate inference time cost... -# Mean inference time (std dev): 162.59 ms (0.06 ms) - -###################################################################### -# -# .. 
note:: **Experiencing Difficulties?** -# -# The auto tuning module is error-prone. If you always see " 0.00/ 0.00 GFLOPS", -# then there must be something wrong. -# -# First, make sure you set the correct configuration of your device. -# Then, you can print debug information by adding these lines in the beginning -# of the script. It will print every measurement result, where you can find useful -# error messages. -# -# .. code-block:: python -# -# import logging -# logging.getLogger('autotvm').setLevel(logging.DEBUG) -# -# Finally, always feel free to ask our community for help on https://discuss.tvm.ai diff --git a/nnvm/tutorials/tune_nnvm_cuda.py b/nnvm/tutorials/tune_nnvm_cuda.py deleted file mode 100644 index be3f79992cb6..000000000000 --- a/nnvm/tutorials/tune_nnvm_cuda.py +++ /dev/null @@ -1,391 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Auto-tuning a convolutional network for NVIDIA GPU (NNVM) -========================================================= -**Author**: `Lianmin Zheng `_ - -Auto-tuning for specific devices and workloads is critical for getting the -best performance. This is a tutorial on how to tune a whole convolutional -network for NVIDIA GPU. - -The operator implementation for NVIDIA GPU in TVM is written in template form. -The template has many tunable knobs (tile factor, unrolling, etc). -We will tune all convolution and depthwise convolution operators -in the neural network. After tuning, we produce a log file which stores -the best knob values for all required operators. When the tvm compiler compiles -these operators, it will query this log file to get the best knob values. - -We also released pre-tuned parameters for some NVIDIA GPUs. You can go to -`NVIDIA GPU Benchmark `_ -to see the results. -""" - -###################################################################### -# Install dependencies -# -------------------- -# To use the autotvm package in tvm, we need to install some extra dependencies. -# (change "3" to "2" if you use python2): -# -# .. code-block:: bash -# -# pip3 install --user psutil xgboost tornado -# -# To make tvm run faster during tuning, it is recommended to use cython -# as FFI of tvm. In the root directory of tvm, execute: -# -# .. code-block:: bash -# -# pip3 install --user cython -# sudo make cython3 -# -# Now return to python code. Import packages. - -import os - -import numpy as np - -import nnvm.testing -import nnvm.compiler -import tvm -from tvm import autotvm -from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner -from tvm.contrib.util import tempdir -import tvm.contrib.graph_runtime as runtime - -################################################################# -# Define Network -# -------------- -# First we need to define the network in nnvm symbol API. 
-# We can load some pre-defined network from :code:`nnvm.testing`. -# We can also load models from MXNet, ONNX and TensorFlow (see NNVM -# tutorials :ref:`tutorial-nnvm` for more details). - -def get_network(name, batch_size): - """Get the symbol definition and random weight of a network""" - input_shape = (batch_size, 3, 224, 224) - output_shape = (batch_size, 1000) - - if "resnet" in name: - n_layer = int(name.split('-')[1]) - net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size) - elif "vgg" in name: - n_layer = int(name.split('-')[1]) - net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size) - elif name == 'mobilenet': - net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size) - elif name == 'squeezenet_v1.1': - net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1') - elif name == 'inception_v3': - input_shape = (1, 3, 299, 299) - net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size) - elif name == 'custom': - # an example for custom network - from nnvm.testing import utils - net = nnvm.sym.Variable('data') - net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1)) - net = nnvm.sym.flatten(net) - net = nnvm.sym.dense(net, units=1000) - net, params = utils.create_workload(net, batch_size, (3, 224, 224)) - elif name == 'mxnet': - # an example for mxnet model - from mxnet.gluon.model_zoo.vision import get_model - block = get_model('resnet18_v1', pretrained=True) - net, params = nnvm.frontend.from_mxnet(block) - net = nnvm.sym.softmax(net) - else: - raise ValueError("Unsupported network: " + name) - - return net, params, input_shape, output_shape - -########################################### -# Set Tuning Options -# ------------------ -# Before tuning, we apply some configurations. - -#### DEVICE CONFIG #### -target = tvm.target.cuda() - -#### TUNING OPTION #### -network = 'resnet-18' -log_file = "%s.log" % network -dtype = 'float32' - -tuning_option = { - 'log_filename': log_file, - - 'tuner': 'xgb', - 'n_trial': 2000, - 'early_stopping': 600, - - 'measure_option': autotvm.measure_option( - builder=autotvm.LocalBuilder(timeout=10), - runner=autotvm.LocalRunner(number=20, repeat=3, timeout=4, min_repeat_ms=150), - ), -} - -#################################################################### -# -# .. note:: How to set tuning options -# -# In general, the default value provided here works well. -# -# If you have a large time budget, you can set :code:`n_trial` and :code:`early_stopping` larger, -# which makes the tuning run longer. -# -# If you have multiple devices, you can use all of them for measurement to -# accelerate the tuning process (see the 'Scale up measurement' section below). -# - -################################################################### -# Begin Tuning -# ------------ -# Now we can extract tuning tasks from the network and begin tuning. -# Here, we provide a simple utility function to tune a list of tasks. -# This function is just an initial implementation which tunes them in sequential order. -# We will introduce a more sophisticated tuning scheduler in the future. - -# You can skip the implementation of this function for this tutorial.
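-######################################################################
-# Before reading the full utility function below, it may help to see the same flow
-# reduced to a single task. This is only a rough sketch with a deliberately tiny trial
-# count; it reuses the :code:`get_network`, :code:`target`, :code:`dtype` and
-# :code:`tuning_option` definitions from above.
-#
-# .. code-block:: python
-#
-#     net, params, input_shape, _ = get_network('resnet-18', batch_size=1)
-#     tasks = autotvm.task.extract_from_graph(net, target=target,
-#                                             shape={'data': input_shape}, dtype=dtype,
-#                                             symbols=(nnvm.sym.conv2d,))
-#     tsk = tasks[0]
-#     tuner = XGBTuner(tsk, loss_type='rank')
-#     tuner.tune(n_trial=min(32, len(tsk.config_space)),
-#                measure_option=tuning_option['measure_option'],
-#                callbacks=[autotvm.callback.log_to_file('one_task.log')])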
-def tune_tasks(tasks, - measure_option, - tuner='xgb', - n_trial=1000, - early_stopping=None, - log_filename='tuning.log', - use_transfer_learning=True, - try_winograd=True): - if try_winograd: - for i in range(len(tasks)): - try: # try winograd template - tsk = autotvm.task.create(tasks[i].name, tasks[i].args, - tasks[i].target, tasks[i].target_host, 'winograd') - input_channel = tsk.workload[1][1] - if input_channel >= 64: - tasks[i] = tsk - except Exception: - pass - - # create tmp log file - tmp_log_file = log_filename + ".tmp" - if os.path.exists(tmp_log_file): - os.remove(tmp_log_file) - - for i, tsk in enumerate(reversed(tasks)): - prefix = "[Task %2d/%2d] " %(i+1, len(tasks)) - - # create tuner - if tuner == 'xgb' or tuner == 'xgb-rank': - tuner_obj = XGBTuner(tsk, loss_type='rank') - elif tuner == 'ga': - tuner_obj = GATuner(tsk, pop_size=100) - elif tuner == 'random': - tuner_obj = RandomTuner(tsk) - elif tuner == 'gridsearch': - tuner_obj = GridSearchTuner(tsk) - else: - raise ValueError("Invalid tuner: " + tuner) - - if use_transfer_learning: - if os.path.isfile(tmp_log_file): - tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file)) - - # do tuning - n_trial = min(n_trial, len(tsk.config_space)) - tuner_obj.tune(n_trial=n_trial, - early_stopping=early_stopping, - measure_option=measure_option, - callbacks=[ - autotvm.callback.progress_bar(n_trial, prefix=prefix), - autotvm.callback.log_to_file(tmp_log_file)]) - - # pick best records to a cache file - autotvm.record.pick_best(tmp_log_file, log_filename) - os.remove(tmp_log_file) - - -######################################################################## -# Finally, we launch tuning jobs and evaluate the end-to-end performance. - -def tune_and_evaluate(tuning_opt): - # extract workloads from nnvm graph - print("Extract tasks...") - net, params, input_shape, out_shape = get_network(network, batch_size=1) - tasks = autotvm.task.extract_from_graph(net, target=target, - shape={'data': input_shape}, dtype=dtype, - symbols=(nnvm.sym.conv2d,)) - - # run tuning tasks - print("Tuning...") - tune_tasks(tasks, **tuning_opt) - - # compile kernels with history best records - with autotvm.apply_history_best(log_file): - print("Compile...") - with nnvm.compiler.build_config(opt_level=3): - graph, lib, params = nnvm.compiler.build( - net, target=target, shape={'data': input_shape}, params=params, dtype=dtype) - - # export library - tmp = tempdir() - filename = "net.tar" - lib.export_library(tmp.relpath(filename)) - - # load parameters - ctx = tvm.context(str(target), 0) - module = runtime.create(graph, lib, ctx) - data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype)) - module.set_input('data', data_tvm) - module.set_input(**params) - - # evaluate - print("Evaluate inference time cost...") - ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=600) - prof_res = np.array(ftimer().results) * 1000 # convert to millisecond - print("Mean inference time (std dev): %.2f ms (%.2f ms)" % - (np.mean(prof_res), np.std(prof_res))) - -# We do not run the tuning in our webpage server since it takes too long. -# Uncomment the following line to run it by yourself. - -# tune_and_evaluate(tuning_option) - -###################################################################### -# Sample Output -# ------------- -# The tuning needs to compile many programs and extract feature from them. -# So a high performance CPU is recommended. One sample output is listed below. 
-# It takes about 4 hours to get the following output on a 32T AMD Ryzen Threadripper. -# The tuning target is NVIDIA 1080 Ti. -# (You can see some errors during compilation. If the tuning is not stuck, it is okay.) -# -# .. code-block:: bash -# -# Extract tasks... -# Tuning... -# [Task 1/12] Current/Best: 541.83/3570.66 GFLOPS | Progress: (960/2000) | 1001.31 s Done. -# [Task 2/12] Current/Best: 0.56/ 803.33 GFLOPS | Progress: (704/2000) | 608.08 s Done. -# [Task 3/12] Current/Best: 103.69/1141.25 GFLOPS | Progress: (768/2000) | 702.13 s Done. -# [Task 4/12] Current/Best: 2905.03/3925.15 GFLOPS | Progress: (864/2000) | 745.94 sterminate called without an active exception -# [Task 4/12] Current/Best: 2789.36/3925.15 GFLOPS | Progress: (1056/2000) | 929.40 s Done. -# [Task 5/12] Current/Best: 89.06/1076.24 GFLOPS | Progress: (704/2000) | 601.73 s Done. -# [Task 6/12] Current/Best: 40.39/2129.02 GFLOPS | Progress: (1088/2000) | 1125.76 s Done. -# [Task 7/12] Current/Best: 4090.53/5007.02 GFLOPS | Progress: (800/2000) | 903.90 s Done. -# [Task 8/12] Current/Best: 4.78/1272.28 GFLOPS | Progress: (768/2000) | 749.14 s Done. -# [Task 9/12] Current/Best: 1391.45/2325.08 GFLOPS | Progress: (992/2000) | 1084.87 s Done. -# [Task 10/12] Current/Best: 1995.44/2383.59 GFLOPS | Progress: (864/2000) | 862.60 s Done. -# [Task 11/12] Current/Best: 4093.94/4899.80 GFLOPS | Progress: (224/2000) | 240.92 sterminate called without an active exception -# [Task 11/12] Current/Best: 3487.98/4909.91 GFLOPS | Progress: (480/2000) | 534.96 sterminate called without an active exception -# [Task 11/12] Current/Best: 4636.84/4912.17 GFLOPS | Progress: (1184/2000) | 1381.16 sterminate called without an active exception -# [Task 11/12] Current/Best: 50.12/4912.17 GFLOPS | Progress: (1344/2000) | 1602.81 s Done. -# [Task 12/12] Current/Best: 3581.31/4286.30 GFLOPS | Progress: (736/2000) | 943.52 s Done. -# Compile... -# Evaluate inference time cost... -# Mean inference time (std dev): 1.07 ms (0.05 ms) -# -# As a reference baseline, the time cost of MXNet + TensorRT on resnet-18 is 1.30ms. So we are a little faster. - -###################################################################### -# -# .. note:: **Experiencing Difficulties?** -# -# The auto tuning module is error-prone. If you always see " 0.00/ 0.00 GFLOPS", -# then there must be something wrong. -# -# First, make sure you set the correct configuration of your device. -# Then, you can print debug information by adding these lines in the beginning -# of the script. It will print every measurement result, where you can find useful -# error messages. -# -# .. code-block:: python -# -# import logging -# logging.getLogger('autotvm').setLevel(logging.DEBUG) -# -# Finally, always feel free to ask our community for help on https://discuss.tvm.ai - - -################################################################# -# Scale up measurement by using multiple devices -# ---------------------------------------------- -# -# If you have multiple devices, you can use all of them for measurement. -# TVM uses the RPC Tracker to manage distributed devices. -# The RPC Tracker is a centralized master node. We can register all devices to -# the tracker. For example, if we have 10 GPU cards, we can register all of them -# to the tracker, and run 10 measurements in parallel, accelerating the tuning process. -# -# To start an RPC tracker, run this command on the host machine. 
The tracker is -# required during the whole tuning process, so we need to open a new terminal for -# this command: -# -# .. code-block:: bash -# -# python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190 -# -# The expected output is -# -# .. code-block:: bash -# -# INFO:RPCTracker:bind to 0.0.0.0:9190 -# -# Then open another new terminal for the RPC server. We need to start one server -# for each dedicated device. We use a string key to distinguish the types of devices. -# You can pick a name you like. -# (Note: For rocm backend, there are some internal errors with the compiler, -# we need to add `--no-fork` to the argument list.) -# -# .. code-block:: bash -# -# python -m tvm.exec.rpc_server --tracker=localhost:9190 --key=1080ti -# -# After registering devices, we can confirm it by querying rpc_tracker -# -# .. code-block:: bash -# -# python -m tvm.exec.query_rpc_tracker --host=localhost --port=9190 -# -# For example, if we have four 1080ti, two titanx and one gfx900, the output can be -# -# .. code-block:: bash -# -# Queue Status -# ---------------------------------- -# key total free pending -# ---------------------------------- -# 1080ti 4 4 0 -# titanx 2 2 0 -# gfx900 1 1 0 -# ---------------------------------- -# -# Finally, we need to change the tuning option to use RPCRunner. Use the code below -# to replace the corresponding part above. - -tuning_option = { - 'log_filename': log_file, - - 'tuner': 'xgb', - 'n_trial': 2000, - 'early_stopping': 600, - - 'measure_option': autotvm.measure_option( - builder=autotvm.LocalBuilder(timeout=10), - runner=autotvm.RPCRunner( - '1080ti', # change the device key to your key - 'localhost', 9190, - number=20, repeat=3, timeout=4, min_repeat_ms=150), - ), -} diff --git a/nnvm/tutorials/tune_nnvm_mobile_gpu.py b/nnvm/tutorials/tune_nnvm_mobile_gpu.py deleted file mode 100644 index 8946dc1833bd..000000000000 --- a/nnvm/tutorials/tune_nnvm_mobile_gpu.py +++ /dev/null @@ -1,416 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Auto-tuning a convolutional network for Mobile GPU (NNVM) -========================================================= -**Author**: `Lianmin Zheng `_ - -Auto-tuning for a specific device is critical for getting the best -performance. This is a tutorial about how to tune a whole convolutional -network. - -The operator implementation for Mobile GPU in TVM is written in template form. -The template has many tunable knobs (tile factor, vectorization, unrolling, etc). -We will tune all convolution, depthwise convolution and dense operators -in the neural network. After tuning, we produce a log file which stores -the best knob values for all required operators. When the tvm compiler compiles -these operators, it will query this log file to get the best knob values. 
- -We also released pre-tuned parameters for some arm devices. You can go to -`Mobile GPU Benchmark `_ -to see the results. -""" - -###################################################################### -# Install dependencies -# -------------------- -# To use the autotvm package in tvm, we need to install some extra dependencies. -# (change "3" to "2" if you use python2): -# -# .. code-block:: bash -# -# pip3 install --user psutil xgboost tornado -# -# To make tvm run faster during tuning, it is recommended to use cython -# as FFI of tvm. In the root directory of tvm, execute -# (change "3" to "2" if you use python2): -# -# .. code-block:: bash -# -# pip3 install --user cython -# sudo make cython3 -# -# Now return to python code. Import packages. - -import os - -import numpy as np - -import nnvm.testing -import nnvm.compiler -import tvm -from tvm import autotvm -from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner -from tvm.contrib.util import tempdir -import tvm.contrib.graph_runtime as runtime - -################################################################# -# Define network -# -------------- -# First we need to define the network in nnvm symbol API. -# We can load some pre-defined network from :code:`nnvm.testing`. -# We can also load models from MXNet, ONNX and TensorFlow (see NNVM -# tutorials :ref:`tutorial-nnvm` for more details). - -def get_network(name, batch_size): - """Get the symbol definition and random weight of a network""" - input_shape = (batch_size, 3, 224, 224) - output_shape = (batch_size, 1000) - - if "resnet" in name: - n_layer = int(name.split('-')[1]) - net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size) - elif "vgg" in name: - n_layer = int(name.split('-')[1]) - net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size) - elif name == 'mobilenet': - net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size) - elif name == 'squeezenet_v1.1': - net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1') - elif name == 'inception_v3': - input_shape = (1, 3, 299, 299) - net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size) - elif name == 'custom': - # an example for custom network - from nnvm.testing import utils - net = nnvm.sym.Variable('data') - net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1)) - net = nnvm.sym.flatten(net) - net = nnvm.sym.dense(net, units=1000) - net, params = utils.create_workload(net, batch_size, (3, 224, 224)) - elif name == 'mxnet': - # an example for mxnet model - from mxnet.gluon.model_zoo.vision import get_model - block = get_model('resnet18_v1', pretrained=True) - net, params = nnvm.frontend.from_mxnet(block) - net = nnvm.sym.softmax(net) - else: - raise ValueError("Unsupported network: " + name) - - return net, params, input_shape, output_shape - - -################################################################# -# Start RPC Tracker -# ----------------- -# TVM uses RPC session to communicate with ARM boards. -# During tuning, the tuner will send the generated code to the board and -# measure the speed of code on the board. -# -# To scale up the tuning, TVM uses RPC Tracker to manage distributed devices. -# The RPC Tracker is a centralized master node. We can register all devices to -# the tracker. For example, if we have 10 phones, we can register all of them -# to the tracker, and run 10 measurements in parallel, accelerating the tuning process. 
-# -# To start an RPC tracker, run this command on the host machine. The tracker is -# required during the whole tuning process, so we need to open a new terminal for -# this command: -# -# .. code-block:: bash -# -# python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190 -# -# The expected output is -# -# .. code-block:: bash -# -# INFO:RPCTracker:bind to 0.0.0.0:9190 - -################################################################# -# Register devices to RPC Tracker -# ----------------------------------- -# Now we can register our devices to the tracker. The first step is to -# build tvm runtime for the ARM devices. -# -# * For Linux: -# Follow this section :ref:`build-tvm-runtime-on-device` to build -# tvm runtime on the device. Then register the device to tracker by -# -# .. code-block:: bash -# -# python -m tvm.exec.rpc_server --tracker=[HOST_IP]:9190 --key=rk3399 -# -# (replace :code:`[HOST_IP]` with the IP address of your host machine) -# -# * For Android: -# Follow this `readme page `_ to -# install tvm rpc apk on the android device. Make sure you can pass the android rpc test. -# Then you have already registred your device. During tuning, you have to go to developer option -# and enable "Keep screen awake during changing" and charge your phone to make it stable. -# -# After registering devices, we can confirm it by querying rpc_tracker -# -# .. code-block:: bash -# -# python -m tvm.exec.query_rpc_tracker --host=0.0.0.0 --port=9190 -# -# For example, if we have 2 Huawei mate10 pro, 11 Raspberry Pi 3B and 2 rk3399, -# the output can be -# -# .. code-block:: bash -# -# Queue Status -# ---------------------------------- -# key total free pending -# ---------------------------------- -# mate10pro 2 2 0 -# rk3399 2 2 0 -# rpi3b 11 11 0 -# ---------------------------------- -# -# You can register multiple devices to the tracker to accelerate the measurement in tuning. - -########################################### -# Set Tuning Options -# ------------------ -# Before tuning, we should apply some configurations. Here I use an RK3399 board -# as example. In your setting, you should modify the target and device_key accordingly. -# set :code:`use_android` to True if you use android phone. - -#### DEVICE CONFIG #### - -target = tvm.target.create('opencl -device=mali') - -# Replace "aarch64-linux-gnu" with the correct target of your board. -# This target host is used for cross compilation. You can query it by :code:`gcc -v` on your device. -target_host = 'llvm -target=aarch64-linux-gnu' - -# Also replace this with the device key in your tracker -device_key = 'rk3399' - -# Set this to True if you use android phone -use_android = False - -#### TUNING OPTION #### -network = 'resnet-18' -log_file = "%s.%s.log" % (device_key, network) -dtype = 'float32' - -tuning_option = { - 'log_filename': log_file, - - 'tuner': 'xgb', - 'n_trial': 1000, - 'early_stopping': 450, - - 'measure_option': autotvm.measure_option( - builder=autotvm.LocalBuilder( - build_func='ndk' if use_android else 'default'), - runner=autotvm.RPCRunner( - device_key, host='localhost', port=9190, - number=10, - timeout=5, - ), - ), -} - -#################################################################### -# -# .. note:: How to set tuning options -# -# In general, the default values provided here work well. -# If you have enough time budget, you can set :code:`n_trial`, :code:`early_stopping` larger, -# which makes the tuning run longer. 
-# If your device runs very slow or your conv2d operators have many GFLOPs, considering to -# set timeout larger. -# - -################################################################### -# Begin Tuning -# ------------ -# Now we can extract tuning tasks from the network and begin tuning. -# Here, we provide a simple utility function to tune a list of tasks. -# This function is just an initial implementation which tunes them in sequential order. -# We will introduce a more sophisticated tuning scheduler in the future. - -# You can skip the implementation of this function for this tutorial. -def tune_tasks(tasks, - measure_option, - tuner='xgb', - n_trial=1000, - early_stopping=None, - log_filename='tuning.log', - use_transfer_learning=True, - try_winograd=True): - if try_winograd: - for i in range(len(tasks)): - try: # try winograd template - tsk = autotvm.task.create(tasks[i].name, tasks[i].args, - tasks[i].target, tasks[i].target_host, 'winograd') - tasks.append(tsk) - except Exception: - pass - - # create tmp log file - tmp_log_file = log_filename + ".tmp" - if os.path.exists(tmp_log_file): - os.remove(tmp_log_file) - - for i, tsk in enumerate(reversed(tasks)): - prefix = "[Task %2d/%2d] " % (i+1, len(tasks)) - - # create tuner - if tuner == 'xgb' or tuner == 'xgb-rank': - tuner_obj = XGBTuner(tsk, loss_type='rank') - elif tuner == 'ga': - tuner_obj = GATuner(tsk, pop_size=50) - elif tuner == 'random': - tuner_obj = RandomTuner(tsk) - elif tuner == 'gridsearch': - tuner_obj = GridSearchTuner(tsk) - else: - raise ValueError("Invalid tuner: " + tuner) - - if use_transfer_learning: - if os.path.isfile(tmp_log_file): - tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file)) - - # do tuning - n_trial = min(n_trial, len(tsk.config_space)) - tuner_obj.tune(n_trial=n_trial, - early_stopping=early_stopping, - measure_option=measure_option, - callbacks=[ - autotvm.callback.progress_bar(n_trial, prefix=prefix), - autotvm.callback.log_to_file(tmp_log_file)]) - - # pick best records to a cache file - autotvm.record.pick_best(tmp_log_file, log_filename) - os.remove(tmp_log_file) - - -######################################################################## -# Finally, we launch tuning jobs and evaluate the end-to-end performance. 
- -def tune_and_evaluate(tuning_opt): - # extract workloads from nnvm graph - print("Extract tasks...") - net, params, input_shape, out_shape = get_network(network, batch_size=1) - tasks = autotvm.task.extract_from_graph(net, target=target, target_host=target_host, - shape={'data': input_shape}, dtype=dtype, - symbols=(nnvm.sym.conv2d, nnvm.sym.dense)) - - # run tuning tasks - print("Tuning...") - tune_tasks(tasks, **tuning_opt) - - # compile kernels with history best records - with autotvm.apply_history_best(log_file): - print("Compile...") - with nnvm.compiler.build_config(opt_level=3): - graph, lib, params = nnvm.compiler.build( - net, target=target, target_host=target_host, - shape={'data': input_shape}, params=params, dtype=dtype) - - # export library - tmp = tempdir() - if use_android: - from tvm.contrib import ndk - filename = "net.so" - lib.export_library(tmp.relpath(filename), ndk.create_shared) - else: - filename = "net.tar" - lib.export_library(tmp.relpath(filename)) - - # upload module to device - print("Upload...") - remote = autotvm.measure.request_remote(device_key, 'localhost', 9190, - timeout=10000) - remote.upload(tmp.relpath(filename)) - rlib = remote.load_module(filename) - - # upload parameters to device - ctx = remote.context(str(target), 0) - module = runtime.create(graph, rlib, ctx) - data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype)) - module.set_input('data', data_tvm) - module.set_input(**params) - - # evaluate - print("Evaluate inference time cost...") - ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=30) - prof_res = np.array(ftimer().results) * 1000 # convert to millisecond - print("Mean inference time (std dev): %.2f ms (%.2f ms)" % - (np.mean(prof_res), np.std(prof_res))) - -# We do not run the tuning in our webpage server since it takes too long. -# Uncomment the following line to run it by yourself. - -# tune_and_evaluate(tuning_option) - -###################################################################### -# Sample Output -# ------------- -# The tuning needs to compile many programs and extract feature from them. -# So a high performance CPU is recommended. -# One sample output is listed below. It takes about 3 hours on a 32T AMD Ryzen Threadripper. -# -# .. code-block:: bash -# -# Extract tasks... -# Tuning... -# [Task 1/17] Current/Best: 25.30/ 39.12 GFLOPS | Progress: (992/1000) | 751.22 s Done. -# [Task 2/17] Current/Best: 40.70/ 45.50 GFLOPS | Progress: (736/1000) | 545.46 s Done. -# [Task 3/17] Current/Best: 38.83/ 42.35 GFLOPS | Progress: (992/1000) | 1549.85 s Done. -# [Task 4/17] Current/Best: 23.31/ 31.02 GFLOPS | Progress: (640/1000) | 1059.31 s Done. -# [Task 5/17] Current/Best: 0.06/ 2.34 GFLOPS | Progress: (544/1000) | 305.45 s Done. -# [Task 6/17] Current/Best: 10.97/ 17.20 GFLOPS | Progress: (992/1000) | 1050.00 s Done. -# [Task 7/17] Current/Best: 8.98/ 10.94 GFLOPS | Progress: (928/1000) | 421.36 s Done. -# [Task 8/17] Current/Best: 4.48/ 14.86 GFLOPS | Progress: (704/1000) | 582.60 s Done. -# [Task 9/17] Current/Best: 10.30/ 25.99 GFLOPS | Progress: (864/1000) | 899.85 s Done. -# [Task 10/17] Current/Best: 11.73/ 12.52 GFLOPS | Progress: (608/1000) | 304.85 s Done. -# [Task 11/17] Current/Best: 15.26/ 18.68 GFLOPS | Progress: (800/1000) | 747.52 s Done. -# [Task 12/17] Current/Best: 17.48/ 26.71 GFLOPS | Progress: (1000/1000) | 1166.40 s Done. -# [Task 13/17] Current/Best: 0.96/ 11.43 GFLOPS | Progress: (960/1000) | 611.65 s Done. 
-# [Task 14/17] Current/Best: 17.88/ 20.22 GFLOPS | Progress: (672/1000) | 670.29 s Done. -# [Task 15/17] Current/Best: 11.62/ 13.98 GFLOPS | Progress: (736/1000) | 449.25 s Done. -# [Task 16/17] Current/Best: 19.90/ 23.83 GFLOPS | Progress: (608/1000) | 708.64 s Done. -# [Task 17/17] Current/Best: 17.98/ 22.75 GFLOPS | Progress: (736/1000) | 1122.60 s Done. -# Compile... -# Upload... -# Evaluate inference time cost... -# Mean inference time (std dev): 128.05 ms (7.74 ms) -# - -###################################################################### -# -# .. note:: **Experiencing Difficulties?** -# -# The auto tuning module is error-prone. If you always see " 0.00/ 0.00 GFLOPS", -# then there must be something wrong. -# -# First, make sure you set the correct configuration of your device. -# Then, you can print debug information by adding these lines in the beginning -# of the script. It will print every measurement result, where you can find useful -# error messages. -# -# .. code-block:: python -# -# import logging -# logging.getLogger('autotvm').setLevel(logging.DEBUG) -# -# Finally, always feel free to ask our community for help on https://discuss.tvm.ai diff --git a/nnvm/tutorials/tune_nnvm_x86.py b/nnvm/tutorials/tune_nnvm_x86.py deleted file mode 100644 index b7426271f06b..000000000000 --- a/nnvm/tutorials/tune_nnvm_x86.py +++ /dev/null @@ -1,236 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Auto-tuning a convolutional network for x86 CPU (NNVM) -====================================================== -**Author**: `Yao Wang `_ - -This is a tutorial about how to tune convolution neural network -for x86 cpu. -""" -import os -import numpy as np - -import nnvm.testing -import nnvm.compiler -import tvm -from tvm import autotvm -from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner -import tvm.contrib.graph_runtime as runtime - -################################################################# -# Define network -# -------------- -# First we need to define the network in nnvm symbol API. -# We can load some pre-defined network from :code:`nnvm.testing`. -# We can also load models from MXNet, ONNX and TensorFlow (see NNVM -# tutorials :ref:`tutorial-nnvm` for more details). -# -# In this tutorial, we choose resnet-18 as tuning example. 
- -def get_network(name, batch_size): - """Get the symbol definition and random weight of a network""" - input_shape = (batch_size, 3, 224, 224) - output_shape = (batch_size, 1000) - - if "resnet" in name: - n_layer = int(name.split('-')[1]) - net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size) - elif "vgg" in name: - n_layer = int(name.split('-')[1]) - net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size) - elif name == 'mobilenet': - net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size) - elif name == 'squeezenet_v1.1': - net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version='1.1') - elif name == 'inception_v3': - input_shape = (1, 3, 299, 299) - net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size) - elif name == 'custom': - # an example for custom network - from nnvm.testing import utils - net = nnvm.sym.Variable('data') - net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1)) - net = nnvm.sym.flatten(net) - net = nnvm.sym.dense(net, units=1000) - net, params = utils.create_workload(net, batch_size, (3, 224, 224)) - elif name == 'mxnet': - # an example for mxnet model - from mxnet.gluon.model_zoo.vision import get_model - block = get_model('resnet18_v1', pretrained=True) - net, params = nnvm.frontend.from_mxnet(block) - net = nnvm.sym.softmax(net) - else: - raise ValueError("Unsupported network: " + name) - - return net, params, input_shape, output_shape - -# Replace "llvm" with the correct target of your cpu. -# For example, for AWS EC2 c5 instance with Intel Xeon -# Platinum 8000 series, the target should be "llvm -mcpu=skylake-avx512". -# For AWS EC2 c4 instance with Intel Xeon E5-2666 v3, it should be -# "llvm -mcpu=core-avx2". -target = "llvm" - -batch_size = 1 -dtype = "float32" -model_name = "resnet-18" -log_file = "%s.log" % model_name - -# Set number of threads used for tuning based on the number of -# physical cpu cores on your machine. -num_threads = 1 -os.environ["TVM_NUM_THREADS"] = str(num_threads) - - -################################################################# -# Configure tensor tuning settings and create tasks -# ------------------------------------------------- -# To get better kernel execution performance on x86 cpu, -# we need to change data layout of convolution kernel from -# "NCHW" to "NCHWc". To deal with this situation, we define -# conv2d_NCHWc operator in topi. We will tune this operator -# instead of plain conv2d. -# -# We will use local mode for tuning configuration. RPC tracker -# mode can be setup similarly to the approach in -# :ref:`tune_nnvm_arm` tutorial. - -tuning_option = { - 'log_filename': log_file, - 'tuner': 'random', - 'early_stopping': None, - - 'measure_option': autotvm.measure_option( - builder=autotvm.LocalBuilder(), - runner=autotvm.LocalRunner(number=10, repeat=1, - min_repeat_ms=1000), - ), -} - -# You can skip the implementation of this function for this tutorial. 
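-######################################################################
-# As a side note on the :code:`num_threads` setting above: this tutorial leaves it at 1,
-# but one convenient way to derive it from the number of physical cores is the sketch
-# below. It relies on :code:`psutil`, which the other tuning tutorials above already
-# install; treat it as an optional convenience rather than a required step.
-#
-# .. code-block:: python
-#
-#     import psutil
-#     num_threads = psutil.cpu_count(logical=False)  # physical cores only
-#     os.environ["TVM_NUM_THREADS"] = str(num_threads)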
-def tune_kernels(tasks, - measure_option, - tuner='gridsearch', - early_stopping=None, - log_filename='tuning.log'): - - for i, tsk in enumerate(tasks): - prefix = "[Task %2d/%2d] " % (i+1, len(tasks)) - - # converting conv2d tasks to conv2d_NCHWc tasks - op_name = tsk.workload[0] - if op_name == 'conv2d': - func_create = 'topi_x86_conv2d_NCHWc' - elif op_name == 'depthwise_conv2d_nchw': - func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw' - else: - raise ValueError("Tuning {} is not supported on x86".format(op_name)) - - task = autotvm.task.create(func_create, args=tsk.args, - target=target, template_key='direct') - task.workload = tsk.workload - - # create tuner - if tuner == 'xgb' or tuner == 'xgb-rank': - tuner_obj = XGBTuner(task, loss_type='rank') - elif tuner == 'ga': - tuner_obj = GATuner(task, pop_size=50) - elif tuner == 'random': - tuner_obj = RandomTuner(task) - elif tuner == 'gridsearch': - tuner_obj = GridSearchTuner(task) - else: - raise ValueError("Invalid tuner: " + tuner) - - # do tuning - n_trial=len(task.config_space) - tuner_obj.tune(n_trial=n_trial, - early_stopping=early_stopping, - measure_option=measure_option, - callbacks=[ - autotvm.callback.progress_bar(n_trial, prefix=prefix), - autotvm.callback.log_to_file(log_filename)]) - - -######################################################################## -# Finally, we launch tuning jobs and evaluate the end-to-end performance. - -def tune_and_evaluate(tuning_opt): - # extract workloads from nnvm graph - print("Extract tasks...") - net, params, data_shape, out_shape = get_network(model_name, batch_size) - tasks = autotvm.task.extract_from_graph(net, target=target, - shape={'data': data_shape}, dtype=dtype, - symbols=(nnvm.sym.conv2d,)) - - # run tuning tasks - print("Tuning...") - tune_kernels(tasks, **tuning_opt) - - # compile kernels with history best records - with autotvm.apply_history_best(log_file): - print("Compile...") - with nnvm.compiler.build_config(opt_level=3): - graph, lib, params = nnvm.compiler.build( - net, target=target, shape={'data': data_shape}, params=params, dtype=dtype) - - # upload parameters to device - ctx = tvm.cpu() - data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype)) - module = runtime.create(graph, lib, ctx) - module.set_input('data', data_tvm) - module.set_input(**params) - - # evaluate - print("Evaluate inference time cost...") - ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3) - prof_res = np.array(ftimer().results) * 1000 # convert to millisecond - print("Mean inference time (std dev): %.2f ms (%.2f ms)" % - (np.mean(prof_res), np.std(prof_res))) - -# We do not run the tuning in our webpage server since it takes too long. -# Uncomment the following line to run it by yourself. - -# tune_and_evaluate(tuning_option) - -###################################################################### -# Sample Output -# ------------- -# The tuning needs to compile many programs and extract feature from them. -# So a high performance CPU is recommended. -# One sample output is listed below. -# -# .. code-block:: bash -# -# Extract tasks... -# Tuning... -# [Task 1/12] Current/Best: 598.05/2497.63 GFLOPS | Progress: (252/252) | 1357.95 s Done. -# [Task 2/12] Current/Best: 522.63/2279.24 GFLOPS | Progress: (784/784) | 3989.60 s Done. -# [Task 3/12] Current/Best: 447.33/1927.69 GFLOPS | Progress: (784/784) | 3869.14 s Done. -# [Task 4/12] Current/Best: 481.11/1912.34 GFLOPS | Progress: (672/672) | 3274.25 s Done. 
-# [Task 5/12] Current/Best: 414.09/1598.45 GFLOPS | Progress: (672/672) | 2720.78 s Done. -# [Task 6/12] Current/Best: 508.96/2273.20 GFLOPS | Progress: (768/768) | 3718.75 s Done. -# [Task 7/12] Current/Best: 469.14/1955.79 GFLOPS | Progress: (576/576) | 2665.67 s Done. -# [Task 8/12] Current/Best: 230.91/1658.97 GFLOPS | Progress: (576/576) | 2435.01 s Done. -# [Task 9/12] Current/Best: 487.75/2295.19 GFLOPS | Progress: (648/648) | 3009.95 s Done. -# [Task 10/12] Current/Best: 182.33/1734.45 GFLOPS | Progress: (360/360) | 1755.06 s Done. -# [Task 11/12] Current/Best: 372.18/1745.15 GFLOPS | Progress: (360/360) | 1684.50 s Done. -# [Task 12/12] Current/Best: 215.34/2271.11 GFLOPS | Progress: (400/400) | 2128.74 s Done. -# Compile... -# Evaluate inference time cost... -# Mean inference time (std dev): 3.16 ms (0.03 ms) diff --git a/nnvm/tutorials/using_external_lib.py b/nnvm/tutorials/using_external_lib.py deleted file mode 100644 index cc52652ffa37..000000000000 --- a/nnvm/tutorials/using_external_lib.py +++ /dev/null @@ -1,234 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -""" -Using External Libraries in NNVM -================================ -**Author**: `Masahiro Masuda `_ - -This is a short tutorial on how to use external libraries such as cuDNN, or cuBLAS with NNVM. - -NNVM uses TVM internally to generate target specific code. For example, with cuda backend TVM generates cuda kernels for all layers in the user provided network. -But sometimes it is also helpful to incorporate external libraries developed by various vendors into NNVM. -Luckily, TVM has a mechanism to transparently call into these libraries. -For NNVM users, all we need to do is just to set a target string appropriately. - -Before we can use external libraries from NNVM, your TVM needs to be built with libraries you want to use. -For example, to use cuDNN, USE_CUDNN option in tvm/make/config.mk needs to be enabled, and cuDNN include and library directories need to be specified. - -To begin with, we import NNVM and TVM. -""" -import tvm -import numpy as np -from tvm.contrib import graph_runtime as runtime -import nnvm.symbol as sym -import nnvm.compiler -from nnvm.testing import utils - -###################################################################### -# Create a simple network -# ----------------------- -# Let's create a very simple network for demonstration. -# It consists of convolution, batch normalization, and ReLU activation. 
- -out_channels = 16 -data = sym.Variable(name="data") -simple_net = sym.conv2d(data=data, kernel_size=(3,3), channels=out_channels, padding = (1, 1), use_bias=True) -simple_net = sym.batch_norm(data=simple_net) -simple_net = sym.relu(data=simple_net) - -batch_size = 1 -data_shape = (batch_size, 3, 224, 224) -net, params = utils.create_workload(simple_net, batch_size, data_shape[1:]) - -###################################################################### -# Build and run with cuda backend -# ------------------------------- -# We build and run this network with cuda backend, as usual. -# By setting the logging level to DEBUG, the result of NNVM graph compilation will be dumped as pseudo code. -import logging -logging.basicConfig(level=logging.DEBUG) # to dump TVM IR after fusion - -target = "cuda" -graph, lib, params = nnvm.compiler.build( - net, target, shape={"data": data_shape}, params=params) - -ctx = tvm.context(target, 0) -data = np.random.uniform(-1, 1, size=data_shape).astype("float32") -module = runtime.create(graph, lib, ctx) -module.set_input(**params) -module.set_input("data", data) -module.run() -out_shape = (batch_size, out_channels, 224, 224) -out = module.get_output(0, tvm.nd.empty(out_shape)) -out_cuda = out.asnumpy() - -###################################################################### -# The generated pseudo code should look something like below. -# Note how bias add, batch normalization, and ReLU activation are fused into the convolution kernel. -# TVM generates a single, fused kernel from this representation. -# -# .. code-block:: text -# -# produce compute { -# // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 112 -# // attr [input1.shared] storage_scope = "shared" -# allocate input1.shared[float32 * 16 * 3 * 3 * 3] -# // attr [compute] storage_scope = "local" -# allocate compute[float32 * 16 * 1 * 1 * 1 * 1] -# // attr [pad_temp.global.global.shared] storage_scope = "shared" -# allocate pad_temp.global.global.shared[float32 * 1 * 1 * 4 * 57 * 4] -# // attr [iter_var(threadIdx.x, Range(min=0, extent=448), threadIdx.x)] thread_extent = 448 -# produce compute { -# produce input1.shared { -# for (ax0, 0, 16) { -# if (likely((threadIdx.x < 27))) { -# input1.shared[(threadIdx.x + (ax0*27))] = input1[((((((blockIdx.x/112)*48) + (threadIdx.x/9))*9) + (threadIdx.x % 9)) + (ax0*27))] -# } -# } -# } -# compute[0] = 0.000000f -# compute[1] = 0.000000f -# compute[2] = 0.000000f -# compute[3] = 0.000000f -# compute[4] = 0.000000f -# compute[5] = 0.000000f -# compute[6] = 0.000000f -# compute[7] = 0.000000f -# compute[8] = 0.000000f -# compute[9] = 0.000000f -# compute[10] = 0.000000f -# compute[11] = 0.000000f -# compute[12] = 0.000000f -# compute[13] = 0.000000f -# compute[14] = 0.000000f -# compute[15] = 0.000000f -# for (rc, 0, 3) { -# produce pad_temp.global.global.shared { -# if (likely((threadIdx.x < 228))) { -# if (likely(((blockIdx.x*2) < (226 - (threadIdx.x/57))))) { -# pad_temp.global.global.shared[ramp((threadIdx.x*4), 1, 4)] = pad_temp[ramp(((((((blockIdx.x*2) + (threadIdx.x/57))*57) + (threadIdx.x % 57)) + (rc*12882))*4), 1, 4)] -# } -# } -# } -# for (ry, 0, 3) { -# for (rx, 0, 3) { -# compute[0] = (compute[0] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[((((rc*3) + ry)*3) + rx)])) -# compute[1] = (compute[1] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 27)])) -# compute[2] = 
(compute[2] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 54)])) -# compute[3] = (compute[3] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 81)])) -# compute[4] = (compute[4] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 108)])) -# compute[5] = (compute[5] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 135)])) -# compute[6] = (compute[6] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 162)])) -# compute[7] = (compute[7] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 189)])) -# compute[8] = (compute[8] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 216)])) -# compute[9] = (compute[9] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 243)])) -# compute[10] = (compute[10] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 270)])) -# compute[11] = (compute[11] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 297)])) -# compute[12] = (compute[12] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 324)])) -# compute[13] = (compute[13] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 351)])) -# compute[14] = (compute[14] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 378)])) -# compute[15] = (compute[15] + (pad_temp.global.global.shared[(((((threadIdx.x/224)*228) + (threadIdx.x % 224)) + (ry*228)) + rx)]*input1.shared[(((((rc*3) + ry)*3) + rx) + 405)])) -# } -# } -# } -# } -# compute[(((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224))] = max((((compute[0] + input2[((blockIdx.x/112)*16)])*input3[((blockIdx.x/112)*16)]) + input4[((blockIdx.x/112)*16)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 50176)] = max((((compute[1] + input2[(((blockIdx.x/112)*16) + 1)])*input3[(((blockIdx.x/112)*16) + 1)]) + input4[(((blockIdx.x/112)*16) + 1)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 100352)] = max((((compute[2] + input2[(((blockIdx.x/112)*16) + 2)])*input3[(((blockIdx.x/112)*16) + 2)]) + input4[(((blockIdx.x/112)*16) + 2)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 150528)] = max((((compute[3] + input2[(((blockIdx.x/112)*16) + 3)])*input3[(((blockIdx.x/112)*16) + 3)]) + input4[(((blockIdx.x/112)*16) + 3)]), 
0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 200704)] = max((((compute[4] + input2[(((blockIdx.x/112)*16) + 4)])*input3[(((blockIdx.x/112)*16) + 4)]) + input4[(((blockIdx.x/112)*16) + 4)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 250880)] = max((((compute[5] + input2[(((blockIdx.x/112)*16) + 5)])*input3[(((blockIdx.x/112)*16) + 5)]) + input4[(((blockIdx.x/112)*16) + 5)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 301056)] = max((((compute[6] + input2[(((blockIdx.x/112)*16) + 6)])*input3[(((blockIdx.x/112)*16) + 6)]) + input4[(((blockIdx.x/112)*16) + 6)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 351232)] = max((((compute[7] + input2[(((blockIdx.x/112)*16) + 7)])*input3[(((blockIdx.x/112)*16) + 7)]) + input4[(((blockIdx.x/112)*16) + 7)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 401408)] = max((((compute[8] + input2[(((blockIdx.x/112)*16) + 8)])*input3[(((blockIdx.x/112)*16) + 8)]) + input4[(((blockIdx.x/112)*16) + 8)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 451584)] = max((((compute[9] + input2[(((blockIdx.x/112)*16) + 9)])*input3[(((blockIdx.x/112)*16) + 9)]) + input4[(((blockIdx.x/112)*16) + 9)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 501760)] = max((((compute[10] + input2[(((blockIdx.x/112)*16) + 10)])*input3[(((blockIdx.x/112)*16) + 10)]) + input4[(((blockIdx.x/112)*16) + 10)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 551936)] = max((((compute[11] + input2[(((blockIdx.x/112)*16) + 11)])*input3[(((blockIdx.x/112)*16) + 11)]) + input4[(((blockIdx.x/112)*16) + 11)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 602112)] = max((((compute[12] + input2[(((blockIdx.x/112)*16) + 12)])*input3[(((blockIdx.x/112)*16) + 12)]) + input4[(((blockIdx.x/112)*16) + 12)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 652288)] = max((((compute[13] + input2[(((blockIdx.x/112)*16) + 13)])*input3[(((blockIdx.x/112)*16) + 13)]) + input4[(((blockIdx.x/112)*16) + 13)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 702464)] = max((((compute[14] + input2[(((blockIdx.x/112)*16) + 14)])*input3[(((blockIdx.x/112)*16) + 14)]) + input4[(((blockIdx.x/112)*16) + 14)]), 0.000000f) -# compute[((((((blockIdx.x + ((blockIdx.x/112)*1792))*2) + (threadIdx.x/224))*224) + (threadIdx.x % 224)) + 752640)] = max((((compute[15] + input2[(((blockIdx.x/112)*16) + 15)])*input3[(((blockIdx.x/112)*16) + 15)]) + input4[(((blockIdx.x/112)*16) + 15)]), 0.000000f) -# } -# - -###################################################################### -# Use cuDNN for a convolutional layer -# ----------------------------------- -# We can use cuDNN to replace convolution kernels with cuDNN ones. -# To do that, all we need to do is to append the option " -libs=cudnn" to the target string. 
-net, params = utils.create_workload(simple_net, batch_size, data_shape[1:]) -target = "cuda -libs=cudnn" # use cudnn for convolution -graph, lib, params = nnvm.compiler.build( - net, target, shape={"data": data_shape}, params=params) - -ctx = tvm.context(target, 0) -data = np.random.uniform(-1, 1, size=data_shape).astype("float32") -module = runtime.create(graph, lib, ctx) -module.set_input(**params) -module.set_input("data", data) -module.run() -out_shape = (batch_size, out_channels, 224, 224) -out = module.get_output(0, tvm.nd.empty(out_shape)) -out_cudnn = out.asnumpy() - -###################################################################### -# Note that if you use cuDNN, NNVM cannot fuse convolution with layers following it. -# This is because layer fusion happens at the level of TVM internal representation(IR). -# NNVM treats external libraries as black box, so there is no way to fuse them with TVM IR. -# -# The pseudo code below shows that cuDNN convolution + bias add + batch norm + ReLU turned into two stages of computation, one for cuDNN call and the other for the rest of operations. -# -# .. code-block:: text -# -# allocate y[float32 * 1 * 16 * 224 * 224] -# produce y { -# // attr [0] extern_scope = 0 -# tvm_call_packed("tvm.contrib.cudnn.conv2d.forward", 1, 0, 1, 1, 1, 1, 1, 1, 1, tvm_stack_make_array(input0, tvm_stack_make_shape(1, 3, 224, 224), 0, 4, 0.000000f, 0), tvm_stack_make_array(input1, tvm_stack_make_shape(16, 3, 3, 3), 0, 4, 0.000000f, 0), tvm_stack_make_array(y, tvm_stack_make_shape(1, 16, 224, 224), 0, 4, 0.000000f, 0)) -# } -# produce compute { -# // attr [iter_var(blockIdx.x, , blockIdx.x)] thread_extent = 1568 -# // attr [iter_var(threadIdx.x, , threadIdx.x)] thread_extent = 512 -# compute[((((((blockIdx.x*512) + threadIdx.x)/50176) + ((((blockIdx.x*512) + threadIdx.x)/802816)*16))*50176) + ((((((blockIdx.x*512) + threadIdx.x)/224) % 224)*224) + (((blockIdx.x*64) + threadIdx.x) % 224)))] = max((((y[((((((blockIdx.x*512) + threadIdx.x)/50176) + ((((blockIdx.x*512) + threadIdx.x)/802816)*16))*50176) + ((((((blockIdx.x*512) + threadIdx.x)/224) % 224)*224) + (((blockIdx.x*64) + threadIdx.x) % 224)))] + input2[(((blockIdx.x*512) + threadIdx.x)/50176)])*input3[(((blockIdx.x*512) + threadIdx.x)/50176)]) + input4[(((blockIdx.x*512) + threadIdx.x)/50176)]), 0.000000f) -# } -# - -###################################################################### -# Verify the result -# ----------------- -# We can check that the results of two runs match. - -tvm.testing.assert_allclose(out_cuda, out_cudnn, rtol=1e-5) - -##################################################################### -# Conclusion -# ---------- -# This tutorial covered the usage of cuDNN with NNVM. -# We also have support for cuBLAS. If cuBLAS is enabled, it will be used inside a fully connected layer (nnvm.symbol.dense). -# To use cuBLAS, set a target string as "cuda -libs=cublas". -# You can use both cuDNN and cuBLAS with "cuda -libs=cudnn,cublas". -# -# For ROCm backend, we have support for MIOpen and rocBLAS. -# They can be enabled with target "rocm -libs=miopen,rocblas". -# -# Being able to use external libraries is great, but we need to keep in mind some cautions. -# -# First, the use of external libraries may restrict your usage of TVM and NNVM. -# For example, MIOpen only supports NCHW layout and fp32 data type at the moment, so you cannot use other layouts or data type in TVM. 
-#
-# Second, and more importantly, external libraries restrict the possibility of operator fusion during graph compilation, as shown above.
-# TVM and NNVM aim to achieve the best performance on a variety of hardwares, with joint operator level and graph level optimization.
-# To achieve this goal, we should continue developing better optimizations for TVM and NNVM, while using external libraries as a nice way to fall back to existing implementation when necessary.
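For reference, the external-library workflow covered by the deleted using_external_lib.py tutorial carries over almost unchanged to Relay, which replaces NNVM in this patch: the vendor library is still selected through the target string. Below is a minimal sketch of the equivalent Relay flow, assuming a cuDNN-enabled TVM build; helper names such as relay.testing.create_workload and the exact return types of relay.build may differ between TVM releases.

.. code-block:: python

    import numpy as np
    import tvm
    from tvm import relay
    from tvm.relay import testing
    from tvm.contrib import graph_runtime

    # A small conv2d + relu workload, analogous to the NNVM example in the
    # deleted tutorial (batch_norm is omitted to keep the sketch short).
    batch_size = 1
    data_shape = (batch_size, 3, 224, 224)
    data = relay.var("data", shape=data_shape)
    weight = relay.var("weight", shape=(16, 3, 3, 3))
    body = relay.nn.relu(relay.nn.conv2d(data, weight, kernel_size=(3, 3), padding=(1, 1)))
    func = relay.Function(relay.analysis.free_vars(body), body)
    net, params = testing.create_workload(func)

    # As in NNVM, appending "-libs=cudnn" offloads the convolution to cuDNN
    # instead of a TVM-generated kernel.
    target = "cuda -libs=cudnn"
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(net, target=target, params=params)

    ctx = tvm.context(target, 0)
    module = graph_runtime.create(graph, lib, ctx)
    module.set_input("data", np.random.uniform(-1, 1, size=data_shape).astype("float32"))
    module.set_input(**params)
    module.run()
    out = module.get_output(0).asnumpy()

The caveat from the deleted conclusion still applies in Relay: an operator handed to an external library is a black box to the compiler and cannot be fused with its neighbors.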

diff --git a/nnvm/tutorials/web/resnet.html b/nnvm/tutorials/web/resnet.html
deleted file mode 100644
index 13531a3809c0..000000000000
--- a/nnvm/tutorials/web/resnet.html
+++ /dev/null
@@ -1,204 +0,0 @@
[204 lines of static HTML removed: the "NNVM WebGL Test Page" demo, including its "Input Image:" preview and "Log:" output area; the markup is not reproduced here.]
- - - - - - - - - - - - - diff --git a/src/relay/op/nn/convolution.cc b/src/relay/op/nn/convolution.cc index 534c2d12a6d2..df890b1ab83a 100644 --- a/src/relay/op/nn/convolution.cc +++ b/src/relay/op/nn/convolution.cc @@ -670,7 +670,7 @@ TVM_REGISTER_API("relay.op.nn._make.contrib_conv2d_winograd_weight_transform") RELAY_REGISTER_OP("nn.contrib_conv2d_winograd_weight_transform") .describe(R"code(Weight transformation of winograd fast convolution algorithm. -Separate this into another nnvm symbol in order to enable Precompute Pass to compute the +Separate this into another operator in order to enable Precompute Pass to compute the weight transformation in advance. - **weight**: (channels, in_channels, kernel_size[0], kernel_size[1]) diff --git a/tests/lint/check_file_type.py b/tests/lint/check_file_type.py index 5b83641eb8e1..fcfef181c4e6 100644 --- a/tests/lint/check_file_type.py +++ b/tests/lint/check_file_type.py @@ -102,7 +102,6 @@ # sgx file "apps/sgx/enclave/sgx-deps.diff", # html for demo purposes - "nnvm/tutorials/web/resnet.html", "tests/webgl/test_static_webgl_library.html", "web/example_rpc.html", # images are normally not allowed diff --git a/tests/python/frontend/darknet/test_forward.py b/tests/python/frontend/darknet/test_forward.py index 51f05d7c707d..22dd08ab52ea 100644 --- a/tests/python/frontend/darknet/test_forward.py +++ b/tests/python/frontend/darknet/test_forward.py @@ -251,7 +251,7 @@ def test_forward_dense_batchnorm(): layer = LIB.make_connected_layer(1, 12, 2, 1, 1, 0) for i in range(5): layer.rolling_mean[i] = np.random.rand(1) - layer.rolling_variance[i] = np.random.rand(1) + layer.rolling_variance[i] = np.random.rand(1) + 0.5 layer.scales[i] = np.random.rand(1) net.layers[0] = layer net.w = net.h = 2 @@ -285,7 +285,7 @@ def test_forward_conv_batch_norm(): layer = LIB.make_convolutional_layer(1, 224, 224, 3, 32, 1, 3, 2, 0, 1, 1, 0, 0, 0) for i in range(32): layer.rolling_mean[i] = np.random.rand(1) - layer.rolling_variance[i] = np.random.rand(1) + layer.rolling_variance[i] = np.random.rand(1) + 0.5 net.layers[0] = layer net.w = net.h = 224 LIB.resize_network(net, 224, 224) diff --git a/tests/python/frontend/mxnet/test_forward.py b/tests/python/frontend/mxnet/test_forward.py index be4436dda07e..18250d0ea5a6 100644 --- a/tests/python/frontend/mxnet/test_forward.py +++ b/tests/python/frontend/mxnet/test_forward.py @@ -734,7 +734,7 @@ def verify(shape, axis=1, fix_gamma=False): gamma = np.random.uniform(size=(shape[axis])).astype("float32") beta = np.random.uniform(size=(shape[axis])).astype("float32") moving_mean = np.random.uniform(size=(shape[axis])).astype("float32") - moving_var = np.random.uniform(size=(shape[axis])).astype("float32") + moving_var = np.abs(np.random.uniform(size=(shape[axis])).astype("float32")) + 0.5 ref_res = mx.nd.BatchNorm(mx.nd.array(x), mx.nd.array(gamma), mx.nd.array(beta), mx.nd.array(moving_mean), mx.nd.array(moving_var), axis=axis, use_global_stats=True, fix_gamma=fix_gamma) diff --git a/tests/python/frontend/nnvm_to_relay/test_alter_conv2d.py b/tests/python/frontend/nnvm_to_relay/test_alter_conv2d.py deleted file mode 100644 index ed8b9cd9ed97..000000000000 --- a/tests/python/frontend/nnvm_to_relay/test_alter_conv2d.py +++ /dev/null @@ -1,89 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Test alter conv2d layout pass""" -import tvm -import nnvm - -from tvm import relay -from tvm import autotvm -from tvm.relay import transform -from tvm.relay.analysis import alpha_equal - - -def test_alter_layout_conv2d(): - """Additional layout transformations should occour on the graph. - """ - - def convnet(): - """Alternating layout of simple convnet (from image super-resolution). - """ - bias1 = relay.var('bias1', shape=(64,)) - bias2 = relay.var('bias2', shape=(64,)) - bias3 = relay.var('bias3', shape=(64,)) - bias4 = relay.var('bias4', shape=(64,)) - weight1 = relay.var('weight1', shape=(64, 1, 5, 5)) - weight2 = relay.var('weight2', shape=(64, 64, 3, 3)) - weight3 = relay.var('weight3', shape=(64, 64, 3, 3)) - weight4 = relay.var('weight4', shape=(64, 64, 3, 3)) - data = relay.var("x", shape=(1, 1, 224, 224)) - n00 = relay.nn.conv2d(data, weight1, padding=[2, 2], kernel_size=[5, 5]) - n01 = relay.expand_dims(bias1, axis=1, num_newaxis=2) - n02 = relay.add(n00, n01) - n03 = relay.nn.relu(n02) - n04 = relay.nn.conv2d(n03, weight2, padding=[1, 1], kernel_size=[3, 3]) - n05 = relay.expand_dims(bias2, axis=1, num_newaxis=2) - n06 = relay.add(n04, n05) - n07 = relay.nn.relu(n06) - n08 = relay.nn.conv2d(n07, weight3, padding=[1, 1], kernel_size=[3, 3]) - n09 = relay.expand_dims(bias3, axis=1, num_newaxis=2) - n10 = relay.add(n08, n09) - n11 = relay.nn.relu(n10) - n12 = relay.nn.conv2d(n11, weight4, padding=[1, 1], kernel_size=[3, 3]) - n13 = relay.expand_dims(bias4, axis=1, num_newaxis=2) - n14 = relay.add(n12, n13) - n15 = relay.reshape(n14, newshape=[1, 1, 3, 3, 224, 224]) - n16 = relay.transpose(n15, axes=[0, 1, 4, 2, 5, 3]) - net = relay.reshape(n16, newshape=[1, 1, 672, 672]) - args = relay.analysis.free_vars(net) - return relay.Function(args, net) - - # orig net - N = convnet() - - # trigger a test - # for each known alter_conv2d - targets=['cuda', - 'opencl -device=mali', - 'opencl -device=intel_graphics', - - 'llvm -device=arm_cpu', - 'llvm -device=core-avx-ii'] - - for tgt in targets: - with tvm.target.create(tgt) as target: - with autotvm.tophub.context(target): - mod = relay.Module.from_expr(N) - mod = transform.AlterOpLayout()(mod) - O = mod["main"] - - # graph should differ - assert not relay.analysis.alpha_equal(N, O) - -if __name__ == "__main__": - import numpy as np - np.random.seed(42) - test_alter_layout_conv2d() diff --git a/tests/python/frontend/nnvm_to_relay/test_forward.py b/tests/python/frontend/nnvm_to_relay/test_forward.py deleted file mode 100644 index 6a00b5a471f4..000000000000 --- a/tests/python/frontend/nnvm_to_relay/test_forward.py +++ /dev/null @@ -1,116 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np - -import tvm -import nnvm -import nnvm.testing -from nnvm.to_relay import to_relay -from tvm import relay -from tvm.relay.testing.config import ctx_list -from tvm.contrib import graph_runtime - -def verify_nnvm_to_relay(nnvm_sym, params, data_shape=(1, 3, 224, 224)): - def get_nnvm_output(sym, x, params, target, ctx, dtype='float32'): - shape_dict = {'data': x.shape} - with nnvm.compiler.build_config(opt_level=3): - graph, lib, params = nnvm.compiler.build(sym, target, shape_dict, params=params) - m = graph_runtime.create(graph, lib, ctx) - m.set_input("data", tvm.nd.array(x.astype(dtype))) - m.set_input(**params) - m.run() - return m.get_output(0).asnumpy() - - def get_relay_output(sym, x, params, target, ctx, dtype='float32'): - shape_dict = {'data': x.shape} - func, params = to_relay(sym, shape_dict, dtype, params) - with relay.build_config(opt_level=3): - graph, lib, params = relay.build(func, target=target, params=params) - m = graph_runtime.create(graph, lib, ctx) - m.set_input("data", tvm.nd.array(x.astype(dtype))) - m.set_input(**params) - m.run() - return m.get_output(0).asnumpy() - - x = np.random.uniform(size=data_shape) - for target, ctx in ctx_list(): - nnvm_out = get_nnvm_output(nnvm_sym, x, params, target, ctx) - relay_out = get_relay_output(nnvm_sym, x, params, target, ctx) - tvm.testing.assert_allclose(nnvm_out, relay_out, rtol=1e-5, atol=1e-5) - - -def test_forward_mlp(): - model, params = nnvm.testing.mlp.get_workload(1) - verify_nnvm_to_relay(model, params) - - -def test_forward_vgg(): - model, params = nnvm.testing.vgg.get_workload(1) - verify_nnvm_to_relay(model, params) - - -def test_forward_resnet(): - model, params = nnvm.testing.resnet.get_workload(1) - verify_nnvm_to_relay(model, params) - - -def test_forward_squeezenet(): - model, params = nnvm.testing.squeezenet.get_workload(1) - verify_nnvm_to_relay(model, params) - - -def test_forward_inception_v3(): - model, params = nnvm.testing.inception_v3.get_workload(1) - verify_nnvm_to_relay(model, params, data_shape=(1, 3, 299, 299)) - - -def test_forward_densenet(): - model, params = nnvm.testing.squeezenet.get_workload(1) - verify_nnvm_to_relay(model, params) - - -def test_forward_dqn(): - model, params = nnvm.testing.dqn.get_workload(1) - verify_nnvm_to_relay(model, params, data_shape=(1, 4, 84, 84)) - - -def test_forward_split_concatenate(): - shape = (2, 16) - - tensor = nnvm.sym.Variable("data", shape=shape) - - splited = nnvm.sym.split(tensor, indices_or_sections=2, axis=1) - - concatenated = nnvm.sym.concatenate(*splited, axis=1) - - params = {} - - verify_nnvm_to_relay(splited[0], params, data_shape=shape) - verify_nnvm_to_relay(splited[1], params, data_shape=shape) - verify_nnvm_to_relay(splited, params, data_shape=shape) - verify_nnvm_to_relay(concatenated, params, data_shape=shape) - - -if __name__ == '__main__': - test_forward_mlp() - test_forward_vgg() - test_forward_resnet() - test_forward_squeezenet() - 
test_forward_inception_v3() - test_forward_densenet() - test_forward_dqn() - test_forward_split_concatenate() diff --git a/tutorials/autotvm/tune_relay_cuda.py b/tutorials/autotvm/tune_relay_cuda.py index efce3cb9d832..b931172a9f64 100644 --- a/tutorials/autotvm/tune_relay_cuda.py +++ b/tutorials/autotvm/tune_relay_cuda.py @@ -71,7 +71,7 @@ # Define Network # -------------- # First we need to define the network in relay frontend API. -# We can load some pre-defined network from :code:`nnvm.testing`. +# We can load some pre-defined network from :code:`tvm.relay.testing`. # We can also load models from MXNet, ONNX and TensorFlow. def get_network(name, batch_size): diff --git a/tutorials/frontend/deploy_model_on_android.py b/tutorials/frontend/deploy_model_on_android.py index 813254df46b3..3d0e83d5e450 100644 --- a/tutorials/frontend/deploy_model_on_android.py +++ b/tutorials/frontend/deploy_model_on_android.py @@ -78,7 +78,7 @@ # # .. code-block:: bash # -# echo 'export PYTHONPATH=/workspace/python:/workspacem/topi/python:/workspace/nnvm/python/:/workspace/vta/python:${PYTHONPATH}' >> ~/.bashrc +# echo 'export PYTHONPATH=/workspace/python:/workspace/topi/python:/workspace/vta/python:${PYTHONPATH}' >> ~/.bashrc # source ~/.bashrc #################################################################
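On the tuning side, the same substitution appears in the tutorials touched above: predefined networks now come from tvm.relay.testing instead of nnvm.testing, and AutoTVM tasks are extracted from a Relay program rather than an NNVM graph. The snippet below is a rough sketch of that replacement flow, not the literal tutorial code; the signature of autotvm.task.extract_from_program, the return value of get_workload, and the log file name used here are assumptions that vary across TVM releases.

.. code-block:: python

    import tvm
    from tvm import autotvm, relay
    import tvm.relay.testing

    batch_size = 1
    target = "llvm"
    log_file = "resnet-18.log"  # illustrative file name

    # ResNet-18 workload from tvm.relay.testing (replaces nnvm.testing.resnet).
    # Older releases return a relay.Function here instead of a module.
    mod, params = relay.testing.resnet.get_workload(num_layers=18, batch_size=batch_size)

    # Extract tunable conv2d tasks from the Relay program
    # (replaces autotvm.task.extract_from_graph on an NNVM symbol).
    tasks = autotvm.task.extract_from_program(mod["main"], target=target, params=params,
                                              ops=(relay.op.nn.conv2d,))

    # ... run the tuners over tasks and log the results into log_file ...

    # Compile with the best schedules found during tuning.
    with autotvm.apply_history_best(log_file):
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build(mod, target=target, params=params)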