[UTILS, DOC] Use TVM file downloading utility, conv2d tutorial (apache#48)
tqchen · Jul 12, 2018 · a81a167 · a81a167
1 parent 5c8177b
commit a81a167
Show file tree

Hide file tree

Showing 9 changed files with 156 additions and 108 deletions.
diff --git a/vta/examples/resnet18/pynq/README.md b/vta/examples/resnet18/pynq/README.md
@@ -2,12 +2,20 @@
 
 Follow the first two parts of the [Installation Guide](../../../docs/how_to/install.md) to make sure that the VTA python libraries are installed, and that the RPC server is running on the Pynq FPGA dev board.
 
-Simply run the following python script:
+We recommend leaving `config.json` at its default parameterization (you can, of course, still switch the target between "sim" and "pynq").
+
+Simply run the example program. We rely on pickle to store the parameters, which currently only works with Python 2.
 ```bash
-python imagenet_predict.py
+python2 imagenet_predict.py
 ```
 
-This will run imagenet classification using the ResNet18 architecture on a VTA design that performs 8-bit integer inference, to perform classification on a cat image `cat.jpg`.
+The script will first download the following files into the `_data/` directory:
+* `cat.jpg` which provides a test sample for the ImageNet classifier
+* `quantize_graph.json` which describes the NNVM graph of the 8-bit ResNet-18
+* `quantize_params.pkl` which contains the network parameters
+* `synset.txt` which contains the ImageNet categories
+
+Next, it will run ImageNet classification on the cat image `cat.jpg`, using the ResNet-18 architecture on a VTA design that performs 8-bit integer inference.
 
 The script reports runtime measured on the Pynq board (in seconds), and the top-1 result category:
 ```

diff --git a/vta/examples/resnet18/pynq/imagenet_predict.py b/vta/examples/resnet18/pynq/imagenet_predict.py
@@ -1,17 +1,18 @@
 # some standard imports
 import nnvm
 import tvm
-from nnvm.compiler import graph_attr
 import vta
 import vta.testing
 import os
 import numpy as np
-from PIL import Image
 import pickle
 import json
 import logging
-import wget
+
+from PIL import Image
+from nnvm.compiler import graph_attr
 from tvm.contrib import graph_runtime, rpc, util
+from tvm.contrib.download import download
 
 bfactor = 1
 cfactor = 16
@@ -20,15 +21,20 @@
 debug_fpga_only = False
 
 # Obtain model and hardware files (they're too large to check-in)
+# Download them into _data dir
+data_dir = "_data/"
 url = "https://homes.cs.washington.edu/~moreau/media/vta/"
 TEST_FILE = 'cat.jpg'
 CATEG_FILE = 'synset.txt'
 RESNET_GRAPH_FILE = 'quantize_graph.json'
 RESNET_PARAMS_FILE = 'quantize_params.pkl'
+# Create data dir
+if not os.path.exists(data_dir):
+    os.makedirs(data_dir)
+# Download files
 for file in [TEST_FILE, CATEG_FILE, RESNET_GRAPH_FILE, RESNET_PARAMS_FILE]:
     if not os.path.isfile(file):
-        print ("Downloading {}".format(file))
-        wget.download(url+file)
+        download(os.path.join(url, file), os.path.join(data_dir, file))
 
 if verbose:
     logging.basicConfig(level=logging.DEBUG)
@@ -40,8 +46,8 @@
 if vta.get_env().TARGET == "sim":
     target_host = "llvm"
 
-synset = eval(open(os.path.join(CATEG_FILE)).read())
-image = Image.open(os.path.join(TEST_FILE)).resize((224, 224))
+synset = eval(open(os.path.join(data_dir, CATEG_FILE)).read())
+image = Image.open(os.path.join(data_dir, TEST_FILE)).resize((224, 224))
 
 def transform_image(image):
     image = np.array(image) - np.array([123., 117., 104.])
@@ -88,9 +94,9 @@ def mark_nop(graph, conv_layer=-1, skip_conv_layer=()):
 import nnvm.compiler
 np.random.seed(0)
 sym = nnvm.graph.load_json(
-    open(os.path.join(RESNET_GRAPH_FILE)).read())
+    open(os.path.join(data_dir, RESNET_GRAPH_FILE)).read())
 params = pickle.load(
-    open(os.path.join(RESNET_PARAMS_FILE)))
+    open(os.path.join(data_dir, RESNET_PARAMS_FILE), 'rb'))
 
 shape_dict = {"data": x.shape}
 dtype_dict = {"data": 'float32'}

diff --git a/vta/python/vta/bitstream.py b/vta/python/vta/bitstream.py
@@ -2,9 +2,16 @@
 from __future__ import absolute_import as _abs
 
 import os
-import urllib
+import sys
+
+from tvm.contrib.download import download
 from .environment import get_env
 
+if sys.version_info >= (3,):
+    import urllib.error as urllib2
+else:
+    import urllib2
+
 # bitstream repo
 BITSTREAM_URL = "https://github.com/uwsaml/vta-distro/raw/master/bitstreams/"
 
@@ -41,15 +48,25 @@ def download_bitstream():
     url = os.path.join(BITSTREAM_URL, env.TARGET)
     url = os.path.join(url, env.HW_VER)
     url = os.path.join(url, env.BITSTREAM)
-    # Check that the bitstream is accessible from the server
-    if urllib.urlopen(url).getcode() == 404:
-        # Raise error - the solution when this happens it to build your own bitstream and add it
-        # to your VTA_CACHE_PATH
-        raise RuntimeError(
-            "Error: {} is not available. It appears that this configuration has not been built."
-            .format(url))
-    else:
-        urllib.urlretrieve(url, bit)
-        success = True
+
+    try:
+        download(url, bit)
+    except urllib2.HTTPError as err:
+        if err.code == 404:
+            raise RuntimeError(
+                # Raise error - the solution when this happens is to build your
+                # own bitstream and add it to your $VTA_CACHE_PATH
+                "{} is not available. It appears that this configuration \
+bistream has not been cached. Please compile your own bitstream (see hardware \
+compilation guide to get Xilinx toolchains setup) and add it to your \
+$VTA_CACHE_PATH. Alternatively edit your config.json back to its default \
+settings. You can see the list of available bitstreams under {}"
+                .format(url, BITSTREAM_URL))
+        else:
+            raise RuntimeError(
+                # This could happen when trying to access the URL behind a proxy
+                "Something went wrong when trying to access {}. Check your \
+internet connection or proxy settings."
+                .format(url))
 
     return success
diff --git a/vta/python/vta/testing/util.py b/vta/python/vta/testing/util.py
@@ -15,23 +15,34 @@ def run(run_func):
     """
     env = get_env()
 
-    # Run on local sim rpc if necessary
-    local_rpc = int(os.environ.get("VTA_LOCAL_SIM_RPC", "0"))
-    if local_rpc:
-        env.TARGET = "sim"
-        remote = rpc.connect("localhost", local_rpc)
-        run_func(env, remote)
-    else:
-        # run on simulator
-        if simulator.enabled():
-            env.TARGET = "sim"
+    if env.TARGET == "sim":
+
+        # Talk to local RPC if necessary to debug RPC server.
+        # Compile vta on your host with make at the root.
+        # Make sure TARGET is set to "sim" in the config.json file.
+        # Then launch the RPC server on the host machine
+        # with ./apps/pynq_rpc/start_rpc_server.sh
+        # Set your VTA_LOCAL_SIM_RPC environment variable to
+        # the port it's listening to, e.g. 9090
+        local_rpc = int(os.environ.get("VTA_LOCAL_SIM_RPC", "0"))
+        if local_rpc:
+            remote = rpc.connect("localhost", local_rpc)
+            run_func(env, remote)
+        else:
+            # Make sure simulation library exists
+            # If this fails, build vta on host (make)
+            # with TARGET="sim" in the config.json file.
+            assert simulator.enabled()
             run_func(env, rpc.LocalSession())
 
-    # Run on PYNQ if env variable exists
-    host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
-    if host:
-        env.TARGET = "pynq"
-        port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
-        port = int(port)
-        remote = rpc.connect(host, port)
-        run_func(env, remote)
+    elif env.TARGET == "pynq":
+
+        # Run on PYNQ if env variable exists
+        host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
+        port = int(os.environ.get("VTA_PYNQ_RPC_PORT", None))
+        if host and port:
+            remote = rpc.connect(host, port)
+            run_func(env, remote)
+        else:
+            raise RuntimeError(
+                "Please set the VTA_PYNQ_RPC_HOST and VTA_PYNQ_RPC_PORT environment variables")
diff --git a/vta/tests/python/integration/test_benchmark_gemm.py b/vta/tests/python/integration/test_benchmark_gemm.py
@@ -18,7 +18,8 @@ def run_gemm_packed(env, remote, batch_size, channel, block):
                      channel // env.BLOCK_OUT,
                      env.BATCH,
                      env.BLOCK_OUT)
-        num_ops = channel * channel * batch_size
+        # To compute number of ops, use a x2 factor for FMA
+        num_ops = 2 * channel * channel * batch_size
 
         ko = tvm.reduce_axis((0, channel // env.BLOCK_IN), name='ko')
         ki = tvm.reduce_axis((0, env.BLOCK_IN), name='ki')
@@ -157,14 +158,14 @@ def run_schedule(load_inp,
 
         def gemm_normal(print_ir):
             mock = env.mock
-            print("----- GEMM GFLOPS End-to-End Test-------")
+            print("----- GEMM GOPS End-to-End Test-------")
             def run_test(header, print_ir, check_correctness):
                 cost = run_schedule(
                     env.dma_copy, env.dma_copy, env.gemm, env.alu, env.dma_copy,
                     print_ir, check_correctness)
                 gops = (num_ops / cost.mean) / float(10 ** 9)
                 print(header)
-                print("\tTime cost = %g sec/op, %g GFLOPS" % (cost.mean, gops))
+                print("\tTime cost = %g sec/op, %g GOPS" % (cost.mean, gops))
             with vta.build_config():
                 run_test("NORMAL", print_ir, True)
 
@@ -177,7 +178,7 @@ def run_test(header, print_ir):
                     print_ir, False)
                 gops = (num_ops / cost.mean) / float(10 ** 9)
                 print(header)
-                print("\tTime cost = %g sec/op, %g GFLOPS" % (cost.mean, gops))
+                print("\tTime cost = %g sec/op, %g GOPS" % (cost.mean, gops))
             with vta.build_config():
                 run_test("NORMAL", print_ir)
 
@@ -190,7 +191,7 @@ def run_test(header, print_ir):
                     print_ir, False)
                 gops = (num_ops / cost.mean) / float(10 ** 9)
                 print(header)
-                print("\tTime cost = %g sec/op, %g GFLOPS" % (cost.mean, gops))
+                print("\tTime cost = %g sec/op, %g GOPS" % (cost.mean, gops))
             with vta.build_config():
                 run_test("NORMAL", print_ir)
             print("")
@@ -204,7 +205,7 @@ def run_test(header, print_ir):
                 gops = (num_ops / cost.mean) / float(10 ** 9)
                 bandwith = (batch_size * channel * env.INP_WIDTH / cost.mean) / float(10 ** 9)
                 print(header)
-                print("\tTime cost = %g sec/op, %g GFLOPS, bandwidth=%g Gbits" % (
+                print("\tTime cost = %g sec/op, %g GOPS, bandwidth=%g Gbits" % (
                     cost.mean, gops, bandwith))
             with vta.build_config():
                 run_test("NORMAL", print_ir)
@@ -219,7 +220,7 @@ def run_test(header, print_ir):
                 gops = (num_ops / cost.mean) / float(10 ** 9)
                 bandwith = (channel * channel * env.WGT_WIDTH / cost.mean) / float(10 ** 9)
                 print(header)
-                print("\tTime cost = %g sec/op, %g GFLOPS, bandwidth=%g Gbits" % (
+                print("\tTime cost = %g sec/op, %g GOPS, bandwidth=%g Gbits" % (
                     cost.mean, gops, bandwith))
             with vta.build_config():
                 run_test("NORMAL", print_ir)
@@ -235,7 +236,7 @@ def run_test(header, print_ir):
                 gops = (num_ops / cost.mean) / float(10 ** 9)
                 bandwith = (batch_size * channel * env.OUT_WIDTH / cost.mean) / float(10 ** 9)
                 print(header)
-                print("\tTime cost = %g sec/op, %g GFLOPS, bandwidth=%g Gbits" % (
+                print("\tTime cost = %g sec/op, %g GOPS, bandwidth=%g Gbits" % (
                     cost.mean, gops, bandwith))
             with vta.build_config():
                 run_test("NORMAL", print_ir)

diff --git a/vta/tests/python/integration/test_benchmark_topi_conv2d.py b/vta/tests/python/integration/test_benchmark_topi_conv2d.py
@@ -42,6 +42,7 @@ def run_vta_conv2d(env, remote, key, batch_size, wl, profile=True):
         res = my_clip(res, 0, 127)
         res = topi.cast(res, "int8")
 
+        # To compute number of ops, use a x2 factor for FMA
         num_ops = 2 * batch_size * fout_height * fout_width * wl.hkernel * wl.wkernel * wl.out_filter * wl.in_filter
 
         a_shape = (batch_size, wl.in_filter, wl.height, wl.width)
@@ -118,7 +119,7 @@ def conv_normal(print_ir):
                     print(vta.lower(s, [data, kernel, bias, res], simple_mode=True))
             cost = verify(s, True)
             gops = (num_ops / cost.mean) / float(10 ** 9)
-            print("\tTime cost = %g sec/op, %g GFLOPS" % (cost.mean, gops))
+            print("\tTime cost = %g sec/op, %g GOPS" % (cost.mean, gops))
 
         conv_normal(False)