Skip to content

Commit

Permalink
[UTILS, DOC] Use TVM file downloading utility, conv2d tutorial (apach…
Browse files Browse the repository at this point in the history
  • Loading branch information
tmoreau89 authored and tqchen committed Jul 12, 2018
1 parent 5c8177b commit a81a167
Show file tree
Hide file tree
Showing 9 changed files with 156 additions and 108 deletions.
14 changes: 11 additions & 3 deletions vta/examples/resnet18/pynq/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,20 @@

Follow the first two parts of the [Installation Guide](../../../docs/how_to/install.md) to make sure that the VTA python libraries are installed, and that the RPC server is running on the Pynq FPGA dev board.

Simply run the following python script:
We recommend leaving `config.json` at its default parameterization (you can, of course, switch the target between "sim" and "pynq").

Simply run the example program. We rely on pickle to store parameters which now only works with python2.
```bash
python imagenet_predict.py
python2 imagenet_predict.py
```

This will run imagenet classification using the ResNet18 architecture on a VTA design that performs 8-bit integer inference, to perform classification on a cat image `cat.jpg`.
The script will first download the following files into `_data/` directory:
* `cat.jpg` which provides a test sample for the ImageNet classifier
* `quantize_graph.json` which describes the NNVM graph of the 8-bit ResNet-18
* `quantize_params.pkl` which contains the network parameters
* `synset.txt` which contains the ImageNet categories

Next, it will run imagenet classification using the ResNet18 architecture on a VTA design that performs 8-bit integer inference, to perform classification on a cat image `cat.jpg`.

The script reports runtime measured on the Pynq board (in seconds), and the top-1 result category:
```
Expand Down
24 changes: 15 additions & 9 deletions vta/examples/resnet18/pynq/imagenet_predict.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
# some standard imports
import nnvm
import tvm
from nnvm.compiler import graph_attr
import vta
import vta.testing
import os
import numpy as np
from PIL import Image
import pickle
import json
import logging
import wget

from PIL import Image
from nnvm.compiler import graph_attr
from tvm.contrib import graph_runtime, rpc, util
from tvm.contrib.download import download

bfactor = 1
cfactor = 16
Expand All @@ -20,15 +21,20 @@
debug_fpga_only = False

# Obtain model and hardware files (they're too large to check-in)
# Download them into _data dir
data_dir = "_data/"
url = "https://homes.cs.washington.edu/~moreau/media/vta/"
TEST_FILE = 'cat.jpg'
CATEG_FILE = 'synset.txt'
RESNET_GRAPH_FILE = 'quantize_graph.json'
RESNET_PARAMS_FILE = 'quantize_params.pkl'
# Create data dir
if not os.path.exists(data_dir):
os.makedirs(data_dir)
# Download files
for file in [TEST_FILE, CATEG_FILE, RESNET_GRAPH_FILE, RESNET_PARAMS_FILE]:
if not os.path.isfile(file):
print ("Downloading {}".format(file))
wget.download(url+file)
download(os.path.join(url, file), os.path.join(data_dir, file))

if verbose:
logging.basicConfig(level=logging.DEBUG)
Expand All @@ -40,8 +46,8 @@
if vta.get_env().TARGET == "sim":
target_host = "llvm"

synset = eval(open(os.path.join(CATEG_FILE)).read())
image = Image.open(os.path.join(TEST_FILE)).resize((224, 224))
synset = eval(open(os.path.join(data_dir, CATEG_FILE)).read())
image = Image.open(os.path.join(data_dir, TEST_FILE)).resize((224, 224))

def transform_image(image):
image = np.array(image) - np.array([123., 117., 104.])
Expand Down Expand Up @@ -88,9 +94,9 @@ def mark_nop(graph, conv_layer=-1, skip_conv_layer=()):
import nnvm.compiler
np.random.seed(0)
sym = nnvm.graph.load_json(
open(os.path.join(RESNET_GRAPH_FILE)).read())
open(os.path.join(data_dir, RESNET_GRAPH_FILE)).read())
params = pickle.load(
open(os.path.join(RESNET_PARAMS_FILE)))
open(os.path.join(data_dir, RESNET_PARAMS_FILE), 'rb'))

shape_dict = {"data": x.shape}
dtype_dict = {"data": 'float32'}
Expand Down
39 changes: 28 additions & 11 deletions vta/python/vta/bitstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,16 @@
from __future__ import absolute_import as _abs

import os
import urllib
import sys

from tvm.contrib.download import download
from .environment import get_env

if sys.version_info >= (3,):
import urllib.error as urllib2
else:
import urllib2

# bitstream repo
BITSTREAM_URL = "https://github.com/uwsaml/vta-distro/raw/master/bitstreams/"

Expand Down Expand Up @@ -41,15 +48,25 @@ def download_bitstream():
url = os.path.join(BITSTREAM_URL, env.TARGET)
url = os.path.join(url, env.HW_VER)
url = os.path.join(url, env.BITSTREAM)
# Check that the bitstream is accessible from the server
if urllib.urlopen(url).getcode() == 404:
# Raise error - the solution when this happens it to build your own bitstream and add it
# to your VTA_CACHE_PATH
raise RuntimeError(
"Error: {} is not available. It appears that this configuration has not been built."
.format(url))
else:
urllib.urlretrieve(url, bit)
success = True

try:
download(url, bit)
except urllib2.HTTPError as err:
if err.code == 404:
raise RuntimeError(
# Raise error - the solution when this happens is to build your
# own bitstream and add it to your $VTA_CACHE_PATH
"{} is not available. It appears that this configuration \
bistream has not been cached. Please compile your own bitstream (see hardware \
compilation guide to get Xilinx toolchains setup) and add it to your \
$VTA_CACHE_PATH. Alternatively edit your config.json back to its default \
settings. You can see the list of available bitstreams under {}"
.format(url, BITSTREAM_URL))
else:
raise RuntimeError(
# This could happen when trying to access the URL behind a proxy
"Something went wrong when trying to access {}. Check your \
internet connection or proxy settings."
.format(url))

return success
47 changes: 29 additions & 18 deletions vta/python/vta/testing/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,34 @@ def run(run_func):
"""
env = get_env()

# Run on local sim rpc if necessary
local_rpc = int(os.environ.get("VTA_LOCAL_SIM_RPC", "0"))
if local_rpc:
env.TARGET = "sim"
remote = rpc.connect("localhost", local_rpc)
run_func(env, remote)
else:
# run on simulator
if simulator.enabled():
env.TARGET = "sim"
if env.TARGET == "sim":

# Talk to local RPC if necessary to debug RPC server.
# Compile vta on your host with make at the root.
# Make sure TARGET is set to "sim" in the config.json file.
# Then launch the RPC server on the host machine
# with ./apps/pynq_rpc/start_rpc_server.sh
# Set your VTA_LOCAL_SIM_RPC environment variable to
# the port it's listening to, e.g. 9090
local_rpc = int(os.environ.get("VTA_LOCAL_SIM_RPC", "0"))
if local_rpc:
remote = rpc.connect("localhost", local_rpc)
run_func(env, remote)
else:
# Make sure simulation library exists
# If this fails, build vta on host (make)
# with TARGET="sim" in the config.json file.
assert simulator.enabled()
run_func(env, rpc.LocalSession())

# Run on PYNQ if env variable exists
host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
if host:
env.TARGET = "pynq"
port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
port = int(port)
remote = rpc.connect(host, port)
run_func(env, remote)
elif env.TARGET == "pynq":

# Run on PYNQ if env variable exists
host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
port = int(os.environ.get("VTA_PYNQ_RPC_PORT", None))
if host and port:
remote = rpc.connect(host, port)
run_func(env, remote)
else:
raise RuntimeError(
"Please set the VTA_PYNQ_RPC_HOST and VTA_PYNQ_RPC_PORT environment variables")
17 changes: 9 additions & 8 deletions vta/tests/python/integration/test_benchmark_gemm.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ def run_gemm_packed(env, remote, batch_size, channel, block):
channel // env.BLOCK_OUT,
env.BATCH,
env.BLOCK_OUT)
num_ops = channel * channel * batch_size
# To compute number of ops, use a x2 factor for FMA
num_ops = 2 * channel * channel * batch_size

ko = tvm.reduce_axis((0, channel // env.BLOCK_IN), name='ko')
ki = tvm.reduce_axis((0, env.BLOCK_IN), name='ki')
Expand Down Expand Up @@ -157,14 +158,14 @@ def run_schedule(load_inp,

def gemm_normal(print_ir):
mock = env.mock
print("----- GEMM GFLOPS End-to-End Test-------")
print("----- GEMM GOPS End-to-End Test-------")
def run_test(header, print_ir, check_correctness):
cost = run_schedule(
env.dma_copy, env.dma_copy, env.gemm, env.alu, env.dma_copy,
print_ir, check_correctness)
gops = (num_ops / cost.mean) / float(10 ** 9)
print(header)
print("\tTime cost = %g sec/op, %g GFLOPS" % (cost.mean, gops))
print("\tTime cost = %g sec/op, %g GOPS" % (cost.mean, gops))
with vta.build_config():
run_test("NORMAL", print_ir, True)

Expand All @@ -177,7 +178,7 @@ def run_test(header, print_ir):
print_ir, False)
gops = (num_ops / cost.mean) / float(10 ** 9)
print(header)
print("\tTime cost = %g sec/op, %g GFLOPS" % (cost.mean, gops))
print("\tTime cost = %g sec/op, %g GOPS" % (cost.mean, gops))
with vta.build_config():
run_test("NORMAL", print_ir)

Expand All @@ -190,7 +191,7 @@ def run_test(header, print_ir):
print_ir, False)
gops = (num_ops / cost.mean) / float(10 ** 9)
print(header)
print("\tTime cost = %g sec/op, %g GFLOPS" % (cost.mean, gops))
print("\tTime cost = %g sec/op, %g GOPS" % (cost.mean, gops))
with vta.build_config():
run_test("NORMAL", print_ir)
print("")
Expand All @@ -204,7 +205,7 @@ def run_test(header, print_ir):
gops = (num_ops / cost.mean) / float(10 ** 9)
bandwith = (batch_size * channel * env.INP_WIDTH / cost.mean) / float(10 ** 9)
print(header)
print("\tTime cost = %g sec/op, %g GFLOPS, bandwidth=%g Gbits" % (
print("\tTime cost = %g sec/op, %g GOPS, bandwidth=%g Gbits" % (
cost.mean, gops, bandwith))
with vta.build_config():
run_test("NORMAL", print_ir)
Expand All @@ -219,7 +220,7 @@ def run_test(header, print_ir):
gops = (num_ops / cost.mean) / float(10 ** 9)
bandwith = (channel * channel * env.WGT_WIDTH / cost.mean) / float(10 ** 9)
print(header)
print("\tTime cost = %g sec/op, %g GFLOPS, bandwidth=%g Gbits" % (
print("\tTime cost = %g sec/op, %g GOPS, bandwidth=%g Gbits" % (
cost.mean, gops, bandwith))
with vta.build_config():
run_test("NORMAL", print_ir)
Expand All @@ -235,7 +236,7 @@ def run_test(header, print_ir):
gops = (num_ops / cost.mean) / float(10 ** 9)
bandwith = (batch_size * channel * env.OUT_WIDTH / cost.mean) / float(10 ** 9)
print(header)
print("\tTime cost = %g sec/op, %g GFLOPS, bandwidth=%g Gbits" % (
print("\tTime cost = %g sec/op, %g GOPS, bandwidth=%g Gbits" % (
cost.mean, gops, bandwith))
with vta.build_config():
run_test("NORMAL", print_ir)
Expand Down
3 changes: 2 additions & 1 deletion vta/tests/python/integration/test_benchmark_topi_conv2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def run_vta_conv2d(env, remote, key, batch_size, wl, profile=True):
res = my_clip(res, 0, 127)
res = topi.cast(res, "int8")

# To compute number of ops, use a x2 factor for FMA
num_ops = 2 * batch_size * fout_height * fout_width * wl.hkernel * wl.wkernel * wl.out_filter * wl.in_filter

a_shape = (batch_size, wl.in_filter, wl.height, wl.width)
Expand Down Expand Up @@ -118,7 +119,7 @@ def conv_normal(print_ir):
print(vta.lower(s, [data, kernel, bias, res], simple_mode=True))
cost = verify(s, True)
gops = (num_ops / cost.mean) / float(10 ** 9)
print("\tTime cost = %g sec/op, %g GFLOPS" % (cost.mean, gops))
print("\tTime cost = %g sec/op, %g GOPS" % (cost.mean, gops))

conv_normal(False)

Expand Down
Loading

0 comments on commit a81a167

Please sign in to comment.