-
Notifications
You must be signed in to change notification settings - Fork 3.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BENCHMARK][ADRENO] Adreno Benchmarks with texture #13675
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,278 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
"""Benchmark script for various models on Adreno GPU. | ||
""" | ||
import argparse | ||
|
||
import numpy as np | ||
|
||
import os | ||
import sys | ||
import tvm | ||
from tvm import te | ||
from tvm.relay import testing | ||
from tvm.contrib.utils import tempdir | ||
import tvm.contrib.graph_executor as runtime | ||
from tvm import relay | ||
from tvm import autotvm | ||
from tvm.contrib import utils, ndk | ||
|
||
|
||
def get_network(name, batch_size, dtype="float32"):
    """Build the relay module and random parameters for a named network.

    Parameters
    ----------
    name: str
        Network identifier, e.g. 'resnet-18', 'vgg-16', 'densenet-121',
        'inception_v3', 'mobilenet', 'squeezenet_v1.1' or 'mxnet'.
    batch_size: int
        Batch size used for the input and output shapes.
    dtype: str
        Data type forwarded to the workload builder.

    Returns
    -------
    net: tvm.IRModule
        The relay definition of the network.
    params: dict
        The parameters to benchmark with.
    input_shape: tuple
        ``(batch_size, 3, H, W)``; H = W = 299 for 'inception_v3', 224 otherwise.
    output_shape: tuple
        ``(batch_size, 1000)``.

    Raises
    ------
    ValueError
        If ``name`` does not match any supported network.
    """
    # inception_v3 is the only network here with a non-default input size.
    side = 299 if name == "inception_v3" else 224
    input_shape = (batch_size, 3, side, side)
    output_shape = (batch_size, 1000)

    # Keyword arguments shared by every relay.testing workload builder below.
    common = {"batch_size": batch_size, "dtype": dtype}

    if name == "mobilenet":
        net, params = testing.mobilenet.get_workload(**common)
    elif name == "inception_v3":
        net, params = testing.inception_v3.get_workload(**common)
    elif "resnet" in name:
        # The depth is encoded after the dash, e.g. "resnet-50".
        depth = int(name.split("-")[1])
        net, params = testing.resnet.get_workload(num_layers=depth, **common)
    elif "vgg" in name:
        depth = int(name.split("-")[1])
        net, params = testing.vgg.get_workload(num_layers=depth, **common)
    elif "densenet" in name:
        depth = int(name.split("-")[1])
        net, params = testing.densenet.get_workload(densenet_size=depth, **common)
    elif "squeezenet" in name:
        # The version string follows "_v", e.g. "squeezenet_v1.1" -> "1.1".
        release = name.split("_v")[1]
        net, params = testing.squeezenet.get_workload(version=release, **common)
    elif name == "mxnet":
        # an example for mxnet model
        from mxnet.gluon.model_zoo.vision import get_model

        gluon_block = get_model("resnet18_v1", pretrained=True)
        imported, params = relay.frontend.from_mxnet(
            gluon_block, shape={"data": input_shape}, dtype=dtype
        )
        main_fn = imported["main"]
        # Re-wrap the imported function so that its body ends in a softmax.
        with_softmax = relay.Function(
            main_fn.params,
            relay.nn.softmax(main_fn.body),
            None,
            main_fn.type_params,
            main_fn.attrs,
        )
        net = tvm.IRModule.from_expr(with_softmax)
    else:
        raise ValueError("Unsupported network: " + name)

    return net, params, input_shape, output_shape
|
||
|
||
def print_progress(msg):
    """Write a progress message to stdout, ending in a carriage return.

    Parameters
    ----------
    msg: str
        The message to print
    """
    stream = sys.stdout
    # The message ends with "\r" rather than a newline.
    stream.write("".join((msg, "\r")))
    stream.flush()
|
||
|
||
def tune_tasks(
    tasks,
    measure_option,
    n_trial=1024,
    early_stopping=None,
    log_filename="tuning.log",
):
    """Tune AutoTVM tasks and store the picked records in ``log_filename``.

    Every task is tuned with an ``XGBTuner`` (``loss_type="rank"``). Records
    are logged to ``log_filename + ".tmp"`` while tuning; afterwards
    ``autotvm.record.pick_best`` is run from that temporary log into
    ``log_filename``.

    Parameters
    ----------
    tasks: list
        The AutoTVM tasks to tune. They are processed in reverse order.
    measure_option: dict
        Measurement configuration forwarded to the tuner, e.g. the value
        returned by ``autotvm.measure_option``.
    n_trial: int
        Upper bound on trials per task; capped at the size of the task's
        config space.
    early_stopping: int or None
        Forwarded unchanged to the tuner.
    log_filename: str
        Destination file for the picked records.

    Returns
    -------
    None
    """
    if not tasks:
        # Nothing to tune. Without this guard pick_best would be pointed at a
        # temporary log that no tuning callback ever created.
        return

    # Imported lazily, as in the original, so the tuner is only required
    # when there is actually something to tune.
    from tvm.autotvm.tuner import XGBTuner

    tmp_log_file = log_filename + ".tmp"
    total = len(tasks)

    for i, tsk in enumerate(reversed(tasks)):
        print("Task: ", tsk)
        prefix = "[Task %2d/%2d] " % (i + 1, total)
        tuner_obj = XGBTuner(tsk, loss_type="rank")

        # Never request more trials than there are candidate configurations.
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(
            n_trial=tsk_trial,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                autotvm.callback.log_to_file(tmp_log_file),
            ],
        )

    autotvm.record.pick_best(tmp_log_file, log_filename)
|
||
|
||
def evaluate_network(network, target, target_host, dtype, repeat):
    """Optionally tune, then build, upload and time one network on the device.

    Besides its parameters this function reads the module-level ``args``
    (``rpc_key``, ``host``, ``port`` and ``tune``) set by the command-line
    parser.

    Parameters
    ----------
    network: str
        Network name understood by ``get_network``.
    target: str
        Device target string.
    target_host: str
        Host target string.
    dtype: str
        Data type of the network and of the random input.
    repeat: int
        ``repeat`` value passed to the time evaluator.

    Returns
    -------
    tuple
        ``(mean, std)`` of the measured run times in milliseconds.
    """
    print_progress(network)
    net, params, input_shape, output_shape = get_network(network, batch_size=1, dtype=dtype)

    # Auto Tuning
    tune_log = "-".join(("adreno", network, dtype)) + ".log"
    # The measure option is created whether or not tuning is requested.
    measure_option = autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func=ndk.create_shared, timeout=15),
        runner=autotvm.RPCRunner(
            args.rpc_key,
            host=args.host,
            port=args.port,
            number=3,
            timeout=600,
        ),
    )
    if args.tune:
        tasks = autotvm.task.extract_from_program(
            net, target=target, target_host=target_host, params=params
        )
        tune_tasks(
            tasks,
            measure_option=measure_option,
            early_stopping=None,
            log_filename=tune_log,
        )

    print_progress("%-20s building..." % network)

    def _compile():
        # Same build for both cases; only the surrounding context differs.
        with tvm.transform.PassContext(opt_level=3):
            return relay.build(
                net, target=tvm.target.Target(target, host=target_host), params=params
            )

    # Build with the tuning log applied when one exists for this network/dtype.
    if os.path.exists(tune_log):
        with autotvm.apply_history_best(tune_log):
            lib = _compile()
    else:
        lib = _compile()

    tmp = tempdir()

    filename = "%s.so" % network
    local_path = tmp.relpath(filename)
    lib.export_library(local_path, ndk.create_shared)

    # upload library and params
    print_progress("%-20s uploading..." % network)

    # connect to remote device
    tracker = tvm.rpc.connect_tracker(args.host, args.port)
    remote = tracker.request(args.rpc_key)

    dev = remote.device(str(target), 0)
    remote.upload(tmp.relpath(filename))

    rlib = remote.load_module(filename)
    module = runtime.GraphModule(rlib["default"](dev))
    random_input = np.random.uniform(size=input_shape).astype(dtype)
    module.set_input("data", tvm.nd.array(random_input))

    # evaluate
    print_progress("%-20s evaluating..." % network)
    ftimer = module.module.time_evaluator("run", dev, number=1, repeat=repeat)
    prof_res = np.array(ftimer().results) * 1000  # multiply 1000 for converting to millisecond
    mean_ms = np.mean(prof_res)
    std_ms = np.std(prof_res)
    print(
        "%-20s %-19s (%s)"
        % (network + "-" + dtype, "%.2f ms" % mean_ms, "%.2f ms" % std_ms)
    )
    return (mean_ms, std_ms)
|
||
|
||
def parse_bool_flag(value):
    """Convert a command-line token into a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty text (including "False") becomes ``True``. This parser reads the
    text instead.

    Parameters
    ----------
    value: str or bool
        The token given on the command line.

    Returns
    -------
    bool
        The parsed value.

    Raises
    ------
    argparse.ArgumentTypeError
        If the token is not a recognised boolean spelling.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("1", "true", "t", "yes", "y", "on"):
        return True
    # The empty string is kept falsy, as it was under ``bool("")``.
    if token in ("0", "false", "f", "no", "n", "off", ""):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


if __name__ == "__main__":
    # Single source of truth for both the --network choices and the default sweep.
    SUPPORTED_NETWORKS = [
        "resnet-18",
        "resnet-34",
        "resnet-50",
        "vgg-16",
        "vgg-19",
        "densenet-121",
        "inception_v3",
        "mobilenet",
        "squeezenet_v1.0",
        "squeezenet_v1.1",
    ]

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--network",
        type=str,
        choices=SUPPORTED_NETWORKS,
        help="The name of neural network",
    )
    parser.add_argument("--host", type=str, default="127.0.0.1")
    parser.add_argument("--port", type=int, default=9190)
    parser.add_argument("--rpc-key", type=str, default="android")
    parser.add_argument("--repeat", type=int, default=30)
    # Accepts a bare "--tune" as well as "--tune True" / "--tune False".
    parser.add_argument(
        "--tune",
        type=parse_bool_flag,
        nargs="?",
        const=True,
        default=False,
        help="Run auto-tuning before building (default: False)",
    )
    # NOTE: ``args`` has to remain a module-level name because
    # evaluate_network reads it as a global.
    args = parser.parse_args()

    # Without --network every supported network is benchmarked.
    networks = list(SUPPORTED_NETWORKS) if args.network is None else [args.network]

    target = "opencl -device=adreno"
    target_host = "llvm -mtriple=arm64-linux-android"

    print("--------------------------------------------------")
    print("%-20s %-20s" % ("Network Name", "Mean Inference Time (std dev)"))
    print("--------------------------------------------------")

    # Maps "<network>-<dtype>" to the (mean, std) pair from evaluate_network.
    results = {}

    for network in networks:
        for dtype in ("float32", "float16"):
            results[network + "-" + dtype] = evaluate_network(
                network, target, target_host, dtype, args.repeat
            )

    print("----------------------------------------------------------------------")
    print("%-30s %-30s" % ("Network Name", "Mean Inference Time (std dev)"))
    print("----------------------------------------------------------------------")
    for key, val in results.items():
        print("%-30s %-30s (%s)" % (key, "%.2f ms" % val[0], "%.2f ms" % val[1]))
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#!/usr/bin/env bash | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
set -euxo pipefail

# Usage: bench.sh <benchmark-mode> <android-serial>
# Validate the arguments before anything is started, so that a missing serial
# fails with a readable message instead of an "unbound variable" error
# half-way through the setup.
if [ "$#" -lt 2 ]; then
    echo "Usage: $0 <benchmark-mode> <android-serial>" >&2
    exit 1
fi

BENCH_MODE="$1"
DEVICE_SERIAL="$2"

case "${BENCH_MODE}" in
    texture) ;;
    *)
        echo "Unsupported benchmark mode: ${BENCH_MODE} (expected: texture)" >&2
        exit 1
        ;;
esac

source tests/scripts/setup-pytest-env.sh
export PYTHONPATH=${PYTHONPATH}:${TVM_PATH}/apps/extension/python
export LD_LIBRARY_PATH="build:${LD_LIBRARY_PATH:-}"

export TVM_TRACKER_HOST=127.0.0.1
export TVM_TRACKER_PORT=$(((RANDOM % 100) + 9100))
export RPC_DEVICE_KEY="android"
export TVM_NDK_CC="${ANDROID_NDK_HOME}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang"

TRACKER_PID=""
DEVICE_PID=""

# Stop the background processes started below. Best effort: either process
# may already have exited, and a failing kill must not change the exit status.
cleanup() {
    if [ -n "${DEVICE_PID}" ]; then
        kill "${DEVICE_PID}" 2>/dev/null || true
    fi
    if [ -n "${TRACKER_PID}" ]; then
        kill "${TRACKER_PID}" 2>/dev/null || true
    fi
}
# Installed before anything is started so that a failure in any later step
# (set -e) still tears the tracker down.
trap cleanup EXIT

env PYTHONPATH=python python3 -m tvm.exec.rpc_tracker --host "${TVM_TRACKER_HOST}" --port "${TVM_TRACKER_PORT}" &
TRACKER_PID=$!
sleep 5   # Wait for tracker to bind

export ANDROID_SERIAL="${DEVICE_SERIAL}"

adb shell "mkdir -p /data/local/tmp/tvm_ci"
adb push build-adreno-target/tvm_rpc /data/local/tmp/tvm_ci/tvm_rpc_ci
adb push build-adreno-target/libtvm_runtime.so /data/local/tmp/tvm_ci

adb reverse "tcp:${TVM_TRACKER_PORT}" "tcp:${TVM_TRACKER_PORT}"
adb forward tcp:5000 tcp:5000
adb forward tcp:5001 tcp:5001
adb forward tcp:5002 tcp:5002
env adb shell "cd /data/local/tmp/tvm_ci; killall -9 tvm_rpc_ci; sleep 2; LD_LIBRARY_PATH=/data/local/tmp/tvm_ci/ ./tvm_rpc_ci server --host=0.0.0.0 --port=5000 --port-end=5010 --tracker=127.0.0.1:${TVM_TRACKER_PORT} --key=${RPC_DEVICE_KEY}" &
DEVICE_PID=$!
sleep 5 # Wait for the device connections

# cleanup pycache (-exec copes with paths containing whitespace)
find . -type f -name "*.pyc" -exec rm -f {} +
# Test TVM
make cython3

if [ "${BENCH_MODE}" = "texture" ]; then
    python3 apps/benchmark/adreno/adreno_gpu_bench_texture.py --host "${TVM_TRACKER_HOST}" --port "${TVM_TRACKER_PORT}" --rpc-key "${RPC_DEVICE_KEY}"
fi

# The tracker and the device server are stopped by the EXIT trap.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -727,6 +727,13 @@ def add_subparser( | |
"./tests/scripts/task_python_adreno.sh " + os.environ.get("ANDROID_SERIAL", ""), | ||
], | ||
), | ||
"benchmarks": ( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are there any plans to run these benchmarks in the CI? @driazati probably you know: do we have any opportunity to run different performance tests, e.g. once per week, to check that no performance regressions were introduced to TVM mainline? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ideally this should be separate and run as part of a nightly job instead of on every push. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, totally agree. It is not necessary to run it for every push. Probably, a new command for @tvm-bot can be added to run the performance tests for a PR if needed. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There are no concrete plans right now (we'd need to do some designing to figure out how to provision machines for benchmarks and where to store results), but I agree that having benchmarks run on some cadence would be good for tracking our progress. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The script is ready to be used with CI, provided we have Snapdragon devices on CI. For now we are using it internally. |
||
"run Adreno Texture Benchmarks", | ||
[ | ||
"./apps/benchmark/adreno/bench.sh texture " | ||
+ os.environ.get("ANDROID_SERIAL", ""), | ||
], | ||
), | ||
}, | ||
), | ||
] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just an idea: if we want to use such benchmarks in the CI, we can use the statistics from tophub to measure performance without tuning. If someone changes the schedules for Adreno, they will upload the updated statistics to tophub. In this way, we can avoid performance regressions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Already done. tlc-pack/tophub#27
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The tuning option is kept so that future Adreno variants can be tuned if required.