[BYOC] [ACL] 20.05 memory corruption temporary fix
This fix is intended to prevent execution of operations via the ACL runtime
when their arguments require memory padding. The fix is temporary, targets
ACL 20.05, and should be removed after the migration to ACL 20.11.
d-smirnov committed Oct 22, 2020
1 parent f65e320 commit 701b42b
Showing 10 changed files with 178 additions and 44 deletions.
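
The guard added in this commit refuses to offload an operator to ACL when any of its arguments, or its result, has an innermost dimension that does not fill whole 128-bit (16-byte) NEON vectors, since such tensors require ACL-side padding in 20.05. A minimal sketch of that criterion, for illustration only and not part of the diff below (the helper name needs_padding is made up; the helper the commit actually adds is padding_required):

    # Illustrative sketch only -- mirrors the byte-alignment check introduced in
    # python/tvm/relay/op/contrib/arm_compute_lib.py below; not part of the commit.
    import numpy as np

    def needs_padding(shape, dtype):
        """True if the innermost dimension does not fill whole 16-byte NEON vectors."""
        if len(shape) == 0:
            return False
        return (shape[-1] * np.dtype(dtype).itemsize) % 16 != 0

    print(needs_padding((1, 14, 14, 512), "float32"))  # False: 512 * 4 = 2048 bytes
    print(needs_padding((11, 2), "float32"))           # True:  2 * 4 = 8 bytes
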
16 changes: 15 additions & 1 deletion include/tvm/relay/op_attr_types.h
@@ -176,7 +176,7 @@ using FTVMLegalize = runtime::TypedPackedFunc<Expr(const Attrs& attrs, const Arr
* \brief Annotates an expression to indicate if an op should be compiled using
* the given compiler/target.
*
* \param attrs The attribute of the original expr.
* \param args The arguments of the original expr.
*
* \return true if this op should be registered to invoke a specific compiler
@@ -185,6 +185,20 @@ using FTVMLegalize = runtime::TypedPackedFunc<Expr(const Attrs& attrs, const Arr
using FTVMAnnotateTarget = runtime::TypedPackedFunc<bool(const Attrs& attrs, // NOLINT(*)
const Array<Expr>& args)>;

/*!
* \brief Annotates an expression to indicate if an op should be compiled using
* the given compiler/target.
* \param attrs The attribute of the original expr.
* \param args The arguments of the original expr.
* \param out_type The return type of the original expr.
*
* \return true if this op should be registered to invoke a specific compiler
* for codegen, otherwise, false.
*/
using FTVMAnnotateTarget3 =
runtime::TypedPackedFunc<bool(const Attrs& attrs, // NOLINT(*)
const Array<Expr>& args, const Type& out_type)>;

/*!
* \brief Forward rewriting rule for a specific op.
*
72 changes: 55 additions & 17 deletions python/tvm/relay/op/contrib/arm_compute_lib.py
@@ -17,6 +17,8 @@
# pylint: disable=invalid-name, unused-argument
"""Arm Compute Library supported operators."""
import tvm
import numpy as np

from tvm.relay.expr import const
from tvm.relay import transform
from tvm.relay.build_module import bind_params_by_name
@@ -183,7 +185,7 @@ def check_dense(extract):
call = extract
while call.op.name != "nn.dense":
call = call.args[0]
return dense(call.attrs, call.args)
return dense(call.attrs, call.args, call.checked_type)

def check_qnn_dense(extract):
"""Check qnn conv pattern is supported by ACL."""
@@ -192,7 +194,7 @@ def check_qnn_dense(extract):
call = extract
while call.op.name != "qnn.dense":
call = call.args[0]
return qnn_dense(call.attrs, call.args)
return qnn_dense(call.attrs, call.args, call.checked_type)

def check_avg_pool2d(extract):
"""Check average pool2d pattern is supported by ACL."""
@@ -201,12 +203,12 @@ def check_avg_pool2d(extract):
pool = extract.args[0]
if pool.args[0].attrs.dtype != "int32":
return False
return avg_pool2d(pool.attrs, pool.args, from_quantized_composite=True)
return avg_pool2d(pool.attrs, pool.args, pool.checked_type, from_quantized_composite=True)

def check_l2_pool2d(extract):
"""Check l2 pool2d pattern is supported by ACL."""
pool = extract.args[0]
return avg_pool2d(pool.attrs, pool.args)
return avg_pool2d(pool.attrs, pool.args, pool.checked_type)

return [
("arm_compute_lib.conv2d", conv_pattern(), check_conv),
@@ -227,9 +229,10 @@ def _func_wrapper(attrs, args):
return _func_wrapper


# Reshape does not need a padding check in 20.05
_register_external_op_helper("reshape")


# conv2d does not need a padding check in 20.05
@tvm.ir.register_op_attr("nn.conv2d", "target.arm_compute_lib")
def conv2d(attrs, args):
"""Check if the external ACL codegen for conv2d should be used."""
@@ -248,6 +251,7 @@ def conv2d(attrs, args):
return True


# conv2d does not need a padding check in 20.05
def qnn_conv2d(attrs, args):
"""Check if the external ACL codegen for qnn.conv2d should be used."""
if attrs.groups != 1:
@@ -266,7 +270,7 @@ def qnn_conv2d(attrs, args):


@tvm.ir.register_op_attr("nn.dense", "target.arm_compute_lib")
def dense(attrs, args):
def dense(attrs, args, out_type):
"""Check if the external ACL codegen for dense should be used."""
data_typ = args[0].checked_type
if data_typ.dtype != "float32":
@@ -276,10 +280,10 @@ def dense(attrs, args):
return False
if attrs.out_dtype != "float32" and attrs.out_dtype != "":
return False
return True
return not padding_required([*args, out_type])


def qnn_dense(attrs, args):
def qnn_dense(attrs, args, out_type):
"""Check if the external ACL codegen for qnn.dense should be used."""
data_typ = args[0].checked_type
if data_typ.dtype != "uint8":
@@ -289,24 +293,57 @@ def qnn_dense(attrs, args):
return False
if attrs.out_dtype != "int32":
return False
return True

return not padding_required([*args, out_type])


@tvm.ir.register_op_attr("nn.max_pool2d", "target.arm_compute_lib")
def max_pool2d(attrs, args):
def max_pool2d(attrs, args, out_type):
"""Check if the external ACL codegen for maxpool2d should be used."""
if attrs.layout != "NHWC":
return False
typ = args[0].checked_type
if typ.dtype not in ["float32", "uint8"]:
return False
return True
return not padding_required([*args, out_type])


def padding_required(inputs):
    """Checks whether the supplied data will require padding.
    Most ACL operators up to version 20.11 use padded data.
    """

    def _check(shape, dtype):
        """NEON has 128 bits (16 bytes) per vector."""
        if len(shape) == 0:
            return False
        return (shape[-1] * np.dtype(dtype).itemsize) % 16 != 0

    def _padding_required():
        for i in inputs:
            if isinstance(i, (tvm.relay.expr.Var, tvm.relay.expr.Call)):
                if _check(i.checked_type.shape, i.checked_type.dtype):
                    return True
            elif isinstance(i, tvm.relay.expr.Constant):
                if _check(i.data.shape, i.data.dtype):
                    return True
            elif isinstance(i, tvm.ir.tensor_type.TensorType):
                if _check(i.shape, i.dtype):
                    return True
            else:
                raise Exception("Not supported")

        return False

    result = _padding_required()
    return result


@tvm.ir.register_op_attr("nn.avg_pool2d", "target.arm_compute_lib")
def avg_pool2d(attrs, args, from_quantized_composite=False):
def avg_pool2d(attrs, args, out_type, from_quantized_composite=False):
"""Check if the external ACL codegen for avgpool2d should be used."""
typ = args[0].checked_type

if from_quantized_composite:
if typ.dtype != "int32":
return False
@@ -315,29 +352,30 @@ def avg_pool2d(attrs, args, from_quantized_composite=False):
return False
if attrs.layout != "NHWC":
return False
return True

return not padding_required([*args, out_type])


@tvm.ir.register_op_attr("nn.global_max_pool2d", "target.arm_compute_lib")
def global_max_pool2d(attrs, args):
def global_max_pool2d(attrs, args, out_type):
"""Check if the external ACL codegen for gloval_maxpool2d should be used."""
typ = args[0].checked_type
if typ.dtype not in ["float32", "uint8"]:
return False
if attrs.layout != "NHWC":
return False
return True
return not padding_required([*args, out_type])


@tvm.ir.register_op_attr("nn.global_avg_pool2d", "target.arm_compute_lib")
def global_avg_pool2d(attrs, args):
def global_avg_pool2d(attrs, args, out_type):
"""Check if the external ACL codegen for global_avgpool2d should be used."""
typ = args[0].checked_type
if typ.dtype not in ["float32"]:
return False
if attrs.layout != "NHWC":
return False
return True
return not padding_required([*args, out_type])


@tvm.ir.register_op_attr("maximum", "target.arm_compute_lib")
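
For illustration (not part of the commit), a rough sketch of how the new padding_required helper classifies a dense call whose data shape is (11, 2), one of the shapes added to the tests further below; it assumes padding_required is importable from tvm.relay.op.contrib.arm_compute_lib as defined above:

    # Illustrative usage of padding_required; not part of the commit's diff.
    import numpy as np
    import tvm
    from tvm import relay
    from tvm.relay.op.contrib.arm_compute_lib import padding_required

    data = relay.var("a", shape=(11, 2), dtype="float32")
    weight = relay.const(np.random.uniform(-1, 1, (2, 2)).astype("float32"))
    mod = tvm.IRModule.from_expr(relay.nn.dense(data, weight, units=2))
    mod = relay.transform.InferType()(mod)
    call = mod["main"].body  # the nn.dense call, with checked_type populated

    # The data tensor's innermost dimension is 2 * 4 = 8 bytes, not a multiple of
    # 16, so dense() now rejects the op and it stays on the default TVM backend.
    print(padding_required([*call.args, call.checked_type]))  # True
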
13 changes: 11 additions & 2 deletions src/relay/transforms/annotate_target.cc
@@ -169,8 +169,17 @@ class AnnotateTargetRewriter : public ExprRewriter {
if (!Op::HasAttrMap("target." + std::string(target))) {
continue;
}
auto fannotate = Op::GetAttrMap<FTVMAnnotateTarget>("target." + std::string(target));
if (fannotate.count(op) && fannotate[op](pre->attrs, pre->args)) {
bool result = false;
try {
auto fannotate = Op::GetAttrMap<FTVMAnnotateTarget>("target." + std::string(target));
result = (fannotate.count(op) && fannotate[op](pre->attrs, pre->args));
} catch (...) {
auto fannotate = Op::GetAttrMap<FTVMAnnotateTarget3>("target." + std::string(target));
result =
(fannotate.count(op) && fannotate[op](pre->attrs, pre->args, pre->checked_type()));
}

if (result) {
supported_targets.push_back(target);
}
}
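
For context, the try/catch in the rewriter above exists because an annotator registered under "target.<name>" may now take either two or three arguments: the two-argument call is attempted first, and the three-argument form is used as the fallback. A hypothetical Python-side sketch of the two flavors (the target name example_codegen and the chosen ops are placeholders, not part of the commit):

    # Hypothetical annotators illustrating the two signatures the dispatch handles.
    import tvm

    @tvm.ir.register_op_attr("nn.relu", "target.example_codegen")
    def relu_checker(attrs, args):  # classic two-argument FTVMAnnotateTarget form
        return True

    @tvm.ir.register_op_attr("nn.softmax", "target.example_codegen")
    def softmax_checker(attrs, args, out_type):  # new FTVMAnnotateTarget3 form
        # out_type carries the checked return type, so output shapes can be
        # inspected as well (e.g. for the ACL padding checks above).
        return len(out_type.shape) > 0
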
1 change: 1 addition & 0 deletions src/runtime/contrib/arm_compute_lib/acl_utils.cc
@@ -44,6 +44,7 @@ arm_compute::Tensor MakeACLTensor(const JSONGraphNode& tensor_rep, void* data,
std::vector<int64_t> shape = tensor_rep.GetOpShape()[0];
DLDataType dtype = tensor_rep.GetOpDataType()[0];
arm_compute::TensorInfo info = MakeACLTensorInfo(shape, dtype, scale, offset);
info.set_is_resizable(false);
tensor.allocator()->init(info);
if (data != nullptr) {
CheckACLError(tensor.allocator()->import_memory(data));
3 changes: 2 additions & 1 deletion tests/python/contrib/test_arm_compute_lib/infrastructure.py
@@ -276,10 +276,11 @@ def verify_codegen(
module,
known_good_codegen,
num_acl_modules,
tvm_ops=0,
target="llvm -mtriple=aarch64-linux-gnu -mattr=+neon",
):
"""Check acl codegen against a known good output."""
module = build_module(module, target)
module = build_module(module, target, tvm_ops=tvm_ops, acl_partitions=num_acl_modules)
acl_modules = extract_acl_modules(module)

assert len(acl_modules) == num_acl_modules, (
62 changes: 48 additions & 14 deletions tests/python/contrib/test_arm_compute_lib/test_dense.py
@@ -20,8 +20,8 @@

import tvm
from tvm import relay

from .infrastructure import (
from tvm import testing
from test_arm_compute_lib.infrastructure import (
Device,
skip_runtime_test,
skip_codegen_test,
@@ -185,18 +185,34 @@ def test_dense():
np.random.seed(0)

dtype = ["float32"]
shape = [((1, 128), (16, 128), 16), ((32, 32), (32, 32), 32), ((1, 64), (1, 64), 1)]
shape = [
(1, (1, 128), (16, 128), 16),
(1, (32, 32), (32, 32), 32),
(0, (1, 64), (1, 64), 1),
(0, (11, 2), (2, 2), 2),
]
composite = [False, True]
trials = generate_trials([dtype, shape, composite], 3)

for dtype, (shape, weight_shape, units), composite in trials:
for dtype, (acl_partitions, shape, weight_shape, units), composite in trials:
outputs = []
inputs = {"a": tvm.nd.array(np.random.uniform(-128, 127, shape).astype(dtype))}
func, params = _get_model(
shape, weight_shape, units, dtype, var_names=iter(inputs), has_bias=composite
)
for acl in [False, True]:
outputs.append(build_and_run(func, inputs, 1, params, device, enable_acl=acl)[0])
outputs.append(
build_and_run(
func,
inputs,
1,
params,
device,
enable_acl=acl,
tvm_ops=(1 - acl_partitions) * (2 - int(not composite)),
acl_partitions=acl_partitions,
)[0]
)

config = {
"shape": shape,
@@ -215,18 +231,18 @@ def test_codegen_dense():
np.random.seed(0)

dtype = ["float32"]
shape = [((1, 128), (16, 128), 16), ((32, 32), (32, 32), 32), ((1, 64), (1, 64), 1)]
shape = [(1, (1, 128), (16, 128), 16), (1, (32, 32), (32, 32), 32), (0, (1, 64), (1, 64), 1)]
composite = [False, True]
trials = generate_trials([dtype, shape, composite], 3)

for dtype, (shape, weight_shape, units), composite in trials:
for dtype, (acl_partitions, shape, weight_shape, units), composite in trials:
inputs = {"a"}

args = (shape, weight_shape, units, dtype)

func, params = _get_model(*args, var_names=iter(inputs), has_bias=composite)
exp_codegen = _get_expected_codegen(*args, has_bias=composite)
verify_codegen(func, exp_codegen, 1)
verify_codegen(func, exp_codegen, acl_partitions, 1 - acl_partitions)


def test_qnn_dense():
@@ -239,11 +255,18 @@ def test_qnn_dense():
np.random.seed(0)

dtype = ["uint8"]
shape = [((1, 128), (16, 128), 16), ((32, 32), (32, 32), 32), ((1, 64), (1, 64), 1)]
shape = [
(0, (4, 4), (4, 4), 4),
(1, (16, 16), (4, 16), 4),
(1, (1, 128), (16, 128), 16),
(1, (32, 32), (32, 32), 32),
(0, (1, 64), (1, 64), 1),
]

composite = [False, True]
trials = generate_trials([dtype, shape, composite], 3)

for dtype, (shape, weight_shape, units), composite in trials:
for dtype, (acl_partitions, shape, weight_shape, units), composite in trials:
outputs = []
inputs = {"a": tvm.nd.array(np.random.uniform(0, 255, shape).astype(dtype))}
input_zp = 100
@@ -270,7 +293,18 @@ )
)

for acl in [False, True]:
outputs.append(build_and_run(func, inputs, 1, params, device, enable_acl=acl)[0])
outputs.append(
build_and_run(
func,
inputs,
1,
params,
device,
tvm_ops=(1 - acl_partitions) * (3 - int(not composite)),
acl_partitions=acl_partitions,
enable_acl=acl,
)[0]
)

config = {
"shape": shape,
@@ -295,11 +329,11 @@ def test_codegen_qnn_dense():
np.random.seed(0)

dtype = ["uint8"]
shape = [((1, 128), (16, 128), 16), ((32, 32), (32, 32), 32), ((1, 64), (1, 64), 1)]
shape = [(1, (1, 128), (16, 128), 16), (1, (32, 32), (32, 32), 32), (0, (1, 64), (1, 64), 1)]
composite = [False, True]
trials = generate_trials([dtype, shape, composite], 3)

for dtype, (shape, weight_shape, units), composite in trials:
for dtype, (acl_partitions, shape, weight_shape, units), composite in trials:
inputs = {"a"}
args = (shape, weight_shape, units, dtype)

@@ -323,7 +357,7 @@ def test_codegen_qnn_dense():
has_bias=composite,
)
exp_codegen = _get_expected_codegen(*args, has_bias=composite)
verify_codegen(func, exp_codegen, 1)
verify_codegen(func, exp_codegen, acl_partitions, 2 - 2 * acl_partitions)


if __name__ == "__main__":
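
A worked sketch (not from the commit) of the tvm_ops arithmetic used in the dense tests above, assuming the fp32 pattern consists of dense plus an optional bias_add and the qnn pattern additionally carries a requantize:

    # Illustrative only: when a trial cannot be partitioned for ACL
    # (acl_partitions == 0), every operator in the pattern must instead be
    # compiled by the default TVM backend, which is what tvm_ops counts.
    def expected_tvm_ops(acl_partitions, composite, quantized=False):
        ops_in_pattern = (2 if quantized else 1) + int(composite)
        return (1 - acl_partitions) * ops_in_pattern

    assert expected_tvm_ops(1, composite=True) == 0                  # fully offloaded
    assert expected_tvm_ops(0, composite=False) == 1                 # dense only
    assert expected_tvm_ops(0, composite=True) == 2                  # dense + bias_add
    assert expected_tvm_ops(0, composite=True, quantized=True) == 3  # + requantize
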
1 change: 1 addition & 0 deletions tests/python/contrib/test_arm_compute_lib/test_maximum.py
@@ -20,6 +20,7 @@

import tvm
from tvm import relay
from tvm import testing

from .infrastructure import (
skip_runtime_test,
