Skip to content

Commit

Permalink
Code generation for Depthwise Conv2D via CMSIS-NN
Browse files: browse the repository at this point in the history
Change-Id: Iac6acf1c310b20b9e99bf0b9c0310423c93de9be
  • Loading branch information
ashutosh-arm committed Nov 17, 2021
1 parent 239fcdc commit c48e77a
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 45 deletions.
11 changes: 11 additions & 0 deletions python/tvm/relay/op/contrib/cmsisnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,16 @@ def check_qnn_conv2d(pattern):
kernel_zp = conv2d.args[3].data.numpy()
kernel_zp = [kernel_zp] if kernel_zp.ndim == 0 else kernel_zp

# check if depthwise Conv2D
kernel_layout = conv2d.attrs.kernel_layout
pos_o = kernel_layout.index("O")
groups = conv2d.attrs.groups
is_depthwise = False
if groups == int(conv2d_input.checked_type.shape[3]) and groups == int(
conv2d_weight.checked_type.shape[pos_o]
):
is_depthwise = True

return (
conv2d.attrs.out_dtype == "int32"
and conv2d.attrs.padding[2] == 0
Expand All @@ -132,6 +142,7 @@ def check_qnn_conv2d(pattern):
and pattern.checked_type.dtype == "int8"
and bias_dtype == "int32"
and all([zp == 0 for zp in kernel_zp])
and (not is_depthwise or bias_add is not None)
)

def binary_op_pattern(op):
Expand Down
26 changes: 20 additions & 6 deletions src/relay/backend/contrib/cmsisnn/generate_constants.cc
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,25 @@ class GenerateConstantsMutator : public MixedModeMutator {
conv2d_call = requantize_input;
}

// Transpose weights: HWIO -> OHWI
auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
tvm::Attrs new_conv2d_attrs;
Expr transposed_kernel =
ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
tvm::Attrs new_conv2d_attrs = conv2d_call->attrs;
Expr conv2d_kernel = conv2d_call->args[1];

bool is_depthwise = false;
Array<PrimExpr> input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;
Array<PrimExpr> kernel_shape = conv2d_call->args[1]->type_as<TensorTypeNode>()->shape;
std::string kernel_layout = conv2d_attrs->kernel_layout.c_str();
int kernel_pos_o = kernel_layout.find("O");
int groups = conv2d_attrs->groups;
if (groups == qnn::get_const_int(input_shape[3]) &&
groups == qnn::get_const_int(kernel_shape[kernel_pos_o])) {
is_depthwise = true;
}

// Transpose weights: HWIO -> OHWI for Conv2D
if (!is_depthwise) {
conv2d_kernel = ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
}

// Obtain input and output scales from Relay's Requantization
int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
Expand Down Expand Up @@ -153,11 +167,11 @@ class GenerateConstantsMutator : public MixedModeMutator {
req_inp_scale = Constant(req_inp_scale_nda);
}

// Replace existing weights (HWIO) with the transposed ones (OHWI)
// Replace existing weights (HWIO) with the transposed ones (OHWI) for Conv2D
// Substitute Conv2D weight_zero_point with the CMSIS-NN multiplier
// Substitute Requantize input_zero_point with CMSIS-NN shift
// Conv2D arguments: data, weight, input_zp, weight_zp, input_sc, weight_sc
Array<Expr> conv2d_args = {conv2d_call->args[0], transposed_kernel, conv2d_call->args[2],
Array<Expr> conv2d_args = {conv2d_call->args[0], conv2d_kernel, conv2d_call->args[2],
multiplier_const, conv2d_call->args[4], weight_scale};
Call ret_call = Call(conv2d_call->op, conv2d_args, new_conv2d_attrs, {});
if (bias_add_call) {
Expand Down
43 changes: 31 additions & 12 deletions src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ class RelayToTIRVisitor : public MixedModeMutator {
int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]);
int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
int32_t out_channels = qnn::get_const_int(conv2d_attrs->channels);
int32_t groups = conv2d_attrs->groups;
std::string kernel_layout = conv2d_attrs->kernel_layout.c_str();
int32_t clip_min, clip_max;
if (clip_call) {
const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
Expand All @@ -156,14 +159,6 @@ class RelayToTIRVisitor : public MixedModeMutator {
clip_max = 127;
}

tvm::Array<PrimExpr> call_ext_args = {tir::StringImm("arm_convolve_wrapper_s8"), input, filter,
multiplier};
if (bias_add_call) {
call_ext_args.push_back(bias);
}
call_ext_args.push_back(shift);
call_ext_args.push_back(output);

tvm::Array<PrimExpr> scalar_args = {ToArg(input_offset), ToArg(output_offset), ToArg(stride_w),
ToArg(stride_h), ToArg(padding_w), ToArg(padding_h),
ToArg(dilation_w), ToArg(dilation_h), ToArg(clip_min),
Expand All @@ -173,18 +168,42 @@ class RelayToTIRVisitor : public MixedModeMutator {
Array<PrimExpr> input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;
Array<PrimExpr> input_dims = CMSISNNDimensions(input_shape);

// cmsis_nn_dims *filter_dims (OHWI)
// cmsis_nn_dims *filter_dims (OHWI for Conv2D and IHWO for depthwise)
Array<PrimExpr> filter_shape = conv2d_call->args[1]->type_as<TensorTypeNode>()->shape;
Array<PrimExpr> filter_dims = CMSISNNDimensions(filter_shape);

// cmsis_nn_dims *bias_dims (1,1,1,output_channels)
Array<PrimExpr> bias_shape{1, 1, 1, filter_shape[0]};
// cmsis_nn_dims *bias_dims
Array<PrimExpr> bias_shape{1, 1, 1, out_channels};
Array<PrimExpr> bias_dims = CMSISNNDimensions(bias_shape);

// cmsis_nn_dims *output_dims (NHWC)
// cmsis_nn_dims *output_dims (same order as input_dims)
Array<PrimExpr> output_shape = conv2d_call->type_as<TensorTypeNode>()->shape;
Array<PrimExpr> output_dims = CMSISNNDimensions(output_shape);

int32_t depth_multiplier = -1;
int kernel_pos_o = kernel_layout.find("O");
if (groups == qnn::get_const_int(input_shape[3]) &&
groups == qnn::get_const_int(filter_shape[kernel_pos_o])) {
int kernel_pos_i = kernel_layout.find("I");
depth_multiplier = qnn::get_const_int(filter_shape[kernel_pos_i]);
}
scalar_args.push_back(ToArg(depth_multiplier));

// original filter_layout for depthwise is HWOI
std::string cmsisnn_api = "arm_convolve_wrapper_s8";
if (depth_multiplier != -1) {
cmsisnn_api = "arm_depthwise_conv_wrapper_s8";
Array<PrimExpr> depthwise_filter_shape{1, filter_shape[0], filter_shape[1], out_channels};
filter_dims = CMSISNNDimensions(depthwise_filter_shape);
}

tvm::Array<PrimExpr> call_ext_args = {tir::StringImm(cmsisnn_api), input, filter, multiplier};
if (bias_add_call) {
call_ext_args.push_back(bias);
}
call_ext_args.push_back(shift);
call_ext_args.push_back(output);

// https://github.com/ARM-software/CMSIS_5/blob/d788fd583984388553391de18afd8b4d2a146868/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c#L367
std::string context_buffer_name = "NULL";
size_t context_buffer_size =
Expand Down
31 changes: 20 additions & 11 deletions src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ class CodeGenCMSISNN : public codegen::CodeGenCHost {
if (cmsis_func_name == "arm_softmax_s8" || cmsis_func_name == "arm_elementwise_mul_s8" ||
cmsis_func_name == "arm_elementwise_add_s8") {
CodeGenC::VisitExpr_(op, os);
} else if (cmsis_func_name == "arm_convolve_wrapper_s8") {
} else if (cmsis_func_name == "arm_convolve_wrapper_s8" ||
cmsis_func_name == "arm_depthwise_conv_wrapper_s8") {
EmitConv2D(op);
}
return;
Expand All @@ -87,8 +88,12 @@ class CodeGenCMSISNN : public codegen::CodeGenCHost {
std::string EmitCMSISNNConvParams(std::ostream& os, int32_t input_offset, int32_t output_offset,
int32_t stride_w, int32_t stride_h, int32_t padding_w,
int32_t padding_h, int32_t dilation_w, int32_t dilation_h,
int32_t clip_min, int32_t clip_max) {
std::string struct_name = "conv_params";
int32_t clip_min, int32_t clip_max, int32_t depth_multiplier) {
std::string struct_name = "cmsis_nn_conv_params";
std::string instance_name = "conv_params";
if (depth_multiplier != -1) {
struct_name = "cmsis_nn_dw_conv_params";
}
PrintIndent();
os << "cmsis_nn_tile stride = {" << stride_w << "," << stride_h << "};\n";
PrintIndent();
Expand All @@ -98,9 +103,12 @@ class CodeGenCMSISNN : public codegen::CodeGenCHost {
PrintIndent();
os << "cmsis_nn_activation activation = {" << clip_min << "," << clip_max << "};\n";
PrintIndent();
os << "cmsis_nn_conv_params " << struct_name << " = {" << input_offset << ", " << output_offset
<< ", stride, padding, dilation, activation};\n";
return struct_name;
os << struct_name << " " << instance_name << " = {" << input_offset << ", " << output_offset;
if (depth_multiplier != -1) {
os << ", " << depth_multiplier;
}
os << ", stride, padding, dilation, activation};\n";
return instance_name;
}

/*! * \brief Emits cmsis_nn_per_channel_quant_params struct */
Expand All @@ -125,7 +133,7 @@ class CodeGenCMSISNN : public codegen::CodeGenCHost {

/*! * \brief Emits CMSIS-NN APIs for every call_extern */
void EmitConv2D(const CallNode* op) {
static const int max_num_args = 35;
static const int max_num_args = 36;
std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;

bool bias_enabled = false;
Expand All @@ -143,7 +151,7 @@ class CodeGenCMSISNN : public codegen::CodeGenCHost {
std::string input_data = get_var_name(op, ++arg_id);
std::string filter_data = get_var_name(op, ++arg_id);
std::string multiplier = get_var_name(op, ++arg_id);
std::string bias_data("0x0");
std::string bias_data("NULL");
if (bias_enabled) {
bias_data = get_var_name(op, ++arg_id);
}
Expand All @@ -162,6 +170,7 @@ class CodeGenCMSISNN : public codegen::CodeGenCHost {
int dilation_h = get_arg_value(op, ++arg_id);
int clip_min = get_arg_value(op, ++arg_id);
int clip_max = get_arg_value(op, ++arg_id);
int depth_multiplier = get_arg_value(op, ++arg_id);
int input_n = get_arg_value(op, ++arg_id);
int input_h = get_arg_value(op, ++arg_id);
int input_w = get_arg_value(op, ++arg_id);
Expand All @@ -180,9 +189,9 @@ class CodeGenCMSISNN : public codegen::CodeGenCHost {
int output_c = get_arg_value(op, ++arg_id);

std::string context = EmitCMSISNNContext(stream, context_buffer_name, context_buffer_size);
std::string conv_params =
EmitCMSISNNConvParams(stream, input_offset, output_offset, stride_w, stride_h, padding_w,
padding_h, dilation_w, dilation_h, clip_min, clip_max);
std::string conv_params = EmitCMSISNNConvParams(
stream, input_offset, output_offset, stride_w, stride_h, padding_w, padding_h, dilation_w,
dilation_h, clip_min, clip_max, depth_multiplier);
std::string quant_params = EmitCMSISNNPerChannelQuantParams(stream, multiplier, shift);
std::string input_dim = EmitCMSISNNDims(stream, "input", input_n, input_h, input_w, input_c);
std::string filter_dim =
Expand Down
37 changes: 22 additions & 15 deletions tests/python/contrib/test_cmsisnn/test_conv2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,21 +71,20 @@ def make_model(
p = (0, 0, 0, 0)
if padding == "SAME":
p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides)
a = relay.nn.pad(
a,
invar = relay.nn.pad(
invar,
pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)],
pad_value=input_zero_point,
pad_mode="constant",
)
shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3])

weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels)
rng = np.random.default_rng(12321)
w = tvm.nd.array(
rng.integers(
np.iinfo(kernel_dtype).min,
high=np.iinfo(kernel_dtype).max,
size=weight_shape,
size=kernel_shape,
dtype=kernel_dtype,
)
)
Expand Down Expand Up @@ -129,8 +128,11 @@ def make_model(
@pytest.mark.parametrize("kernel_size", [(3, 3)])
@pytest.mark.parametrize("padding", ["SAME", "VALID"])
@pytest.mark.parametrize("strides, dilation", [((2, 2), (1, 1)), ((1, 1), (1, 1))])
@pytest.mark.parametrize("enable_bias", [True, False])
@pytest.mark.parametrize("relu_type", ["NONE", "RELU"])
@pytest.mark.parametrize(
"conv_type, depth_multiplier, enable_bias",
[("conv2d", 1, True), ("conv2d", 1, False), ("depthwise", 1, True), ("depthwise", 3, True)],
)
@pytest.mark.parametrize(
"input_zero_point, input_scale, kernel_scale, out_channels",
[(10, 0.0128, [0.11, 0.22], 2), (-64, 1, [1, 0.0256, 1.37], 3)],
Expand All @@ -147,40 +149,45 @@ def test_op_int8(
input_scale,
kernel_scale,
out_channels,
conv_type,
depth_multiplier,
):
interface_api = "c"
use_unpacked_api = True
test_runner = AOT_CORSTONE300_RUNNER

kernel_zero_point = 0
dtype = "int8"
groups = 1
weight_format = "HWIO"
kernel_h = kernel_size[0]
kernel_w = kernel_size[1]
dtype = "int8"
kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels)
kernel_zero_point = 0
in_min, in_max = get_range_for_dtype_str(dtype)

weight_shape = None
if weight_format == "HWIO":
weight_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels)
else:
weight_shape = (kernel_h, kernel_w, ifm_shape[3], out_channels)
if conv_type == "depthwise":
groups = ifm_shape[3]
weight_format = "HWOI"
kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier)
out_channels = ifm_shape[3] * depth_multiplier
ks_len = len(kernel_scale)
kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)]

output_scale, output_zero_point = get_conv2d_qnn_params(
weight_shape,
kernel_shape,
input_scale,
input_zero_point,
kernel_scale,
kernel_zero_point,
dtype,
dtype,
dtype,
False,
conv_type == "depthwise",
)

model, params = make_model(
ifm_shape,
weight_shape,
kernel_shape,
input_zero_point,
input_scale,
kernel_zero_point,
Expand Down
2 changes: 1 addition & 1 deletion tests/python/driver/tvmc/test_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ def test_compile_tflite_module_with_external_codegen_cmsisnn(
for name in mlf_package.getnames()
if re.match(r"\./codegen/host/src/\D+\d+\.c", name)
]
assert len(c_source_files) == 5
assert len(c_source_files) == 3


@pytest.mark.skipif(
Expand Down

0 comments on commit c48e77a

Please sign in to comment.