Skip to content
This repository has been archived by the owner on Nov 25, 2022. It is now read-only.

Commit

Permalink
[CMSIS-NN][Perf] Converted Relay Conv2D into CMSIS-NN Depthwise (apac…
Browse files Browse the repository at this point in the history
  • Loading branch information
ashutosh-arm authored and xinetzone committed Nov 25, 2022
1 parent cbf4396 commit bfa07c3
Show file tree
Hide file tree
Showing 6 changed files with 260 additions and 15 deletions.
5 changes: 5 additions & 0 deletions apps/microtvm/zephyr_cmsisnn/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ set(DATA_FILES
)
set(CMSIS_SOURCES
${CMSIS_PATH}/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c
${CMSIS_PATH}/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c
${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c
Expand Down
46 changes: 46 additions & 0 deletions src/relay/backend/contrib/cmsisnn/convolutions.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "convolutions.h"

#include <string>

#include "../../../qnn/utils.h"
#include "tvm/ir/transform.h"
#include "tvm/relay/attrs/nn.h"

namespace tvm {
namespace relay {
namespace contrib {
namespace cmsisnn {

bool IsCMSISNNDepthwise(const Conv2DAttrs* conv2d_attrs, const Array<PrimExpr>& input_shape,
const Array<PrimExpr>& kernel_shape) {
std::string kernel_layout = conv2d_attrs->kernel_layout.c_str();
int kernel_pos_o = kernel_layout.find("O");
int kernel_pos_i = kernel_layout.find("I");
int kernel_dim_o_val = qnn::get_const_int(kernel_shape[kernel_pos_o]);
int kernel_dim_i_val = qnn::get_const_int(kernel_shape[kernel_pos_i]);
int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
return (out_channels == kernel_dim_o_val * kernel_dim_i_val);
}

} // namespace cmsisnn
} // namespace contrib
} // namespace relay
} // namespace tvm
60 changes: 60 additions & 0 deletions src/relay/backend/contrib/cmsisnn/convolutions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file src/relay/backend/contrib/cmsisnn/convolutions.h
* \brief CMSIS-NN utility functions for Convolutions
*/

#ifndef TVM_RELAY_BACKEND_CONTRIB_CMSISNN_CONVOLUTIONS_H_
#define TVM_RELAY_BACKEND_CONTRIB_CMSISNN_CONVOLUTIONS_H_

#include <tvm/relay/attrs/nn.h>
#include <tvm/relay/attrs/transform.h>
#include <tvm/relay/expr_functor.h>
#include <tvm/relay/transform.h>
#include <tvm/runtime/ndarray.h>

#include "../../../op/make_op.h"
#include "../../../qnn/utils.h"
#include "../../../transforms/pattern_utils.h"

namespace tvm {
namespace relay {
namespace contrib {
namespace cmsisnn {
/*!
* \brief Checks if Relay Conv2D was originally CMSIS-NN compliant Depthwise Convolution
* See:
* https://github.com/apache/tvm/blob/6ed3ab3e33f8eafa4acaf53b7a671831de7587e9/python/tvm/relay/frontend/tflite.py#L2107
*
*
* \return true if a Conv2D is a Depthwise Convolution based on Conv2D's inputs' shapes and
* attributes
*/

bool IsCMSISNNDepthwise(const Conv2DAttrs* conv2d_attrs, const Array<PrimExpr>& input_shape,
const Array<PrimExpr>& kernel_shape);

} // namespace cmsisnn
} // namespace contrib
} // namespace relay
} // namespace tvm

#endif // TVM_RELAY_BACKEND_CONTRIB_CMSISNN_CONVOLUTIONS_H_
7 changes: 2 additions & 5 deletions src/relay/backend/contrib/cmsisnn/generate_constants.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "../../../op/make_op.h"
#include "../../../qnn/utils.h"
#include "../../../transforms/pattern_utils.h"
#include "convolutions.h"

namespace tvm {
namespace relay {
Expand Down Expand Up @@ -111,11 +112,7 @@ class GenerateConstantsMutator : public MixedModeMutator {

Array<PrimExpr> input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;
Array<PrimExpr> kernel_shape = conv2d_call->args[1]->type_as<TensorTypeNode>()->shape;
std::string kernel_layout = conv2d_attrs->kernel_layout.c_str();
int kernel_pos_o = kernel_layout.find("O");
int groups = conv2d_attrs->groups;
if (groups != qnn::get_const_int(input_shape[3]) ||
groups != qnn::get_const_int(kernel_shape[kernel_pos_o])) {
if (!IsCMSISNNDepthwise(conv2d_attrs, input_shape, kernel_shape)) {
// Transpose weights: HWIO -> OHWI for Conv2D
conv2d_kernel = ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
}
Expand Down
13 changes: 7 additions & 6 deletions src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
Expand Down Expand Up @@ -31,6 +30,7 @@
#include "../../../transforms/pattern_utils.h"
#include "buffer_size.h"
#include "compiler_attrs.h"
#include "convolutions.h"

namespace tvm {
namespace relay {
Expand Down Expand Up @@ -173,7 +173,6 @@ class RelayToTIRVisitor : public MixedModeMutator {
int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
int32_t out_channels = qnn::get_const_int(conv2d_attrs->channels);
int32_t groups = conv2d_attrs->groups;
std::string kernel_layout = conv2d_attrs->kernel_layout.c_str();
int32_t clip_min = std::numeric_limits<int8_t>::min();
int32_t clip_max = std::numeric_limits<int8_t>::max();
Expand Down Expand Up @@ -207,11 +206,13 @@ class RelayToTIRVisitor : public MixedModeMutator {
int32_t output_c = qnn::get_const_int(output_shape[3]);

int32_t depth_multiplier = -1;
int kernel_pos_o = kernel_layout.find("O");
if (groups == qnn::get_const_int(input_shape[3]) &&
groups == qnn::get_const_int(filter_shape[kernel_pos_o])) {
if (IsCMSISNNDepthwise(conv2d_attrs, input_shape, filter_shape)) {
// Refer to TVM frontend to know how depth multiplier and out_channels are related
// https://github.com/apache/tvm/blob/6ed3ab3e33f8eafa4acaf53b7a671831de7587e9/python/tvm/relay/frontend/tflite.py#L2129
int kernel_pos_i = kernel_layout.find("I");
depth_multiplier = qnn::get_const_int(filter_shape[kernel_pos_i]);
int kernel_pos_o = kernel_layout.find("O");
int kernel_pos_dm = input_c == 1 ? kernel_pos_o : kernel_pos_i;
depth_multiplier = qnn::get_const_int(filter_shape[kernel_pos_dm]);
}
scalar_args.push_back(ToArg(depth_multiplier));

Expand Down
144 changes: 140 additions & 4 deletions tests/python/contrib/test_cmsisnn/test_conv2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,13 @@
from tvm import relay
from tvm.relay.op.contrib import cmsisnn

from tvm.testing.aot import generate_ref_data, AOTTestModel, compile_models, compile_and_run

from tvm.testing.aot import (
generate_ref_data,
AOTTestModel,
compile_models,
compile_and_run,
run_and_check,
)
from tvm.micro.testing.aot_test_utils import AOT_USMP_CORSTONE300_RUNNER
from .utils import (
make_module,
Expand Down Expand Up @@ -84,13 +89,14 @@ def make_model(
)
)
weight_const = relay.const(weight, kernel_dtype)
conv2d_kernel_sc = kernel_scale[0] if out_channels == 1 else kernel_scale
conv = relay.qnn.op.conv2d(
invar,
weight_const,
input_zero_point=relay.const(input_zero_point, "int32"),
kernel_zero_point=relay.const(kernel_zero_point, "int32"),
input_scale=relay.const(input_scale, "float32"),
kernel_scale=relay.const(kernel_scale, "float32"),
kernel_scale=relay.const(conv2d_kernel_sc, "float32"),
kernel_size=(kernel_h, kernel_w),
data_layout="NHWC",
kernel_layout=weight_format,
Expand All @@ -105,6 +111,7 @@ def make_model(
bias_const = relay.const(bias, "int32")
last_op = relay.nn.bias_add(conv, bias_const, axis=3) if enable_bias else conv
requant_input_sc = [sc * input_scale for sc in kernel_scale]
requant_input_sc = requant_input_sc[0] if out_channels == 1 else requant_input_sc
last_op = relay.qnn.op.requantize(
last_op,
relay.const(requant_input_sc, "float32"),
Expand Down Expand Up @@ -209,7 +216,7 @@ def test_conv2d_number_primfunc_args(
cmsisnn_func = cmsisnn_tir_mod["tvmgen_default_cmsis_nn_main_0"]
assert (
len(cmsisnn_func.params) == expected_num_params
), "Generated unexpected number of function arguments"
), "Generated unexpected number of function arguments."


@tvm.testing.requires_cmsisnn
Expand Down Expand Up @@ -540,6 +547,135 @@ def test_depthwise_int8(
)


@tvm.testing.requires_cmsisnn
@pytest.mark.parametrize("padding", ["SAME", "VALID"])
@pytest.mark.parametrize("strides, dilation", [((1, 1), (1, 1))])
@pytest.mark.parametrize("relu_type", ["RELU", "NONE"])
@pytest.mark.parametrize("depth_multiplier", [1, 3])
@pytest.mark.parametrize(
"input_zero_point, input_scale, kernel_scale",
[
(
10,
0.0128,
[0.11, 0.22],
),
(
-64,
1,
[1, 0.0256, 1.37],
),
],
)
def test_relay_conv2d_cmsisnn_depthwise_int8(
padding,
strides,
dilation,
relu_type,
input_zero_point,
input_scale,
kernel_scale,
depth_multiplier,
):
"""Tests QNN Depthwise int8 op via CMSIS-NN"""
interface_api = "c"
use_unpacked_api = True
test_runner = AOT_USMP_CORSTONE300_RUNNER

dtype = "int8"
in_min, in_max = get_range_for_dtype_str(dtype)

ifm_shape = (1, 24, 24, 1)
groups = ifm_shape[3]
weight_format = "HWIO"
(kernel_h, kernel_w) = (3, 3)
kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier)
out_channels = ifm_shape[3] * depth_multiplier
enable_bias = True
ks_len = len(kernel_scale)
kernel_zero_point = 0
kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)]

output_scale, output_zero_point = get_conv2d_qnn_params(
kernel_shape,
input_scale,
input_zero_point,
kernel_scale,
kernel_zero_point,
dtype,
dtype,
dtype,
True,
)

model, params = make_model(
ifm_shape,
kernel_shape,
input_zero_point,
input_scale,
kernel_zero_point,
kernel_scale,
output_zero_point,
output_scale,
padding,
strides,
dilation,
groups,
dtype,
dtype,
out_channels,
weight_format,
enable_bias,
relu_type,
)
orig_mod = make_module(model)
cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

# validate pattern matching
assert_partitioned_function(orig_mod, cmsisnn_mod)

# generate reference output
rng = np.random.default_rng(12345)
inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)}
output_list = generate_ref_data(orig_mod["main"], inputs, params)

# validate presence of depthwise convolution
compiled_models = compile_models(
AOTTestModel(
module=cmsisnn_mod,
inputs=inputs,
outputs=output_list,
params=params,
output_tolerance=1,
),
interface_api,
use_unpacked_api,
pass_config=test_runner.pass_config,
)

cmsisnn_tir_mod = None
for target, mod in compiled_models[0].executor_factory.lowered_ir_mods.items():
if target.kind.name == "cmsis-nn":
cmsisnn_tir_mod = mod

cmsisnn_func = cmsisnn_tir_mod["tvmgen_default_cmsis_nn_main_0"]
call_extern = None
if isinstance(cmsisnn_func.body, tvm.tir.stmt.Evaluate):
call_extern = cmsisnn_func.body.value
else:
call_extern = cmsisnn_func.body.body.value
assert (
call_extern.args[0].value == "arm_depthwise_conv_wrapper_s8"
), "Relay Conv2D should be mapped to CMSIS-NN Depthwise Convolution."

# validate the output
run_and_check(
models=compiled_models,
runner=test_runner,
interface_api=interface_api,
)


def parameterize_for_invalid_model(test):
"""Generates non int8 inputs"""
in_dtype = ["uint8", "int8"]
Expand Down

0 comments on commit bfa07c3

Please sign in to comment.