diff --git a/apps/microtvm/zephyr_cmsisnn/CMakeLists.txt b/apps/microtvm/zephyr_cmsisnn/CMakeLists.txt index b09e1d0642d2..dd3582f86f7d 100644 --- a/apps/microtvm/zephyr_cmsisnn/CMakeLists.txt +++ b/apps/microtvm/zephyr_cmsisnn/CMakeLists.txt @@ -53,6 +53,11 @@ set(DATA_FILES ) set(CMSIS_SOURCES ${CMSIS_PATH}/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c + ${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c + ${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c + ${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c + ${CMSIS_PATH}/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c + ${CMSIS_PATH}/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c ${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c ${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c ${CMSIS_PATH}/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c diff --git a/src/relay/backend/contrib/cmsisnn/convolutions.cc b/src/relay/backend/contrib/cmsisnn/convolutions.cc new file mode 100644 index 000000000000..ebac83b81250 --- /dev/null +++ b/src/relay/backend/contrib/cmsisnn/convolutions.cc @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "convolutions.h" + +#include + +#include "../../../qnn/utils.h" +#include "tvm/ir/transform.h" +#include "tvm/relay/attrs/nn.h" + +namespace tvm { +namespace relay { +namespace contrib { +namespace cmsisnn { + +bool IsCMSISNNDepthwise(const Conv2DAttrs* conv2d_attrs, const Array& input_shape, + const Array& kernel_shape) { + std::string kernel_layout = conv2d_attrs->kernel_layout.c_str(); + int kernel_pos_o = kernel_layout.find("O"); + int kernel_pos_i = kernel_layout.find("I"); + int kernel_dim_o_val = qnn::get_const_int(kernel_shape[kernel_pos_o]); + int kernel_dim_i_val = qnn::get_const_int(kernel_shape[kernel_pos_i]); + int64_t out_channels = conv2d_attrs->channels.as()->value; + return (out_channels == kernel_dim_o_val * kernel_dim_i_val); +} + +} // namespace cmsisnn +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/relay/backend/contrib/cmsisnn/convolutions.h b/src/relay/backend/contrib/cmsisnn/convolutions.h new file mode 100644 index 000000000000..e635702bf353 --- /dev/null +++ b/src/relay/backend/contrib/cmsisnn/convolutions.h @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/cmsisnn/convolutions.h + * \brief CMSIS-NN utility functions for Convolutions + */ + +#ifndef TVM_RELAY_BACKEND_CONTRIB_CMSISNN_CONVOLUTIONS_H_ +#define TVM_RELAY_BACKEND_CONTRIB_CMSISNN_CONVOLUTIONS_H_ + +#include +#include +#include +#include +#include + +#include "../../../op/make_op.h" +#include "../../../qnn/utils.h" +#include "../../../transforms/pattern_utils.h" + +namespace tvm { +namespace relay { +namespace contrib { +namespace cmsisnn { +/*! + * \brief Checks if Relay Conv2D was originally CMSIS-NN compliant Depthwise Convolution + * See: + * https://github.com/apache/tvm/blob/6ed3ab3e33f8eafa4acaf53b7a671831de7587e9/python/tvm/relay/frontend/tflite.py#L2107 + * + * + * \return true if a Conv2D is a Depthwise Convolution based on Conv2D's inputs' shapes and + * attributes + */ + +bool IsCMSISNNDepthwise(const Conv2DAttrs* conv2d_attrs, const Array& input_shape, + const Array& kernel_shape); + +} // namespace cmsisnn +} // namespace contrib +} // namespace relay +} // namespace tvm + +#endif // TVM_RELAY_BACKEND_CONTRIB_CMSISNN_CONVOLUTIONS_H_ diff --git a/src/relay/backend/contrib/cmsisnn/generate_constants.cc b/src/relay/backend/contrib/cmsisnn/generate_constants.cc index 450bcf26d1b3..297e6b7acea3 100644 --- a/src/relay/backend/contrib/cmsisnn/generate_constants.cc +++ b/src/relay/backend/contrib/cmsisnn/generate_constants.cc @@ -31,6 +31,7 @@ #include "../../../op/make_op.h" #include "../../../qnn/utils.h" #include "../../../transforms/pattern_utils.h" +#include "convolutions.h" namespace tvm { namespace relay { @@ -111,11 +112,7 @@ class GenerateConstantsMutator : public MixedModeMutator { Array input_shape = conv2d_call->args[0]->type_as()->shape; Array kernel_shape = conv2d_call->args[1]->type_as()->shape; - std::string kernel_layout = conv2d_attrs->kernel_layout.c_str(); - int kernel_pos_o = kernel_layout.find("O"); - int groups = conv2d_attrs->groups; - if (groups != qnn::get_const_int(input_shape[3]) || - groups != qnn::get_const_int(kernel_shape[kernel_pos_o])) { + if (!IsCMSISNNDepthwise(conv2d_attrs, input_shape, kernel_shape)) { // Transpose weights: HWIO -> OHWI for Conv2D conv2d_kernel = ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs); } diff --git a/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc b/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc index 5c99061fa854..d1d1d20d6e34 100644 --- a/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc +++ b/src/relay/backend/contrib/cmsisnn/relay_to_tir.cc @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -31,6 +30,7 @@ #include "../../../transforms/pattern_utils.h" #include "buffer_size.h" #include "compiler_attrs.h" +#include "convolutions.h" namespace tvm { namespace relay { @@ -173,7 +173,6 @@ class RelayToTIRVisitor : public MixedModeMutator { int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]); int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]); int32_t out_channels = qnn::get_const_int(conv2d_attrs->channels); - int32_t groups = conv2d_attrs->groups; std::string kernel_layout = conv2d_attrs->kernel_layout.c_str(); int32_t clip_min = std::numeric_limits::min(); int32_t clip_max = std::numeric_limits::max(); @@ -207,11 +206,13 @@ class RelayToTIRVisitor : public MixedModeMutator { int32_t output_c = qnn::get_const_int(output_shape[3]); int32_t depth_multiplier = -1; - int kernel_pos_o = kernel_layout.find("O"); - if (groups == qnn::get_const_int(input_shape[3]) && - groups == qnn::get_const_int(filter_shape[kernel_pos_o])) { + if (IsCMSISNNDepthwise(conv2d_attrs, input_shape, filter_shape)) { + // Refer to TVM frontend to know how depth multiplier and out_channels are related + // https://github.com/apache/tvm/blob/6ed3ab3e33f8eafa4acaf53b7a671831de7587e9/python/tvm/relay/frontend/tflite.py#L2129 int kernel_pos_i = kernel_layout.find("I"); - depth_multiplier = qnn::get_const_int(filter_shape[kernel_pos_i]); + int kernel_pos_o = kernel_layout.find("O"); + int kernel_pos_dm = input_c == 1 ? kernel_pos_o : kernel_pos_i; + depth_multiplier = qnn::get_const_int(filter_shape[kernel_pos_dm]); } scalar_args.push_back(ToArg(depth_multiplier)); diff --git a/tests/python/contrib/test_cmsisnn/test_conv2d.py b/tests/python/contrib/test_cmsisnn/test_conv2d.py index 462eb8834719..0b15c5a2466c 100644 --- a/tests/python/contrib/test_cmsisnn/test_conv2d.py +++ b/tests/python/contrib/test_cmsisnn/test_conv2d.py @@ -23,8 +23,13 @@ from tvm import relay from tvm.relay.op.contrib import cmsisnn -from tvm.testing.aot import generate_ref_data, AOTTestModel, compile_models, compile_and_run - +from tvm.testing.aot import ( + generate_ref_data, + AOTTestModel, + compile_models, + compile_and_run, + run_and_check, +) from tvm.micro.testing.aot_test_utils import AOT_USMP_CORSTONE300_RUNNER from .utils import ( make_module, @@ -84,13 +89,14 @@ def make_model( ) ) weight_const = relay.const(weight, kernel_dtype) + conv2d_kernel_sc = kernel_scale[0] if out_channels == 1 else kernel_scale conv = relay.qnn.op.conv2d( invar, weight_const, input_zero_point=relay.const(input_zero_point, "int32"), kernel_zero_point=relay.const(kernel_zero_point, "int32"), input_scale=relay.const(input_scale, "float32"), - kernel_scale=relay.const(kernel_scale, "float32"), + kernel_scale=relay.const(conv2d_kernel_sc, "float32"), kernel_size=(kernel_h, kernel_w), data_layout="NHWC", kernel_layout=weight_format, @@ -105,6 +111,7 @@ def make_model( bias_const = relay.const(bias, "int32") last_op = relay.nn.bias_add(conv, bias_const, axis=3) if enable_bias else conv requant_input_sc = [sc * input_scale for sc in kernel_scale] + requant_input_sc = requant_input_sc[0] if out_channels == 1 else requant_input_sc last_op = relay.qnn.op.requantize( last_op, relay.const(requant_input_sc, "float32"), @@ -209,7 +216,7 @@ def test_conv2d_number_primfunc_args( cmsisnn_func = cmsisnn_tir_mod["tvmgen_default_cmsis_nn_main_0"] assert ( len(cmsisnn_func.params) == expected_num_params - ), "Generated unexpected number of function arguments" + ), "Generated unexpected number of function arguments." @tvm.testing.requires_cmsisnn @@ -540,6 +547,135 @@ def test_depthwise_int8( ) +@tvm.testing.requires_cmsisnn +@pytest.mark.parametrize("padding", ["SAME", "VALID"]) +@pytest.mark.parametrize("strides, dilation", [((1, 1), (1, 1))]) +@pytest.mark.parametrize("relu_type", ["RELU", "NONE"]) +@pytest.mark.parametrize("depth_multiplier", [1, 3]) +@pytest.mark.parametrize( + "input_zero_point, input_scale, kernel_scale", + [ + ( + 10, + 0.0128, + [0.11, 0.22], + ), + ( + -64, + 1, + [1, 0.0256, 1.37], + ), + ], +) +def test_relay_conv2d_cmsisnn_depthwise_int8( + padding, + strides, + dilation, + relu_type, + input_zero_point, + input_scale, + kernel_scale, + depth_multiplier, +): + """Tests QNN Depthwise int8 op via CMSIS-NN""" + interface_api = "c" + use_unpacked_api = True + test_runner = AOT_USMP_CORSTONE300_RUNNER + + dtype = "int8" + in_min, in_max = get_range_for_dtype_str(dtype) + + ifm_shape = (1, 24, 24, 1) + groups = ifm_shape[3] + weight_format = "HWIO" + (kernel_h, kernel_w) = (3, 3) + kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier) + out_channels = ifm_shape[3] * depth_multiplier + enable_bias = True + ks_len = len(kernel_scale) + kernel_zero_point = 0 + kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)] + + output_scale, output_zero_point = get_conv2d_qnn_params( + kernel_shape, + input_scale, + input_zero_point, + kernel_scale, + kernel_zero_point, + dtype, + dtype, + dtype, + True, + ) + + model, params = make_model( + ifm_shape, + kernel_shape, + input_zero_point, + input_scale, + kernel_zero_point, + kernel_scale, + output_zero_point, + output_scale, + padding, + strides, + dilation, + groups, + dtype, + dtype, + out_channels, + weight_format, + enable_bias, + relu_type, + ) + orig_mod = make_module(model) + cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params) + + # validate pattern matching + assert_partitioned_function(orig_mod, cmsisnn_mod) + + # generate reference output + rng = np.random.default_rng(12345) + inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)} + output_list = generate_ref_data(orig_mod["main"], inputs, params) + + # validate presence of depthwise convolution + compiled_models = compile_models( + AOTTestModel( + module=cmsisnn_mod, + inputs=inputs, + outputs=output_list, + params=params, + output_tolerance=1, + ), + interface_api, + use_unpacked_api, + pass_config=test_runner.pass_config, + ) + + cmsisnn_tir_mod = None + for target, mod in compiled_models[0].executor_factory.lowered_ir_mods.items(): + if target.kind.name == "cmsis-nn": + cmsisnn_tir_mod = mod + + cmsisnn_func = cmsisnn_tir_mod["tvmgen_default_cmsis_nn_main_0"] + call_extern = None + if isinstance(cmsisnn_func.body, tvm.tir.stmt.Evaluate): + call_extern = cmsisnn_func.body.value + else: + call_extern = cmsisnn_func.body.body.value + assert ( + call_extern.args[0].value == "arm_depthwise_conv_wrapper_s8" + ), "Relay Conv2D should be mapped to CMSIS-NN Depthwise Convolution." + + # validate the output + run_and_check( + models=compiled_models, + runner=test_runner, + interface_api=interface_api, + ) + + def parameterize_for_invalid_model(test): """Generates non int8 inputs""" in_dtype = ["uint8", "int8"]