[microNPU] Add support for transpose convolution (apache#9855)
Adds support for legalizing transpose convolution to a microNPU conv2d
operation for the case where strides == (2, 2), dilation == (1, 1), and no
output padding is required.

Change-Id: I485e2571913b3dcd7c75c46304f2f9a82f630ee0
lhutton1 authored and ylc committed Feb 16, 2022
1 parent 5b379e7 commit 2721698
Showing 11 changed files with 730 additions and 106 deletions.
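
Background (editorial): the legalization rewrites a stride-2 qnn.conv2d_transpose as a stride-1 NPU conv2d over a zero-upscaled IFM with a spatially flipped kernel, followed by a crop. A minimal single-channel NumPy sketch of that equivalence — illustrative only, assuming strides of (2, 2), no padding, and a kernel of size at least 2; it is not code from this commit:

    import numpy as np

    def conv2d_transpose_ref(x, w, s=2):
        # reference stride-s transpose convolution: scatter-accumulate
        h, k = x.shape[0], w.shape[0]
        out = np.zeros(((h - 1) * s + k, (h - 1) * s + k))
        for i in range(h):
            for j in range(h):
                out[i * s:i * s + k, j * s:j * s + k] += x[i, j] * w
        return out

    def legalized(x, w, s=2):
        # the rewrite used by this commit: zero-upscale, repad, stride-1 conv
        k = w.shape[0]
        up = np.zeros((x.shape[0] * s, x.shape[1] * s))
        up[::s, ::s] = x                                  # upscale="ZEROS"
        p = np.pad(up, ((k - 1, k - 2), (k - 1, k - 2)))  # legalize_padding, stride 2
        wf = w[::-1, ::-1]                                # np.flip over spatial axes
        n = p.shape[0] - k + 1
        out = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                out[i, j] = np.sum(p[i:i + k, j:j + k] * wf)
        return out

    x, w = np.random.rand(4, 4), np.random.rand(3, 3)  # square input for brevity
    assert np.allclose(conv2d_transpose_ref(x, w), legalized(x, w))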
82 changes: 82 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -353,6 +353,87 @@ def __call__(self, *args, **kwargs):
pass


class Conv2DTransposeRewriter(DFPatternCallback):
"""Convert conv2d_transpose related composite functions into
ethosu_conv2d_transpose operators."""

def __init__(self):
super().__init__(require_type=True)
self.pattern = (wildcard().has_attr({"Composite": "ethos-u.qnn_conv2d_transpose"}))(
wildcard()
)

def callback(
self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
) -> tvm.relay.Expr:
params = ethosu_patterns.QnnConv2DTransposeParams(post.op.body)
params.ifm.tensor = post.args[0]

ofm_shape = params.ofm.shape
legalize_padding = params.legalize_padding

weight_to_ohwi_transform_map = {"IOHW": [1, 2, 3, 0]}
weights_values = params.weights.values
weights_values_ohwi = np.transpose(
weights_values, weight_to_ohwi_transform_map[str(params.weights.layout)]
)
weights_values_ohwi = np.flip(weights_values_ohwi, (1, 2))
weights = relay.const(weights_values_ohwi, dtype=params.weights.values.dtype)

bias_values = (
params.biases.tensor.data.asnumpy()
if params.biases
else np.zeros((params.ifm.shape[-1]))
)
scale_bias = vela_api.pack_biases(
biases=bias_values,
ifm_scale=params.ifm.q_params.scale_f32,
ifm_dtype=np.dtype(params.ifm.dtype),
weight_scales=params.weights.q_params.scale_f32,
ofm_scale=params.ofm.q_params.scale_f32,
is_activation_tanh_or_sigmoid=False,
)

reduced_op = ethosu_ops.ethosu_conv2d(
ifm=post.args[0],
weight=weights,
scale_bias=relay.const(scale_bias, "uint8"),
lut=relay.const([], dtype="int8"),
ifm_scale=float(params.ifm.q_params.scale_f32),
ifm_zero_point=int(params.ifm.q_params.zero_point),
weight_zero_point=int(params.weights.q_params.zero_point),
ofm_scale=float(params.ofm.q_params.scale_f32),
ofm_zero_point=int(params.ofm.q_params.zero_point),
kernel_shape=params.kernel_shape,
ofm_channels=int(ofm_shape[-1]),
strides=(1, 1),
padding=legalize_padding,
dilation=params.dilation,
ifm_layout=str(params.ifm.layout),
ofm_layout=str(params.ofm.layout),
upscale="ZEROS",
)

# Remove the additional padding by 'cropping' back to the expected OFM size
return relay.strided_slice(reduced_op, (0, 0, 0, 0), ofm_shape)
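
# Editorial sketch, not part of this commit: the IOHW-to-OHWI weight relayout
# and spatial flip above, checked in isolation with NumPy (shapes illustrative):
#
#     w_iohw = np.arange(3 * 8 * 2 * 2).reshape(3, 8, 2, 2)  # (ifm, ofm, h, w)
#     w_ohwi = np.transpose(w_iohw, [1, 2, 3, 0])            # -> (ofm, h, w, ifm)
#     w_ohwi = np.flip(w_ohwi, (1, 2))                       # flip h and w for conv2d
#     assert w_ohwi.shape == (8, 2, 2, 3)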


@ir.transform.module_pass(opt_level=1)
class LegalizeConv2DTranspose:
"""This is the pass that wraps the Conv2DTransposeRewriter"""

def transform_module(
self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
) -> tvm.ir.IRModule:
for global_var, func in mod.functions.items():
func = rewrite(Conv2DTransposeRewriter(), func)
mod.update_func(global_var, func)
return mod

def __call__(self, *args, **kwargs):
pass


class DepthwiseConv2DRewriter(DFPatternCallback):
"""Convert ethosu.qnn_depthwise_conv2d composite functions to ethosu_depthwise_conv2d
operators"""
@@ -1379,6 +1460,7 @@ def transform_module(
"""
mod = LegalizeSplit()(mod)
mod = LegalizeConv2D()(mod)
mod = LegalizeConv2DTranspose()(mod)
mod = LegalizeDepthwiseConv2D()(mod)
mod = LegalizeMaxPooling()(mod)
mod = LegalizeAvgPooling()(mod)
9 changes: 8 additions & 1 deletion python/tvm/relay/backend/contrib/ethosu/te/convolution.py
@@ -115,10 +115,17 @@ def conv2d_compute(
stride_h, stride_w = [int(v) for v in strides]
dilation_h, dilation_w = [int(v) for v in dilation]
ofm_channels, kernel_h, kernel_w, ifm_channels = [int(v) for v in weight.shape]
upscale_factor = 2 if upscale != "NONE" else 1

# Compute operation for the IFM DMA pipeline
dmaed_ifm = dma_ifm_compute(
- ifm, ifm_layout, ifm_zero_point, ifm_scale, weight.shape[3], padding
+ ifm,
+ ifm_layout,
+ ifm_zero_point,
+ ifm_scale,
+ weight.shape[3],
+ padding,
+ upscale_factor,
)
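
# Editorial note: with upscale="ZEROS" or "NEAREST" (upscale_factor == 2) the
# DMA stage presents the IFM to the convolution as if both spatial dimensions
# were doubled, e.g. an NHWC IFM of (1, 8, 8, 3) is consumed as (1, 16, 16, 3).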

# 2D Convolution compute operation
2 changes: 1 addition & 1 deletion python/tvm/relay/backend/contrib/ethosu/tir/convolution.py
@@ -102,7 +102,7 @@ def get_conv2d_params(stmt, producers, consumers):
padding=serial_padding,
activation=serial_activation,
rounding_mode=attrs["rounding_mode"],
upscale="NONE",
upscale=attrs["upscale"],
),
output_pointer,
replace_pointer,
14 changes: 14 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/util.py
@@ -47,6 +47,20 @@ class QConv2DArgs(Enum):
WEIGHTS_SCALE = 5


class QConv2DTransposeArgs(Enum):
"""
This is a helper enum to obtain the correct index
of qnn.conv2d_transpose arguments.
"""

IFM = 0
WEIGHTS = 1
IFM_ZERO_POINT = 2
WEIGHTS_ZERO_POINT = 3
IFM_SCALE = 4
WEIGHTS_SCALE = 5


class RequantArgs(Enum):
"""
This is a helper enum to obtain the correct index
152 changes: 152 additions & 0 deletions python/tvm/relay/op/contrib/ethosu.py
@@ -276,6 +276,137 @@ def is_valid(self) -> bool:
return not self.is_depthwise


class QnnConv2DTransposeParams:
"""
This class will parse a call to an ethos-u.qnn_conv2d_transpose composite
function and extract the quantization information of all the associated tensors.
"""

composite_name = "ethos-u.qnn_conv2d_transpose"
# The NPU only supports padding up to the following bounds
padding_bounds = [31, 31, 32, 32]

@requires_vela
def __init__(self, func_body: tvm.relay.Function):
from tvm.relay.backend.contrib.ethosu.util import QConv2DTransposeArgs # type: ignore
from tvm.relay.backend.contrib.ethosu.util import BiasAddArgs
from tvm.relay.backend.contrib.ethosu.util import RequantArgs

requantize = func_body
call = func_body.args[0]
if str(call.op) == "nn.bias_add":
bias_add = call
call = call.args[0]
else:
bias_add = None
qnn_conv2d_transpose = call

data_layout = qnn_conv2d_transpose.attrs.data_layout
self.kernel_layout = qnn_conv2d_transpose.attrs.kernel_layout

self.weights = TensorParams(
qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS.value],
self.kernel_layout,
qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS_SCALE.value],
qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS_ZERO_POINT.value],
)
self.biases = (
TensorParams(
bias_add.args[BiasAddArgs.BIASES.value],
data_layout,
requantize.args[RequantArgs.IFM_SCALE.value],
requantize.args[RequantArgs.IFM_ZERO_POINT.value],
)
if bias_add
else None
)
self.ifm = TensorParams(
qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM.value],
data_layout,
qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM_SCALE.value],
qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM_ZERO_POINT.value],
)
self.ofm = TensorParams(
func_body,
data_layout,
requantize.args[RequantArgs.OFM_SCALE.value],
requantize.args[RequantArgs.OFM_ZERO_POINT.value],
)

attrs = qnn_conv2d_transpose.attrs
self.strides = attrs.strides
self.dilation = attrs.dilation
self.padding = attrs.padding
self.channels = attrs.channels
self.groups = attrs.groups
self.output_padding = attrs.output_padding

kernel_size_map = {
"IOHW": self.weights.shape[2:4],
}
self.kernel_shape = kernel_size_map[str(self.weights.layout)]

# Different padding is used when legalizing conv2d_transpose to conv2d,
# so we calculate it here to check that the new size fits within the
# bounds of the NPU before offloading.
pad_top = int(self.kernel_shape[0]) - 1 - int(self.padding[0])
pad_left = int(self.kernel_shape[1]) - 1 - int(self.padding[1])
pad_bottom = int(self.kernel_shape[0]) - 1 - int(self.padding[2])
pad_right = int(self.kernel_shape[1]) - 1 - int(self.padding[3])
if self.strides == [2, 2]:
pad_bottom -= 1
pad_right -= 1
self.legalize_padding = [pad_top, pad_left, pad_bottom, pad_right]
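
# Worked example (editorial, not part of the original source): a 3x3 kernel
# with strides (2, 2) and padding [0, 0, 0, 0] gives
# pad_top = pad_left = 3 - 1 - 0 = 2 and, after the stride-2 adjustment,
# pad_bottom = pad_right = 3 - 1 - 0 - 1 = 1, i.e.
# legalize_padding == [2, 2, 1, 1].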

def is_valid(self) -> bool:
"""
This function checks whether QnnConv2DTranspose has compatible attributes with the NPU
"""

def check_compatible_output_size(ifm_shape, ofm_shape, padding, strides, kernel_shape):
is_valid_padding = padding == [0, 0, 0, 0]
if is_valid_padding:
expected_height = ifm_shape[1] * strides[0] + (kernel_shape[0] - strides[0])
expected_width = ifm_shape[2] * strides[1] + (kernel_shape[1] - strides[1])
else:
expected_height = ifm_shape[1] * strides[0]
expected_width = ifm_shape[2] * strides[1]
return ofm_shape[1] == expected_height and ofm_shape[2] == expected_width

tensor_params = [self.weights, self.ifm, self.ofm]
if not check_valid_dtypes(tensor_params, supported_dtypes=[np.int8]):
return False
if not check_weights(self.weights, self.dilation):
return False
if self.biases and not check_bias(self.biases):
return False
if not check_strides(self.strides, stride_range=(2, 2)):
return False
if not check_batch_size(self.ifm):
return False
if not check_dilation(self.dilation, dilation_range=(1, 1)):
return False
if not check_compatible_output_size(
self.ifm.shape,
self.ofm.shape,
[int(x) for x in self.padding],
self.strides,
self.kernel_shape,
):
return False
if not check_padding(self.legalize_padding, self.padding_bounds):
return False
if self.kernel_shape[0] - 2 - int(self.padding[2]) < 0:
return False
if self.kernel_shape[1] - 2 - int(self.padding[3]) < 0:
return False
if self.groups != 1:
return False
if list(self.output_padding) != [0, 0]:
return False
return True
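
# Worked example (editorial, not part of the original source): for an IFM of
# (1, 8, 8, 3) with a 3x3 kernel, strides (2, 2) and padding [0, 0, 0, 0], the
# compatible OFM spatial size is 8 * 2 + (3 - 2) = 17 in each dimension.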


class QnnDepthwiseConv2DParams(QnnConv2DParams):
"""
This class will parse a call to a ethosu.depthwise_conv2d composite function
@@ -348,6 +479,22 @@ def qnn_depthwise_conv2d_pattern() -> tvm.relay.dataflow_pattern.DFPattern:
return clip_or_req


def qnn_conv2d_transpose_pattern() -> tvm.relay.dataflow_pattern.DFPattern:
"""
This function creates the pattern for qnn.conv2d_transpose.
"""
qnn_conv2d_transpose = is_op("qnn.conv2d_transpose")(
wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant()
).has_attr({"kernel_layout": "IOHW"})
optional_bias_add = (
is_op("nn.bias_add")(qnn_conv2d_transpose, is_constant()) | qnn_conv2d_transpose
)
req = is_op("qnn.requantize")(
optional_bias_add, is_constant(), is_constant(), is_constant(), is_constant()
)
return req
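
# Editorial sketch, not part of this commit: a graph this pattern is intended
# to match (shapes, scales and zero points are illustrative, and the qnn
# keyword names are assumed from the qnn.conv2d_transpose Python API):
#
#     x = relay.var("x", shape=(1, 8, 8, 3), dtype="int8")
#     w = relay.const(np.zeros((3, 8, 3, 3), dtype="int8"))  # IOHW
#     conv = relay.qnn.op.conv2d_transpose(
#         x, w,
#         input_zero_point=relay.const(0, "int32"),
#         kernel_zero_point=relay.const(0, "int32"),
#         input_scale=relay.const(0.5, "float32"),
#         kernel_scale=relay.const(0.5, "float32"),
#         channels=8, kernel_size=(3, 3), strides=(2, 2),
#         data_layout="NHWC", kernel_layout="IOHW", out_dtype="int32",
#     )
#     conv = relay.nn.bias_add(conv, relay.const(np.zeros(8, "int32")), axis=3)
#     out = relay.qnn.op.requantize(
#         conv,
#         relay.const(0.25, "float32"), relay.const(0, "int32"),
#         relay.const(0.125, "float32"), relay.const(0, "int32"),
#         out_dtype="int8",
#     )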


class MaxPool2DParams:
"""
This class will parse a call to a ethos-u.maxpool2d composite function
@@ -1299,6 +1446,11 @@ def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Cal
qnn_depthwise_conv2d_pattern(),
lambda pat: QnnDepthwiseConv2DParams(pat).is_valid(),
),
(
QnnConv2DTransposeParams.composite_name,
qnn_conv2d_transpose_pattern(),
lambda pat: QnnConv2DTransposeParams(pat).is_valid(),
),
(
MaxPool2DParams.composite_name,
qnn_maxpool2d_pattern(),
17 changes: 16 additions & 1 deletion src/relay/op/contrib/ethosu/convolution.cc
@@ -154,6 +154,15 @@ bool EthosuConv2DRel(const Array<Type>& types, int num_inputs, const Attrs& attr
return false;
}

const std::unordered_set<std::string> upscale_methods = {"NONE", "ZEROS", "NEAREST"};
if (upscale_methods.find(param->upscale) == upscale_methods.end()) {
reporter->GetDiagCtx().EmitFatal(Diagnostic::Error(reporter->GetSpan())
<< "Invalid operator: Expected upsample method to be 'NONE', "
"'ZEROS' or 'NEAREST' but got "
<< param->upscale);
return false;
}

// The scale_bias should be provided as a tensor of size {ofm_channels, 10}
reporter->Assign(types[2], TensorType({weight->shape[0], 10}, DataType::UInt(8)));

@@ -162,10 +171,16 @@ bool EthosuConv2DRel(const Array<Type>& types, int num_inputs, const Attrs& attr
param->kernel_shape[1], weight->shape[3]},
weight->dtype));

Array<IndexExpr> ifm_shape = ifm->shape;
if (param->upscale != "NONE") {
ifm_shape = EthosuInferUpscaledInput(ifm_shape, param->ifm_layout);
}

// Assign ofm type
auto ofm_shape =
- EthosuInferKernelOutput(ifm->shape, param->ifm_layout, param->ofm_layout, param->kernel_shape,
+ EthosuInferKernelOutput(ifm_shape, param->ifm_layout, param->ofm_layout, param->kernel_shape,
param->ofm_channels, param->dilation, param->strides, param->padding);

reporter->Assign(types[4], TensorType(ofm_shape, ifm->dtype));
return true;
}
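The upscale check added above admits three methods. A hedged NumPy model of the two non-trivial ones (editorial; the NPU semantics are modeled here, not exercised):

    import numpy as np

    ifm = np.array([[1, 2],
                    [3, 4]])
    zeros = np.zeros((4, 4), ifm.dtype)
    zeros[::2, ::2] = ifm                              # "ZEROS": interleave zeros
    nearest = ifm.repeat(2, axis=0).repeat(2, axis=1)  # "NEAREST": repeat values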
3 changes: 2 additions & 1 deletion tests/python/contrib/test_ethosu/infra.py
@@ -423,6 +423,7 @@ def make_ethosu_conv2d(
weight_dtype="int8",
scale_bias_dtype="uint8",
rounding_mode="TFL",
upscale="NONE",
):
# conv params
weight_shape = (ofm_channels, kernel_shape[0], kernel_shape[1], ifm_channels)
@@ -451,7 +452,7 @@
clip_min=10 if activation == "CLIP" else 0,
clip_max=100 if activation == "CLIP" else 0,
rounding_mode=rounding_mode,
upscale="NONE",
upscale=upscale,
ifm_layout=ifm_layout,
ofm_layout=ofm_layout,
)