This repository has been archived by the owner on Nov 25, 2022. It is now read-only.

Commit fb1efc3

[Adreno] Change compute/schedule for ToMixedPrecision pass (apache#12537)
* [Adreno] Change compute/schedule for ToMixedPrecision pass

* Address CI failures

* Address PR comments

* Fix AutoTVM flow
elvin-n authored and xinetzone committed Nov 25, 2022
1 parent 3821c90 commit fb1efc3
Showing 11 changed files with 218 additions and 395 deletions.
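For context before the per-file diffs: the strategies changed here are the ones TVM selects after a Relay module has been rewritten by the ToMixedPrecision pass named in the commit title. A minimal sketch of driving that pass (the helper name and flow are illustrative, not part of this commit):

from tvm import relay

def convert_to_fp16(mod):
    # Illustrative helper: infer types first, then rewrite the module so ops
    # such as conv2d take float16 inputs; the Adreno strategies changed in
    # this commit are what TVM then picks for the resulting fp16 conv2d ops.
    mod = relay.transform.InferType()(mod)
    mod = relay.transform.ToMixedPrecision("float16")(mod)
    return mod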
142 changes: 42 additions & 100 deletions python/tvm/relay/op/strategy/adreno.py
@@ -36,8 +36,10 @@ def conv2d_strategy_adreno(attrs, inputs, out_type, target):
         raise ValueError("dilation should be positive value")
 
     if groups == 1:
-        if (data_layout == "NCHW" and kernel_layout == "OIHW") or (
-            data_layout == "NCHW4c" and kernel_layout == "OIHW4o"
+        if (
+            (data_layout == "NCHW" and kernel_layout == "OIHW")
+            or (data_layout == "NCHW4c" and kernel_layout == "OIHW4o")
+            or (data_layout == "NCHW" and kernel_layout == "OIHW4o")
         ):
             if len(kernel.shape) == 4:
                 _, _, kh, kw = get_const_tuple(kernel.shape)
@@ -47,35 +49,24 @@ def conv2d_strategy_adreno(attrs, inputs, out_type, target):
                 (2 < kh < 8 and 2 < kw < 8 and kh == kw)
                 and (stride_h == 1 and stride_w == 1)
                 and (dilation_h == 1 and dilation_w == 1)
+                and not (data_layout == "NCHW" and kernel_layout == "OIHW4o")
             ):
-                if out_type.dtype == "float16":
-                    strategy.add_implementation(
-                        wrap_compute_conv2d(topi.adreno.conv2d_nchw_winograd),
-                        wrap_topi_schedule(topi.adreno.schedule_conv2d_nchw_winograd),
-                        name="conv2d_nchw_winograd.image2d",
-                        plevel=5,
-                    )
-                strategy.add_implementation(
-                    wrap_compute_conv2d(topi.adreno.conv2d_nchw_winograd_acc32),
-                    wrap_topi_schedule(topi.adreno.schedule_conv2d_nchw_winograd_acc32),
-                    name="conv2d_nchw_winograd_acc32.image2d",
-                    plevel=7,
-                )
-            if out_type.dtype == "float16":
-                strategy.add_implementation(
-                    wrap_compute_conv2d(topi.adreno.conv2d_nchwc),
-                    wrap_topi_schedule(topi.adreno.schedule_conv2d_nchwc),
-                    name="conv2d_nchwc.image2d",
-                    plevel=10,
-                )
-            strategy.add_implementation(
-                wrap_compute_conv2d(topi.adreno.conv2d_nchwc_acc32),
-                wrap_topi_schedule(topi.adreno.schedule_conv2d_nchwc_acc32),
-                name="conv2d_nchwc_acc32.image2d",
-                plevel=20,
-            )
+                strategy.add_implementation(
+                    wrap_compute_conv2d(topi.adreno.conv2d_nchw_winograd),
+                    wrap_topi_schedule(topi.adreno.schedule_conv2d_nchw_winograd),
+                    name="conv2d_nchw_winograd.image2d",
+                    plevel=5,
+                )
+            strategy.add_implementation(
+                wrap_compute_conv2d(topi.adreno.conv2d_nchwc),
+                wrap_topi_schedule(topi.adreno.schedule_conv2d_nchwc),
+                name="conv2d_nchwc.image2d",
+                plevel=10,
+            )
-        elif (data_layout == "NHWC" and kernel_layout == "HWIO") or (
-            data_layout == "NHWC4c" and kernel_layout == "HWIO4o"
+        elif (
+            (data_layout == "NHWC" and kernel_layout == "HWIO")
+            or (data_layout == "NHWC4c" and kernel_layout == "HWIO4o")
+            or (data_layout == "NHWC" and kernel_layout == "HWIO4o")
         ):
             if len(kernel.shape) == 4:
                 kh, kw, _, _ = get_const_tuple(kernel.shape)
@@ -85,32 +76,19 @@ def conv2d_strategy_adreno(attrs, inputs, out_type, target):
                 (2 < kh < 8 and 2 < kw < 8 and kh == kw)
                 and (stride_h == 1 and stride_w == 1)
                 and (dilation_h == 1 and dilation_w == 1)
+                and not (data_layout == "NHWC" and kernel_layout == "HWIO4o")
             ):
-                if out_type.dtype == "float16":
-                    strategy.add_implementation(
-                        wrap_compute_conv2d(topi.adreno.conv2d_nhwc_winograd),
-                        wrap_topi_schedule(topi.adreno.schedule_conv2d_nhwc_winograd),
-                        name="conv2d_nhwc_winograd.image2d",
-                        plevel=5,
-                    )
-                strategy.add_implementation(
-                    wrap_compute_conv2d(topi.adreno.conv2d_nhwc_winograd_acc32),
-                    wrap_topi_schedule(topi.adreno.schedule_conv2d_nhwc_winograd_acc32),
-                    name="conv2d_nhwc_winograd_acc32.image2d",
-                    plevel=7,
-                )
-            if out_type.dtype == "float16":
-                strategy.add_implementation(
-                    wrap_compute_conv2d(topi.adreno.conv2d_nhwc),
-                    wrap_topi_schedule(topi.adreno.schedule_conv2d_nhwc),
-                    name="conv2d_nhwc.image2d",
-                    plevel=10,
-                )
-            strategy.add_implementation(
-                wrap_compute_conv2d(topi.adreno.conv2d_nhwc_acc32),
-                wrap_topi_schedule(topi.adreno.schedule_conv2d_nhwc_acc32),
-                name="conv2d_nhwc_acc32.image2d",
-                plevel=20,
-            )
+                strategy.add_implementation(
+                    wrap_compute_conv2d(topi.adreno.conv2d_nhwc_winograd),
+                    wrap_topi_schedule(topi.adreno.schedule_conv2d_nhwc_winograd),
+                    name="conv2d_nhwc_winograd.image2d",
+                    plevel=5,
+                )
+            strategy.add_implementation(
+                wrap_compute_conv2d(topi.adreno.conv2d_nhwc),
+                wrap_topi_schedule(topi.adreno.schedule_conv2d_nhwc),
+                name="conv2d_nhwc.image2d",
+                plevel=10,
+            )
         else:
             raise RuntimeError(
@@ -149,35 +127,21 @@ def conv2d_strategy_adreno(attrs, inputs, out_type, target):
         if (data_layout == "NCHW" and kernel_layout == "OIHW") or (
             data_layout == "NCHW4c" and kernel_layout == "OIHW4o"
         ):
-            if out_type.dtype == "float16":
-                strategy.add_implementation(
-                    wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nchwc),
-                    wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nchwc),
-                    name="depthwise_conv2d_nchwc.image2d",
-                    plevel=10,
-                )
-            strategy.add_implementation(
-                wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nchwc_acc32),
-                wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nchwc_acc32),
-                name="depthwise_conv2d_nchwc_acc32.image2d",
-                plevel=20,
-            )
+            strategy.add_implementation(
+                wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nchwc),
+                wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nchwc),
+                name="depthwise_conv2d_nchwc.image2d",
+                plevel=10,
+            )
         elif (data_layout == "NHWC" and kernel_layout == "HWOI") or (
             data_layout == "NHWC4c" and kernel_layout == "HWOI4o"
         ):
             if data.shape[-1] >= 4:
-                if out_type.dtype == "float16":
-                    strategy.add_implementation(
-                        wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nhwc),
-                        wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nhwc),
-                        name="depthwise_conv2d_nhwc.image2d",
-                        plevel=10,
-                    )
-                strategy.add_implementation(
-                    wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nhwc_acc32),
-                    wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nhwc_acc32),
-                    name="depthwise_conv2d_nhwc_acc32.image2d",
-                    plevel=20,
-                )
+                strategy.add_implementation(
+                    wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nhwc),
+                    wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nhwc),
+                    name="depthwise_conv2d_nhwc.image2d",
+                    plevel=10,
+                )
             else:
                 strategy.add_implementation(
@@ -208,40 +172,18 @@ def conv2d_winograd_without_weight_transfrom_strategy_adreno(attrs, inputs, out_type, target):
     assert groups == 1, "Do not supoort arbitrary group number"
     strategy = _op.OpStrategy()
     if layout in ("NCHW", "NCHW4c"):
-        if out_type.dtype == "float16":
-            strategy.add_implementation(
-                wrap_compute_conv2d(topi.adreno.conv2d_nchw_winograd_without_weight_transform),
-                wrap_topi_schedule(
-                    topi.adreno.schedule_conv2d_nchw_winograd_without_weight_transform
-                ),
-                name="conv2d_nchw_winograd_without_weight_transform.image2d",
-                plevel=5,
-            )
-        strategy.add_implementation(
-            wrap_compute_conv2d(topi.adreno.conv2d_nchw_winograd_without_weight_transform_acc32),
-            wrap_topi_schedule(
-                topi.adreno.schedule_conv2d_nchw_winograd_without_weight_transform_acc32
-            ),
-            name="conv2d_nchw_winograd_without_weight_transform_acc32.image2d",
-            plevel=7,
-        )
+        strategy.add_implementation(
+            wrap_compute_conv2d(topi.adreno.conv2d_nchw_winograd_without_weight_transform),
+            wrap_topi_schedule(topi.adreno.schedule_conv2d_nchw_winograd_without_weight_transform),
+            name="conv2d_nchw_winograd_without_weight_transform.image2d",
+            plevel=5,
+        )
     elif layout in ("NHWC", "NHWC4c"):
-        if out_type.dtype == "float16":
-            strategy.add_implementation(
-                wrap_compute_conv2d(topi.adreno.conv2d_nhwc_winograd_without_weight_transform),
-                wrap_topi_schedule(
-                    topi.adreno.schedule_conv2d_nhwc_winograd_without_weight_transform
-                ),
-                name="conv2d_nhwc_winograd_without_weight_transform.image2d",
-                plevel=5,
-            )
-        strategy.add_implementation(
-            wrap_compute_conv2d(topi.adreno.conv2d_nhwc_winograd_without_weight_transform_acc32),
-            wrap_topi_schedule(
-                topi.adreno.schedule_conv2d_nhwc_winograd_without_weight_transform_acc32
-            ),
-            name="conv2d_nhwc_winograd_without_weight_transform_acc32.image2d",
-            plevel=7,
-        )
+        strategy.add_implementation(
+            wrap_compute_conv2d(topi.adreno.conv2d_nhwc_winograd_without_weight_transform),
+            wrap_topi_schedule(topi.adreno.schedule_conv2d_nhwc_winograd_without_weight_transform),
+            name="conv2d_nhwc_winograd_without_weight_transform.image2d",
+            plevel=5,
+        )
     else:
         raise RuntimeError(
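Note on the strategy diffs above: each conv2d variant now registers a single implementation instead of separate fp16 and fp16-with-fp32-accumulation ("acc32") entries, so the accumulator choice no longer multiplies strategy registrations. The plevel values still decide the default pick. A runnable toy model of that selection rule (this mirrors, but is not, TVM's OpStrategy internals):

def select_implementation(impls):
    # impls: (name, plevel) pairs registered for one op. Without AutoTVM
    # tuning records the highest plevel wins; with tuning, the best
    # measured candidate overrides this default.
    return max(impls, key=lambda impl: impl[1])

impls = [("conv2d_nchw_winograd.image2d", 5), ("conv2d_nchwc.image2d", 10)]
assert select_implementation(impls)[0] == "conv2d_nchwc.image2d"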
48 changes: 30 additions & 18 deletions python/tvm/topi/adreno/conv2d_alter_op.py
@@ -304,24 +304,30 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
         num_filter_block = 4
 
     # no support yet for tensors that cannot be divisible by factor 4
-    if in_channel_block != 4 or num_filter_block != 4:
+    if num_filter_block != 4:
         return None
 
     batch_size, in_channel, height, width = get_const_tuple(data_tensor.shape)
     out_channel, in_filter_channel, kh, kw = get_const_tuple(kernel_tensor.shape)
 
     # update new attrs
     new_attrs["channels"] = out_channel
-    new_attrs["data_layout"] = "NCHW%dc" % in_channel_block
+    if in_channel_block == 4:
+        new_attrs["data_layout"] = "NCHW%dc" % in_channel_block
+    else:
+        new_attrs["data_layout"] = "NCHW"
     # (oc, ic, h, w) -> (OC, ic, h, w, oc)
     new_attrs["kernel_layout"] = "OIHW%do" % num_filter_block
    new_attrs["out_layout"] = "NCHW%dc" % num_filter_block
 
     # Store altered operator's config for applying of tuned AutoTVM statistics
-    new_data = te.placeholder(
-        (batch_size, in_channel // in_channel_block, height, width, in_channel_block),
-        dtype=data_dtype,
-    )
+    if in_channel_block == 4:
+        new_data = te.placeholder(
+            (batch_size, in_channel // in_channel_block, height, width, in_channel_block),
+            dtype=data_dtype,
+        )
+    else:
+        new_data = data_tensor
     new_kernel = te.placeholder(
         (out_channel // num_filter_block, in_filter_channel, kh, kw, num_filter_block),
         dtype=kernel_tensor.dtype,
@@ -361,12 +367,15 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
         num_filter_block = 4
 
     # no support yet for tensors cannot be divisible by factor 4
-    if in_channel_block != 4 or num_filter_block != 4:
+    if num_filter_block != 4:
         return None
 
     # update new attrs
     new_attrs["channels"] = out_channles
-    new_attrs["data_layout"] = "NHWC%dc" % in_channel_block
+    if in_channel_block == 4:
+        new_attrs["data_layout"] = "NHWC%dc" % in_channel_block
+    else:
+        new_attrs["data_layout"] = "NHWC"
     # (h, w, ic, oc) -> (h, w, ic, OC, oc)
     if kernel_layout == "HWIO":
         new_attrs["kernel_layout"] = "HWIO%do" % num_filter_block
@@ -375,16 +384,19 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
     new_attrs["out_layout"] = "NHWC%dc" % num_filter_block
 
     # Store altered operator's config for applying of tuned AutoTVM statistics
-    new_data = te.placeholder(
-        (
-            batch_size,
-            in_height,
-            in_width,
-            in_channels // in_channel_block,
-            in_channel_block,
-        ),
-        dtype=data_dtype,
-    )
+    if in_channel_block == 4:
+        new_data = te.placeholder(
+            (
+                batch_size,
+                in_height,
+                in_width,
+                in_channels // in_channel_block,
+                in_channel_block,
+            ),
+            dtype=data_dtype,
+        )
+    else:
+        new_data = data_tensor
     if kernel_layout == "HWIO":
         new_kernel = te.placeholder(
             (
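The hunks above relax the old requirement that both channel dimensions be divisible by 4: only the output channels must now be blocked, and when the input channels do not divide by 4 the data tensor keeps its unblocked NCHW/NHWC layout (new_data stays the original tensor, keeping the stored AutoTVM workload consistent with the real input). A self-contained sketch of the resulting layout decision (the helper name and return convention are illustrative, not TVM API):

def choose_layouts(in_channels, out_channels, base="NCHW"):
    # Output channels must divide by 4 to use the blocked kernel/output
    # layouts; the data layout is blocked only when the input channels
    # also divide by 4, otherwise it stays unblocked (e.g. plain NCHW).
    if out_channels % 4 != 0:
        return None  # unsupported here: leave layouts unchanged
    data_layout = base + "4c" if in_channels % 4 == 0 else base
    kernel_layout = "OIHW4o" if base == "NCHW" else "HWIO4o"
    return data_layout, kernel_layout, base + "4c"

assert choose_layouts(16, 32) == ("NCHW4c", "OIHW4o", "NCHW4c")
assert choose_layouts(3, 32) == ("NCHW", "OIHW4o", "NCHW4c")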
