[Relay][AutoTVM] Relay op strategy (apache#4644)
* relay op strategy

fix lint

bitpack strategy

bitserial_dense (apache#6)

* update strategy

* address comments

fix a few topi tests

Dense strategy (apache#5)

* dense

* add bifrost; remove comments

* address comment

Refactor x86 conv2d_NCHWc (#4)

* Refactor x86 conv2d

* Add x86 depthwise_conv2d_NCHWc

* Add back topi x86 conv2d_nchw

* Merge x86 conv2d_nchw and conv2d_NCHWc

* Minor fix for x86 conv2d

fix more strategy

Add x86 conv2d_NCHWc_int8 strategy (apache#8)

* Add x86 conv2d_NCHWc_int8 strategy

* Remove contrib_conv2d_nchwc_int8

* Fix generic conv2d_NCHWc for int8

* Fix topi arm_cpu conv2d_NCHWc_int8

update x86 conv2d

enable specifying relay ops to be tuned for autotvm

add cuda conv2d strategy

add conv2d strategy for rocm

add conv2d strategy for hls

add conv2d strategy for arm cpu

add conv2d strategy for mali

add conv2d strategy for bifrost

add conv2d strategy for intel graphics

clean up and fix lint

remove template keys from autotvm

remove 2 in the func name

address comments

fix

* fix bugs

* lint

* address comments

* add name to op implement

* Modify topi tests (apache#9)

* Add pooling, reorg, softmax and vision

* Add lrn

* fix topi test

* fix more topi test

* lint

* address comments

* x

* fix more tests & bugs

* Modify more tests (apache#10)

* Modify tests for bitserial_conv2d, bitserial_dense, bitserial_conv2d_rasp and bnn

* Minor fix

* More minor fix

* fix more tests

* try to update vta using strategy

* fix cpptest

* x

* fix rebase err

* Fix two tests (apache#11)

* change autotvm log format

* lint

* minor fix

* try to fix vta test

* fix rebase err

* tweak

* tmp hack for vta pass

* fix tutorial

* fix

* fix more tutorials

* fix vta tutorial

* minor

* address comments

* fix

* address comments

* fix cpptest

* fix docs

* change data structure name and api

* address comments

* lint

* fix rebase err

* updates

* fix winograd test

* fix doc

* rebase

* upgrade tophub version number

* fix bug

* re-enable vta tsim test after tophub is upgraded

* fix vta test to use the correct args so the config can be found in tophub

Co-authored-by: Yao Wang <kevinthesunwy@gmail.com>
icemelon and kevinthesun authored Feb 24, 2020
1 parent 2753b8b commit 7fa68ba
Showing 14 changed files with 172 additions and 203 deletions.
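
For readers who want the shape of the new machinery rather than the per-file hunks: each relay op now carries an FTVMStrategy attribute, a per-target strategy function returns an OpStrategy holding one or more (compute, schedule) implementations, and AutoTVM template keys are replaced by string task names such as "conv2d_packed.vta". The sketch below is not part of the diff; it shows how tuning tasks are selected by relay op after this change, assuming a TVM build from around this commit (exact keyword names may differ slightly in other versions, and IRModule may still be relay.Module in slightly older builds).

import tvm
from tvm import relay, autotvm

# A tiny conv2d-only module so the sketch is self-contained.
data = relay.var("data", shape=(1, 3, 224, 224), dtype="float32")
weight = relay.var("weight", shape=(16, 3, 3, 3), dtype="float32")
net = relay.nn.conv2d(data, weight, padding=(1, 1), kernel_size=(3, 3))
mod = tvm.IRModule.from_expr(relay.Function([data, weight], net))

# Tuning tasks are now selected by relay op, not by topi symbol + template key.
tasks = autotvm.task.extract_from_program(
    mod["main"], target="llvm", params={},
    ops=(relay.op.get("nn.conv2d"),))

for task in tasks:
    # Task names follow the new "<implementation>.<target>" convention,
    # e.g. "conv2d_NCHWc.x86"; tuning logs and tophub are keyed the same way.
    print(task.name, task.args)

Per-target behaviour, including the VTA overrides in the files below, then comes from the strategy functions registered for that target.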
8 changes: 6 additions & 2 deletions python/vta/ir_pass.py
@@ -662,8 +662,12 @@ def _do_fold(op):
0, 0,
0, 0, 0))
inner = irb.get()
args = op.body.body.args
res_tensor = op.body.body.func.output(0)
# TODO(@tmoreau89): This is only a temporary fix, please take a look.
body = op.body.body
while isinstance(body, tvm.stmt.IfThenElse):
body = body.then_case
args = body.args
res_tensor = body.func.output(0)
tpl = (args[0], 1, args[1], 1, args[2], 1, args[3], 1, 0, 1, 0, env.BLOCK_OUT)
inner = tvm.tir.AttrStmt(
[dout, res_tensor], 'buffer_bind_scope',
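
The TODO in this hunk refers to the fact that the new lowering path can wrap the store this pass pattern-matches in one or more IfThenElse nodes; the temporary fix simply peels them off. A tiny self-contained illustration of that idiom, using a hypothetical statement rather than real VTA IR:

import tvm

# Hypothetical nested conditionals standing in for what the new compile path
# may wrap around the innermost statement.
x = tvm.var("x")
innermost = tvm.tir.Evaluate(tvm.const(0, "int32"))
stmt = tvm.tir.IfThenElse(x > 0, tvm.tir.IfThenElse(x > 1, innermost, None), None)

# The fix in _do_fold: peel IfThenElse wrappers until the real body is reached.
body = stmt
while isinstance(body, tvm.stmt.IfThenElse):
    body = body.then_case
assert body.same_as(innermost)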
8 changes: 4 additions & 4 deletions python/vta/top/__init__.py
@@ -20,8 +20,8 @@
from . import bitpack
from .graphpack import graph_pack
from . import op
from . import vta_conv2d
from . import vta_conv2d_transpose
from . import vta_group_conv2d
from . import vta_dense
from .vta_conv2d import conv2d_packed, schedule_conv2d_packed
from .vta_conv2d_transpose import conv2d_transpose_packed, schedule_conv2d_transpose_packed
from .vta_group_conv2d import group_conv2d_packed, schedule_group_conv2d_packed
from .vta_dense import dense_packed, schedule_dense_packed
from . import util
5 changes: 2 additions & 3 deletions python/vta/top/bitpack.py
@@ -22,9 +22,8 @@
import tvm
from topi import util

from tvm.relay.op.op import register_compute, register_schedule
from tvm.relay.op.op import register_compute, register_injective_schedule
from tvm.relay.op.op import register_pattern, OpPattern
from tvm.relay.op.op import schedule_injective

def bitpack(data, bits, pack_type="int8", name="bitpack"):
"""Packs lowest dimension into format needed by VTA
@@ -86,5 +85,5 @@ def compute_bitpack(attrs, inputs):
bits = 8 // lanes
return bitpack(inputs[0], bits, dtype)

register_schedule("bitpack", schedule_injective)
register_injective_schedule("bitpack")
register_pattern("bitpack", OpPattern.INJECTIVE)
217 changes: 80 additions & 137 deletions python/vta/top/op.py
@@ -22,19 +22,22 @@
import topi

from tvm.relay.op import op as reg
from tvm.relay.op.op import OpPattern
from tvm.relay.op.nn import _nn
from tvm.relay.op import strategy as _strategy
from tvm.relay.op.op import OpPattern, OpStrategy

from .util import is_packed_layout
from .vta_conv2d import conv2d_packed, schedule_conv2d_packed
from .vta_conv2d_transpose import conv2d_transpose_packed, schedule_conv2d_transpose_packed
from .vta_group_conv2d import group_conv2d_packed, schedule_group_conv2d_packed
from .vta_dense import dense_packed, schedule_dense_packed
from ..environment import get_env


# override to force partition at copy
reg.register_pattern("copy", OpPattern.INJECTIVE, level=15)


@reg.register_compute("clip", level=15)
def compute_clip(attrs, inputs, output_type, target):
# add clip vta strategy
def compute_clip_vta(attrs, inputs, output_type):
""" Clip operator. """
x = inputs[0]
a_min = attrs.a_min
@@ -48,139 +51,79 @@ def compute_clip(attrs, inputs, output_type, target):
x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
return [x]


@reg.register_compute("nn.conv2d", level=15)
def compute_conv2d(attrs, inputs, output_type, target):
""" Compute definition of conv2d """
padding = topi.util.get_const_tuple(attrs.padding)
strides = topi.util.get_const_tuple(attrs.strides)
dilation = tuple([int(d) for d in attrs.dilation])
def clip_strategy_vta(attrs, inputs, out_type, target):
strategy = OpStrategy()
strategy.add_implementation(
compute_clip_vta,
_strategy.wrap_topi_schedule(topi.generic.schedule_injective),
name="clip.vta")
return strategy

reg.get("clip").get_attr("FTVMStrategy").register(clip_strategy_vta, "vta")

@_strategy.conv2d_strategy.register("vta")
def conv2d_strategy_vta(attrs, inputs, out_type, target):
"""conv2d vta strategy"""
strategy = OpStrategy()
kernel = inputs[1]
dilation = topi.util.get_const_tuple(attrs.dilation)
groups = attrs.groups
layout = attrs.data_layout
out_dtype = attrs.out_dtype

if target.device_name == "vta":
assert dilation == (1, 1), "support for dilation limited to (1, 1)"
if is_packed_layout(layout):
if groups == 1:
assert groups == 1
env = get_env()
assert env.LOG_INP_WIDTH == 3, "only support 8bit inp for now"
assert env.LOG_WGT_WIDTH == 3, "only support 8bit wgt for now"
inputs = list(inputs)
assert inputs[1].dtype == "int8"
return [topi.nn.conv2d(inputs[0],
inputs[1],
strides,
padding,
dilation,
layout,
out_dtype)]
return [topi.nn.group_conv2d_nchw(inputs[0],
inputs[1],
strides,
padding,
dilation,
groups,
out_dtype)]
# If it's not packed, run on ARM CPU
with tvm.target.arm_cpu(tvm.target.Target.current().model):
return _nn.compute_conv2d(attrs, inputs, output_type, target)

# If VTA is not the target, default to _nn def
return _nn.compute_conv2d(attrs, inputs, output_type, target)


@reg.register_schedule("nn.conv2d", level=15)
def schedule_conv2d(attrs, outs, target):
""" Schedule definition of conv2d """
groups = attrs.groups
layout = attrs.data_layout

if target.device_name == "vta":
if is_packed_layout(layout):
target = tvm.target.create(target)
assert target.device_name == "vta"
if groups == 1:
return topi.generic.schedule_conv2d_nchw(outs)
return topi.generic.schedule_group_conv2d_nchw(outs)
# If it's not packed, run on ARM CPU
with tvm.target.arm_cpu(tvm.target.Target.current().model):
return _nn.schedule_conv2d(attrs, outs, tvm.target.Target.current())

# If VTA is not the target, default to _nn def
return _nn.schedule_conv2d(attrs, outs, target)


@reg.register_compute("nn.conv2d_transpose", level=15)
def compute_conv2d_transpose(attrs, inputs, output_type, target):
""" 2D convolution algorithm.
"""
padding = topi.util.get_const_tuple(attrs.padding)
strides = topi.util.get_const_tuple(attrs.strides)
dilation = tuple([int(d) for d in attrs.dilation])
layout = attrs.data_layout
out_dtype = attrs.out_dtype

if target.device_name == "vta":
assert dilation == (1, 1), "support for dilation limited to (1, 1)"
if is_packed_layout(layout):
return [topi.nn.conv2d_transpose_nchw(
inputs[0], inputs[1], strides, padding, out_dtype)]
# If it's not packed, run on ARM CPU
with tvm.target.arm_cpu(tvm.target.Target.current().model):
return _nn.compute_conv2d_transpose(attrs, inputs, output_type, target)

# If VTA is not the target, default to _nn def
return _nn.compute_conv2d_transpose(attrs, inputs, output_type, target)


@reg.register_schedule("nn.conv2d_transpose", level=15)
def schedule_conv2d_transpose(attrs, outputs, target):
""" 2D convolution schedule.
"""
assert dilation == (1, 1), "support for dilation limited to (1, 1)"
if is_packed_layout(layout):
if groups == 1:
env = get_env()
assert env.LOG_INP_WIDTH == 3, "only support 8bit inp for now"
assert env.LOG_WGT_WIDTH == 3, "only support 8bit wgt for now"
assert kernel.dtype == "int8"

strategy.add_implementation(
_strategy.wrap_compute_conv2d(conv2d_packed, True),
_strategy.wrap_topi_schedule(schedule_conv2d_packed),
name="conv2d_packed.vta")
else: # group_conv2d
strategy.add_implementation(
_strategy.wrap_compute_conv2d(group_conv2d_packed, has_groups=True),
_strategy.wrap_topi_schedule(schedule_group_conv2d_packed),
name="group_conv2d_packed.vta")
return strategy

# If it's not packed, run on ARM CPU
arm_tgt = tvm.target.arm_cpu(target.model)
return _strategy.arm_cpu.conv2d_strategy_arm_cpu(attrs, inputs, out_type, arm_tgt)


@_strategy.conv2d_transpose_strategy.register("vta")
def conv2d_transpose_strategy_vta(attrs, inputs, out_type, target):
"""conv2d_transpose vta strategy"""
dilation = topi.util.get_const_tuple(attrs.dilation)
layout = attrs.data_layout

if target.device_name == "vta":
if is_packed_layout(layout):
return topi.nn.schedule_conv2d_transpose_nchw(outputs)
# If it's not packed, run on ARM CPU
with tvm.target.arm_cpu(tvm.target.Target.current().model):
return _nn.schedule_conv2d_transpose(attrs, outputs, tvm.target.Target.current())

# If VTA is not the target, default to _nn def
return _nn.schedule_conv2d_transpose(attrs, outputs, tvm.target.Target.current())


@reg.register_compute("nn.dense", level=15)
def compute_dense(attrs, inputs, out_type, target):
"""Compute definition of dense"""
out_dtype = attrs.out_dtype
out_dtype = inputs[0].dtype if out_dtype == "" else out_dtype

if target.device_name == "vta":
if inputs[0].shape == 4: # this implies the layout is packed
target = tvm.target.create(target)
return [topi.nn.dense(inputs[0], inputs[1], None, out_dtype)]
# If it's not packed, run on ARM CPU
with tvm.target.arm_cpu(tvm.target.Target.current().model):
return _nn.compute_dense(attrs, inputs, out_type, target)

# If VTA is not the target, default to _nn def
return _nn.compute_dense(attrs, inputs, out_type, target)


@reg.register_schedule("nn.dense", level=15)
def schedule_dense(attrs, outs, target):
"""Schedule definition of dense"""
if target.device_name == "vta":
if outs[0].shape == 4: # this implies the layout is packed
target = tvm.target.create(target)
assert target.device_name == "vta"
return topi.generic.schedule_dense(outs)
# If it's not packed, run on ARM CPU
with tvm.target.arm_cpu(tvm.target.Target.current().model):
return _nn.schedule_dense(attrs, outs, tvm.target.Target.current())

# If VTA is not the target, default to _nn def
return _nn.schedule_dense(attrs, outs, target)
assert dilation == (1, 1), "support for dilation limited to (1, 1)"

if is_packed_layout(layout):
strategy = OpStrategy()
strategy.add_implementation(
_strategy.wrap_compute_conv2d_transpose(conv2d_transpose_packed),
_strategy.wrap_topi_schedule(schedule_conv2d_transpose_packed),
name="conv2d_transpose_packed.vta")
return strategy

# If it's not packed, run on ARM CPU
arm_tgt = tvm.target.arm_cpu(target.model)
return _strategy.arm_cpu.conv2d_transpose_strategy_arm_cpu(attrs, inputs, out_type, arm_tgt)


@_strategy.dense_strategy.register("vta")
def dense_strategy_vta(attrs, inputs, out_type, target):
"""dense vta strategy"""
if inputs[0].shape == 4: # this implies the layout is packed
strategy = OpStrategy()
strategy.add_implementation(
_strategy.wrap_compute_dense(dense_packed),
_strategy.wrap_topi_schedule(schedule_dense_packed),
name="dense_packed.vta")
return strategy
# If it's not packed, run on ARM CPU
arm_tgt = tvm.target.arm_cpu(target.model)
return _strategy.x86.dense_strategy_cpu(attrs, inputs, out_type, arm_tgt)
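
The pattern above repeats for every op this file overrides: a strategy function registered for the "vta" target builds an OpStrategy and attaches one (compute, schedule) pair per implementation, while ops without a dedicated strategy generic function (such as clip) are overridden through the op's FTVMStrategy attribute. A minimal sketch of the same skeleton for an out-of-tree target follows; the target key "mytarget", the implementation name, and the plain NCHW topi functions are illustrative placeholders, not part of this diff.

import topi
from tvm.relay.op import strategy as _strategy
from tvm.relay.op.op import OpStrategy

@_strategy.conv2d_strategy.register("mytarget")  # hypothetical target key
def conv2d_strategy_mytarget(attrs, inputs, out_type, target):
    """conv2d strategy for a hypothetical target."""
    strategy = OpStrategy()
    strategy.add_implementation(
        _strategy.wrap_compute_conv2d(topi.nn.conv2d_nchw),
        _strategy.wrap_topi_schedule(topi.generic.schedule_conv2d_nchw),
        name="conv2d_nchw.mytarget",
        plevel=10)  # when several implementations apply, the highest plevel wins
    return strategy

Relative to the removed register_compute/register_schedule pairs, the strategy indirection is what lets several implementations of one op coexist on a target and lets AutoTVM pick among them.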
16 changes: 5 additions & 11 deletions python/vta/top/vta_conv2d.py
@@ -25,15 +25,8 @@
from .util import is_packed_layout
from ..environment import get_env

@autotvm.register_topi_compute(topi.nn.conv2d, 'vta', 'direct')
def _declaration_conv2d(cfg,
data,
kernel,
strides,
padding,
dilation,
layout,
out_dtype):
@autotvm.register_topi_compute("conv2d_packed.vta")
def conv2d_packed(cfg, data, kernel, strides, padding, dilation, layout, out_dtype):
""" Packed conv2d function."""
if not is_packed_layout(layout):
raise topi.InvalidShapeError()
@@ -69,8 +62,9 @@ def _declaration_conv2d(cfg,

return res

@autotvm.register_topi_schedule(topi.generic.schedule_conv2d_nchw, 'vta', 'direct')
def _schedule_conv2d(cfg, outs):
@autotvm.register_topi_schedule("conv2d_packed.vta")
def schedule_conv2d_packed(cfg, outs):
"""Schedule packed conv2d"""
assert len(outs) == 1
output = outs[0]
const_ops = []
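
This file (like the conv2d_transpose, dense and group_conv2d files below) shows the other half of the change: AutoTVM computes and schedules are no longer registered against a topi symbol plus a 'direct' template key but against a plain task-name string. A minimal sketch of the new decorator pair on a made-up task; "demo_add.vta" and the trivial compute/schedule are hypothetical, not part of this diff.

import tvm
from tvm import autotvm

@autotvm.register_topi_compute("demo_add.vta")  # hypothetical task name
def demo_add(cfg, a, b):
    # Tuning knobs are still declared on cfg; only the registration key changed.
    cfg.define_knob("unroll", [False, True])
    return tvm.compute(a.shape, lambda *i: a(*i) + b(*i), name="demo_add")

@autotvm.register_topi_schedule("demo_add.vta")
def schedule_demo_add(cfg, outs):
    # Trivial schedule for the sketch; a real one reads cfg like the files here.
    return tvm.create_schedule([t.op for t in outs])

Callers invoke demo_add(a, b) without the cfg argument; the decorator pulls the current config from the dispatch context. Because tasks, logs and tophub entries are now keyed by these strings, the commit also changes the autotvm log format and bumps the tophub version.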
15 changes: 6 additions & 9 deletions python/vta/top/vta_conv2d_transpose.py
@@ -26,13 +26,9 @@

from ..environment import get_env

@autotvm.register_topi_compute(topi.nn.conv2d_transpose_nchw, 'vta', 'direct')
def _declatation_conv2d_transpose(cfg,
data,
kernel,
strides,
padding,
out_dtype):
@autotvm.register_topi_compute("conv2d_transpose_packed.vta")
def conv2d_transpose_packed(cfg, data, kernel, strides, padding, out_dtype):
"""Packed conv2d_transpose compute"""
ishape = get_const_tuple(data.shape)
kshape = get_const_tuple(kernel.shape)
b, c_i, i_h, i_w, t_b, t_ci = ishape
@@ -75,8 +71,9 @@ def _declatation_conv2d_transpose(cfg,

return out

@autotvm.register_topi_schedule(topi.generic.schedule_conv2d_transpose_nchw, 'vta', 'direct')
def _schedule_conv2d_transpose(cfg, outs):
@autotvm.register_topi_schedule("conv2d_transpose_packed.vta")
def schedule_conv2d_transpose_packed(cfg, outs):
"""Schedule packed conv2d_transpose"""
assert len(outs) == 1
output = outs[0]
ewise_inputs = []
12 changes: 4 additions & 8 deletions python/vta/top/vta_dense.py
@@ -32,12 +32,8 @@ def is_packed_layout(layout):
return True
return False

@autotvm.register_topi_compute(topi.nn.dense, 'vta', 'direct')
def _declaration_dense(cfg,
data,
weight,
bias=None,
out_dtype=None):
@autotvm.register_topi_compute("dense_packed.vta")
def dense_packed(cfg, data, weight, bias=None, out_dtype=None):
"""Dense function declaration."""

# Make sure that the dense operator is packed
@@ -67,8 +63,8 @@ def _declaration_dense(cfg,

return res

@autotvm.register_topi_schedule(topi.generic.schedule_dense, 'vta', 'direct')
def _schedule_dense(cfg, outs):
@autotvm.register_topi_schedule("dense_packed.vta")
def schedule_dense_packed(cfg, outs):
"""Packed dense schedule."""

assert len(outs) == 1
8 changes: 4 additions & 4 deletions python/vta/top/vta_group_conv2d.py
@@ -24,8 +24,8 @@

from ..environment import get_env

@autotvm.register_topi_compute(topi.nn.group_conv2d_nchw, 'vta', 'direct')
def packed_group_conv2d(cfg,
@autotvm.register_topi_compute("group_conv2d_packed.vta")
def group_conv2d_packed(cfg,
data,
kernel,
strides,
@@ -74,8 +74,8 @@ def packed_group_conv2d(cfg,
return out


@autotvm.register_topi_schedule(topi.generic.schedule_group_conv2d_nchw, 'vta', 'direct')
def schedule_packed_group_conv2d(cfg, outs):
@autotvm.register_topi_schedule("group_conv2d_packed.vta")
def schedule_group_conv2d_packed(cfg, outs):
""" Schedule the packed conv2d.
"""
assert len(outs) == 1