fixing annotations for copies and improving test coverage
Change-Id: Ib812c4151fab03f4c1adcc016b4e798003a22e5e
lhutton1 committed May 17, 2022
1 parent dae79a8 commit 954c56e
Showing 6 changed files with 148 additions and 43 deletions.
24 changes: 22 additions & 2 deletions python/tvm/contrib/ethosu/cascader/plan_generator.py
@@ -15,9 +15,9 @@
# specific language governing permissions and limitations
# under the License.
"""Algorithms to generate Plans for a CascaderGraph."""
from typing import List, Dict
from typing import List, Dict, Tuple

from tvm.contrib.ethosu.cascader.tensor_config import MemoryRegion
from tvm.contrib.ethosu.cascader.tensor_config import MemoryRegion, TensorConfig

from . import _ffi_api
from .cascader_options import CascaderOptions
@@ -55,3 +55,23 @@ def _generate_graph_plans(
home_map,
options,
)


def get_copy_cycles_hint(tensor_config: TensorConfig) -> Tuple[int, int]:
"""
Returns a hint estimating the number of cycles for the copy
specified by tensor_config.
Parameters
----------
tensor_config : TensorConfig
The tensor configuration to estimate.
Returns
-------
mem2mem_cycles : int
Total estimated cycles.
initial_mem2mem_cycles : int
Estimated cycles for the first block.
"""
return _ffi_api.GetCopyCyclesHint(tensor_config)
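
For orientation, a minimal usage sketch of the new helper; `tensor_config` is assumed to be an existing TensorConfig produced by the cascader for a tensor whose home and copy regions differ:

from tvm.contrib.ethosu.cascader.plan_generator import get_copy_cycles_hint

# `tensor_config` is assumed to already exist (home_region != copy_region).
mem2mem_cycles, initial_mem2mem_cycles = get_copy_cycles_hint(tensor_config)
# mem2mem_cycles estimates the whole copy; initial_mem2mem_cycles
# estimates just the first block of the copy.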
5 changes: 2 additions & 3 deletions python/tvm/contrib/ethosu/cascader/scheduler.py
@@ -31,6 +31,7 @@
from .tensor_config import MemoryRegion
from .proposal import Proposal
from .proposal_generator import generate_proposals
from .plan_generator import get_copy_cycles_hint
from .graph import create_cascader_graph
from .device_config import EthosuDeviceConfig
from .logging import Logging
@@ -176,9 +177,7 @@ def apply_proposal(proposal: Proposal, sch: te.Schedule) -> None:
if tensor_config.home_region != tensor_config.copy_region:
copy_te_tensors.append(part.subgraph.input_tensors[i])

compute_cycles_hint = part.get_performance_info(
tensor_config.stripe_configs[0], tensor_config.buffer_mode
).compute_cycles
compute_cycles_hint, _ = get_copy_cycles_hint(tensor_config)
compute_cycles_hints.append(compute_cycles_hint)

for te_tensor, compute_cycles_hint in zip(copy_te_tensors, compute_cycles_hints):
7 changes: 4 additions & 3 deletions python/tvm/relay/backend/contrib/ethosu/tir/scheduler.py
@@ -265,9 +265,10 @@ def _detect_cache_read(stage):
fax = stage.fuse(*stage.op.axis)

# propagate pragmas placed on the outer loop
attrs = stage.iter_var_attrs[stage.op.axis[0]]
for k, v in zip(attrs.pragma_keys, attrs.pragma_values):
stage.pragma(fax, k.value, v)
if len(stage.op.axis) > 0 and stage.op.axis[0] in stage.iter_var_attrs:
attrs = stage.iter_var_attrs[stage.op.axis[0]]
for k, v in zip(attrs.pragma_keys, attrs.pragma_values):
stage.pragma(fax, k.value, v)

stage.pragma(fax, "op", "ethosu_copy")

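
The guard added above matters for copies with no spatial axes (e.g. a scalar constant, likely why the scalar-add test below was added) and for stages whose outer axis carries no pragmas. A plain-Python stand-in for the pattern, with hypothetical values rather than the real te.Stage/IterVar objects:

# A scalar constant's op has no axes, so axis[0] would raise IndexError;
# an axis with no pragmas is absent from iter_var_attrs, so a bare lookup
# would raise KeyError. The combined check avoids both failure modes.
axis = []
iter_var_attrs = {}

if len(axis) > 0 and axis[0] in iter_var_attrs:
    attrs = iter_var_attrs[axis[0]]
    # ... propagate attrs.pragma_keys / attrs.pragma_values to the fused axis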
64 changes: 45 additions & 19 deletions src/contrib/ethosu/cascader/plan_generator.cc
@@ -301,6 +301,42 @@ int GetInteriorMemoryUsage(const std::vector<TensorConfig>& input_configs,
return memory_usage;
}

/**
* \brief Returns a hint estimating the number of cycles required for
* the copy specified by tensor_config.
*
* \param tensor_config The tensor configuration to estimate.
 * \return A pair {mem2mem_cycles, initial_mem2mem_cycles}: the total
 * estimated cycles and the estimated cycles for the first block.
*/
std::pair<int, int> GetCopyCyclesHint(const TensorConfig& tensor_config) {
Tensor tensor = tensor_config->GetTensor();
MemoryRegion home_region = tensor_config->GetHomeRegion();
MemoryRegion copy_region = tensor_config->GetCopyRegion();
int initial_mem2mem_cycles = 0;
int mem2mem_cycles = 0;

// This Tensor needs to be copied - Count stripes for this config
for (const auto& stripe_config : tensor_config->GetStripeConfigs()) {
std::map<std::vector<int>, int> input_blocks = CountStripes(stripe_config, true);
bool first_block = true;
for (const auto& block : input_blocks) {
int bytes_transferred = mul_reduce(block.first) * tensor->GetDataType().bytes() *
tensor->GetCompressionRatio() * block.second;
int read_cycles = bytes_transferred * home_region->read_bandwidth + home_region->read_latency;
int write_cycles = bytes_transferred * copy_region->write_bandwidth;

if (first_block) {
first_block = false;
initial_mem2mem_cycles += std::max(read_cycles, write_cycles);
}
mem2mem_cycles += std::max(read_cycles, write_cycles);
}
}

return {mem2mem_cycles, initial_mem2mem_cycles};
}
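
As a worked example of the estimate above, with made-up parameters (a sketch that mirrors the per-block arithmetic of GetCopyCyclesHint; the real bandwidth and latency values come from the MemoryRegion objects):

# Hypothetical block: block.first is the block shape, block.second the stripe count.
block_shape = [1, 4, 4, 16]
stripe_count = 4
dtype_bytes = 1            # int8
compression_ratio = 1.0

volume = 1
for dim in block_shape:    # mul_reduce(block.first)
    volume *= dim

bytes_transferred = int(volume * dtype_bytes * compression_ratio * stripe_count)

read_bandwidth, read_latency = 4, 32   # assumed home-region parameters
write_bandwidth = 4                    # assumed copy-region parameter

read_cycles = bytes_transferred * read_bandwidth + read_latency
write_cycles = bytes_transferred * write_bandwidth

# Each block is bound by the slower of read and write; mem2mem_cycles sums
# this over all blocks, and initial_mem2mem_cycles records the first block's value.
block_cycles = max(read_cycles, write_cycles)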

std::vector<Plan> GenerateSinglePlans(
const Part& part, const std::vector<StripeConfig>& output_stripe_configs,
const std::unordered_map<Tensor, std::vector<MemoryRegion>, ObjectPtrHash, ObjectPtrEqual>&
@@ -372,28 +408,12 @@ std::vector<Plan> GenerateSinglePlans(
BlockConfig block_config = perf_info->block_config;
for (size_t i = 0; i < input_configs.size(); i++) {
Tensor tensor = input_configs[i]->GetTensor();
MemoryRegion home_region = input_configs[i]->GetHomeRegion();
MemoryRegion copy_region = input_configs[i]->GetCopyRegion();

if (input_configs[i]->DoCopy()) {
// This Tensor needs to be copied - Count stripes for this config
for (const auto& stripe_config : input_configs[i]->GetStripeConfigs()) {
std::map<std::vector<int>, int> input_blocks = CountStripes(stripe_config, true);
bool first_block = true;
for (const auto& block : input_blocks) {
int bytes_transferred = mul_reduce(block.first) * tensor->GetDataType().bytes() *
tensor->GetCompressionRatio() * block.second;
int read_cycles = bytes_transferred * home_region->read_bandwidth +
input_configs[i]->GetHomeRegion()->read_latency;
int write_cycles = bytes_transferred * copy_region->write_bandwidth;

if (first_block) {
first_block = false;
initial_mem2mem_cycles += std::max(read_cycles, write_cycles);
}
mem2mem_cycles += std::max(read_cycles, write_cycles);
}
}
std::pair<int, int> ret = GetCopyCyclesHint(input_configs[i]);
mem2mem_cycles += ret.first;
initial_mem2mem_cycles += ret.second;
}
float read_efficiency =
GetTransferEfficiency(tensor, block_config->GetInputBlockShape(), copy_region);
@@ -585,6 +605,12 @@ TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GenerateGraphPlans")
return tclosed_plans;
});

TVM_REGISTER_GLOBAL("contrib.ethosu.cascader.GetCopyCyclesHint")
.set_body_typed([](TensorConfig tensor_config) {
std::pair<int, int> ret = GetCopyCyclesHint(tensor_config);
return Array<Integer>({ret.first, ret.second});
});
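
For completeness, the newly registered global is also reachable directly through the FFI; a sketch, assuming `tensor_config` already exists (the Python wrapper get_copy_cycles_hint above is the intended entry point):

import tvm

f = tvm.get_global_func("contrib.ethosu.cascader.GetCopyCyclesHint")
hint = f(tensor_config)    # an Array of two Integers
mem2mem, initial_mem2mem = int(hint[0]), int(hint[1])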

} // namespace cascader
} // namespace ethosu
} // namespace contrib
89 changes: 74 additions & 15 deletions tests/python/contrib/test_ethosu/cascader/test_integration.py
@@ -24,13 +24,16 @@

pytest.importorskip("ethosu.vela")

import numpy as np

import tvm
from tvm import relay
from tvm.relay.backend.contrib.ethosu.codegen import _create_cascader, copy_constants
from tvm.relay.backend.contrib.ethosu.codegen import _create_cascader
from tvm.relay.backend.contrib.ethosu.tir.compiler import _lower_to_tir
from tvm.contrib.ethosu.cascader import MemoryRegion, EthosuDeviceConfig, CascaderOptions
from tvm.contrib.ethosu.cascader import MemoryRegion, EthosuDeviceConfig

from .. import infra
from .. import infra as test_infra
from . import infra as cascader_test_infra


def _ethos_u55_cascader():
@@ -46,12 +49,16 @@ def _ethos_u55_cascader():
flash = MemoryRegion(name="FLASH", size=10**7, read_bandwidth=4, write_bandwidth=4)

device_config = EthosuDeviceConfig("ethos-u55-256")
cascader_options = CascaderOptions(
cascader_options = cascader_test_infra.make_options(
cascade_region=sram,
max_proposals=64,
stripe_factors=5,
stripe_factors=4,
max_plan_size=10,
max_open_plans=8,
max_closed_plans=32,
always_copy_size=1024,
disable_pareto_plans=False,
disable_pareto_proposals=False,
enable_striping=False,
)
return _create_cascader(
@@ -63,24 +70,76 @@
)


def _compile_model(relay_function):
mod = tvm.IRModule()
mod["main"] = relay_function
mod = relay.transform.InferType()(mod)
tir_mod = _lower_to_tir(mod["main"], _ethos_u55_cascader())[0]
return tir_mod["main"]


def _create_single_conv2d():
ifm = relay.var("x", shape=(1, 8, 8, 4), dtype="int8")
conv1 = infra.make_ethosu_conv2d(ifm, 4, 4, (3, 3), (1, 1), (1, 1), (1, 1))
conv1 = test_infra.make_ethosu_conv2d(ifm, 4, 4, (3, 3), (1, 1), (1, 1), (1, 1))
func = relay.Function(relay.analysis.free_vars(conv1), conv1)
return func


def test_check_compute_cycle_hint():
"""Check the "compute_cycle_hint" annotation remains in the lowering flow."""
relay_function = _create_single_conv2d()
mod = tvm.IRModule()
mod["main"] = relay_function
mod = relay.transform.InferType()(mod)
tir_mod = _lower_to_tir(mod["main"], _ethos_u55_cascader())[0]
primfunc = tir_mod["main"]
def _create_double_conv2d():
ifm = relay.var("x", shape=(1, 8, 8, 4), dtype="int8")
conv1 = test_infra.make_ethosu_conv2d(ifm, 4, 4, (3, 3), (1, 1), (1, 1), (1, 1))
conv2 = test_infra.make_ethosu_conv2d(conv1, 4, 4, (1, 3), (1, 1), (1, 1), (1, 1))
func = relay.Function(relay.analysis.free_vars(conv2), conv2)
return func


def _create_scalar_add():
ifm = relay.var("x", shape=(1, 5, 4, 3), dtype="int8")
ifm2 = relay.const(np.ones((1, 1, 1, 1)), dtype="int8")
add = test_infra.make_ethosu_binary_elementwise(
ifm, ifm2, ifm_channels=3, ifm2_channels=1, operator_type="ADD", ofm_dtype="int8"
)
func = relay.Function(relay.analysis.free_vars(add), add)
return func


def test_single_conv_compute_cycles_hint():
"""
Check the "compute_cycles_hint" annotation remains in the lowering flow
for single convolution.
"""
primfunc = _compile_model(_create_single_conv2d())
ops = primfunc.body.body.body.seq

compute_cycles_hints = [256, 304, 320]
compute_cycles_hints = [2304, 640, 320]
for op, compute_cycle_hint in zip(ops, compute_cycles_hints):
assert op.attr_key == "pragma_compute_cycles_hint"
assert op.value == compute_cycle_hint


def test_double_conv_compute_cycles_hint():
"""
Check the "compute_cycles_hint" annotation remains in the lowering flow
for double convolution.
"""
primfunc = _compile_model(_create_double_conv2d())
ops = primfunc.body.body.body.body.seq

compute_cycles_hints = [2304, 640, 320, 768, 640, 240]
for op, compute_cycle_hint in zip(ops, compute_cycles_hints):
assert op.attr_key == "pragma_compute_cycles_hint"
assert op.value == compute_cycle_hint


def test_scalar_add_compute_cycles_hint():
"""
Check the "compute_cycles_hint" annotation remains in the lowering flow
for add with scalar values.
"""
primfunc = _compile_model(_create_scalar_add())
ops = primfunc.body.body.seq

compute_cycles_hints = [16, 24]
for op, compute_cycle_hint in zip(ops, compute_cycles_hints):
assert op.attr_key == "pragma_compute_cycles_hint"
assert op.value == compute_cycle_hint
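
The varying nesting depth in these tests (body.body.seq vs body.body.body.seq) reflects how many outer scopes the lowered TIR wraps around the sequence of annotated ops; each element of the sequence is an AttrStmt carrying the hint. A short sketch of the inspection the assertions perform:

# Sketch: walk the asserted SeqStmt and print each hint; the number of
# `.body` hops is schedule-dependent, as the three tests above show.
for attr_stmt in primfunc.body.body.seq:
    assert attr_stmt.attr_key == "pragma_compute_cycles_hint"
    print(attr_stmt.attr_key, int(attr_stmt.value))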
2 changes: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ def test_compute_cycles_annotation(SRAM, FLASH, TwoConv2DTE):
# [copy, copy, conv2d, copy, conv2d]
stages = [6, 8, 9, 18, 19]
# Expected hints for each operation
compute_cycles_hints = [1440, 4624, 1440, 2844, 2304]
compute_cycles_hints = [4096, 5120, 1632, 2560, 2304]

for stage, compute_cycles_hint in zip(stages, compute_cycles_hints):
op = sch.stages[stage]