Skip to content

Commit

Permalink
[TIR][USMP] Greedy algorithms for USMP
Browse files Browse the repository at this point in the history
This commits removes commented out lines
,few trivial cleanups and few BufferInfo
based tests to check the algorithm.

Change-Id: I1a12b6a424370e9e4c4a55563dde0ad698b07ea3
  • Loading branch information
manupak committed Nov 30, 2021
1 parent 78e099b commit 5e75ed3
Show file tree
Hide file tree
Showing 4 changed files with 215 additions and 30 deletions.
4 changes: 4 additions & 0 deletions python/tvm/tir/usmp/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ def __init__(
alignment,
)

def set_conflicts(self, conflicts: list):
"""Sets the the conflicting array of buffer info objects"""
_ffi_api.BufferInfoSetConflicts(self, conflicts)


@register_object("tir.usmp.PoolAllocation")
class PoolAllocation(Object):
Expand Down
10 changes: 5 additions & 5 deletions src/tir/usmp/algo/greedy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*/

/*!
* \file tir/analysis/usmp/algo/greedy_by_size.cc
* \file tir/analysis/usmp/algo/greedy.cc
* \brief This source contains greedy algorithms for planning
* memory for USMP. There are two algorithms present here :
* 1) greedy_by_size and 2) greedy_by_conflicts.
Expand Down Expand Up @@ -89,17 +89,17 @@ class GreedyBase {
* \brief Selects a pool for placement in the given set of ordered pool candidates
*/
PoolInfo SelectPlacementPool(
const Array<PoolInfo>& pool_candidates,
const BufferInfo& buf_info,
const std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual>& pool_offsets) {
// Here the pool candidates are ordered when it is consumed by the algorithm.
// This could be from order the user has specified. However, schedulers are
// welcome to change the order for performance reasons.
for (const auto& pool_info : pool_candidates) {
for (const auto& pool_info : buf_info->pool_candidates) {
if (pool_offsets.count(pool_info)) {
return pool_info;
}
}
ICHECK(false) << "TVM USMP Internal Error: no candidate have been selected!";
CHECK(false) << "TVM USMP Error: no candidate have been selected for " << buf_info;
return PoolInfo();
}

Expand Down Expand Up @@ -141,7 +141,7 @@ class GreedyBase {
}
}
}
auto selected_pool = SelectPlacementPool(buf_info->pool_candidates, pool_offset_candidates);
auto selected_pool = SelectPlacementPool(buf_info, pool_offset_candidates);
pool_allocations.Set(
buf_info, PoolAllocation(selected_pool, Integer(pool_offset_candidates[selected_pool])));
}
Expand Down
3 changes: 0 additions & 3 deletions src/tir/usmp/analysis/extract_buffer_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,6 @@ Map<BufferInfo, tir::Stmt> BufferInfoExtractor::operator()(const PrimFunc& main_

// Traverse the liveness events using a open set to track what
// is live while updating the conflicts through out the linear traversal
// std::unordered_set<BufferInfo, ObjectPtrHash, ObjectPtrEqual> open_set;
std::unordered_map<BufferInfo, int, ObjectPtrHash, ObjectPtrEqual> open_set;
for (const auto& le_event : le_events_timeline) {
if (le_event.le_type == START) {
Expand All @@ -465,7 +464,6 @@ Map<BufferInfo, tir::Stmt> BufferInfoExtractor::operator()(const PrimFunc& main_
le_event.buffer_info->conflicts.push_back(open_buffer_info);
}
}
// open_set.insert(le_event.buffer_info);
if (open_set.find(le_event.buffer_info) == open_set.end()) {
open_set[le_event.buffer_info] = 1;
} else {
Expand All @@ -477,7 +475,6 @@ Map<BufferInfo, tir::Stmt> BufferInfoExtractor::operator()(const PrimFunc& main_
} else {
open_set[le_event.buffer_info] -= 1;
}
// open_set.erase(le_event.buffer_info);
}
}
return this->buffer_info_map_;
Expand Down
228 changes: 206 additions & 22 deletions tests/python/unittest/test_tir_usmp_algo.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def get_allocate(stmt):
return allocates


def assign_poolinfos_to_allocates_in_primfunc(primfunc, pool_infos):
def _assign_poolinfos_to_allocates_in_primfunc(primfunc, pool_infos):
"""helper to assing poolinfos to allocate nodes in a tir.PrimFunc"""

def set_poolinfos(stmt):
Expand All @@ -68,12 +68,12 @@ def set_poolinfos(stmt):
return primfunc.with_body(stmt_functor.ir_transform(primfunc.body, None, set_poolinfos))


def assign_poolinfos_to_allocates_in_irmodule(mod, pool_infos):
def _assign_poolinfos_to_allocates_in_irmodule(mod, pool_infos):
"""helper to assing poolinfos to allocate nodes in a IRModule"""
ret = tvm.IRModule()
for global_var, basefunc in mod.functions.items():
if isinstance(basefunc, tvm.tir.PrimFunc):
ret[global_var] = assign_poolinfos_to_allocates_in_primfunc(basefunc, pool_infos)
ret[global_var] = _assign_poolinfos_to_allocates_in_primfunc(basefunc, pool_infos)
return ret


Expand All @@ -96,6 +96,201 @@ def _check_max_workspace_size(buffer_pool_allocations, pool_info, size):
assert max_workspace_size == size


def test_no_pool_error():
target = Target("c")
tiny_workspace_pool = usmp_utils.PoolInfo(
pool_name="tiny_workspace",
target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
size_hint_bytes=10,
)
bi_a = usmp_utils.BufferInfo(
name_hint="bi_a", size_bytes=10, pool_candidates=[tiny_workspace_pool]
)
bi_b = usmp_utils.BufferInfo(
name_hint="bi_b", size_bytes=10, pool_candidates=[tiny_workspace_pool]
)
bi_c = usmp_utils.BufferInfo(
name_hint="bi_c", size_bytes=10, pool_candidates=[tiny_workspace_pool]
)
bi_a.set_conflicts([bi_b])
bi_b.set_conflicts([bi_c])
bi_c.set_conflicts([bi_a])
buffer_info_arr = [bi_a, bi_b, bi_c]
fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.greedy_by_size")
with pytest.raises(
tvm.TVMError, match="TVM USMP Error: no candidate have been selected for BufferInfoNode"
):
buffer_pool_allocations = fusmp_algo(buffer_info_arr)


@pytest.mark.parametrize("algorithm", ["greedy_by_size", "greedy_by_conflicts"])
def test_name_based_ordering(algorithm):
""" This checks when the size and conlicts are same a stable result is generated"""

def _test():
target = Target("c")
global_workspace_pool = usmp_utils.PoolInfo(
pool_name="global_workspace",
target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
)
bi_a = usmp_utils.BufferInfo(
name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
)
bi_b = usmp_utils.BufferInfo(
name_hint="bi_b", size_bytes=10, pool_candidates=[global_workspace_pool]
)
bi_c = usmp_utils.BufferInfo(
name_hint="bi_c", size_bytes=10, pool_candidates=[global_workspace_pool]
)
bi_a.set_conflicts([bi_b])
bi_b.set_conflicts([bi_c])
bi_c.set_conflicts([bi_a])

buffer_info_arr = [bi_a, bi_b, bi_c]
fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
buffer_pool_allocations = fusmp_algo(buffer_info_arr)
assert buffer_pool_allocations[bi_a].byte_offset == 0
assert buffer_pool_allocations[bi_b].byte_offset == 20
assert buffer_pool_allocations[bi_c].byte_offset == 10

# This is tested for several times to check stability
for x in range(0, 10):
_test()


@pytest.mark.parametrize(
["algorithm", "workspace_size"],
[("greedy_by_size", 140), ("greedy_by_conflicts", 140)],
)
def test_linear(algorithm, workspace_size):
"""
The test case here represent BufferInfo objects
that could get generated for a linear sequence
such as :
(Op A)
|
bi_a
|
(Op B)
|
bi_b
|
.
.
.
(Op F)
|
bi_f
"""
target = Target("c")
global_workspace_pool = usmp_utils.PoolInfo(
pool_name="global_workspace",
target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
)
bi_a = usmp_utils.BufferInfo(
name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
)
bi_b = usmp_utils.BufferInfo(
name_hint="bi_b", size_bytes=20, pool_candidates=[global_workspace_pool]
)
bi_c = usmp_utils.BufferInfo(
name_hint="bi_c", size_bytes=100, pool_candidates=[global_workspace_pool]
)
bi_d = usmp_utils.BufferInfo(
name_hint="bi_d", size_bytes=40, pool_candidates=[global_workspace_pool]
)
bi_e = usmp_utils.BufferInfo(
name_hint="bi_e", size_bytes=50, pool_candidates=[global_workspace_pool]
)
bi_f = usmp_utils.BufferInfo(
name_hint="bi_f", size_bytes=50, pool_candidates=[global_workspace_pool]
)

# Creating conflicts for a linear graph
bi_a.set_conflicts([bi_b])
bi_b.set_conflicts([bi_a, bi_c])
bi_c.set_conflicts([bi_b, bi_d])
bi_d.set_conflicts([bi_c, bi_e])
bi_e.set_conflicts([bi_d, bi_f])
bi_f.set_conflicts([bi_e])

buffer_info_arr = [bi_a, bi_b, bi_c, bi_d, bi_e, bi_f]
fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
buffer_pool_allocations = fusmp_algo(buffer_info_arr)
_check_max_workspace_size(buffer_pool_allocations, global_workspace_pool, workspace_size)


@pytest.mark.parametrize(
["algorithm", "workspace_size"],
[("greedy_by_size", 190), ("greedy_by_conflicts", 320)],
)
def test_fanout(algorithm, workspace_size):
"""
The test case here represent BufferInfo objects
that could get generated for a fanout topology
such as :
(Op A)
|
bi_a ---------
| |
(Op B) (Op C)
| |
bi_b bi_c
| |
(Op D) (Op E)
| |
bi_d bi_e
| |
(Op F) ------
|
bi_f
|
(Op G)
|
bi_g
"""
target = Target("c")
global_workspace_pool = usmp_utils.PoolInfo(
pool_name="global_workspace",
target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
)
bi_a = usmp_utils.BufferInfo(
name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
)
bi_b = usmp_utils.BufferInfo(
name_hint="bi_b", size_bytes=20, pool_candidates=[global_workspace_pool]
)
bi_c = usmp_utils.BufferInfo(
name_hint="bi_c", size_bytes=100, pool_candidates=[global_workspace_pool]
)
bi_d = usmp_utils.BufferInfo(
name_hint="bi_d", size_bytes=40, pool_candidates=[global_workspace_pool]
)
bi_e = usmp_utils.BufferInfo(
name_hint="bi_e", size_bytes=50, pool_candidates=[global_workspace_pool]
)
bi_f = usmp_utils.BufferInfo(
name_hint="bi_f", size_bytes=60, pool_candidates=[global_workspace_pool]
)
bi_g = usmp_utils.BufferInfo(
name_hint="bi_g", size_bytes=70, pool_candidates=[global_workspace_pool]
)

# Creating conflicts for a linear graph
bi_a.set_conflicts([bi_b, bi_c])
bi_b.set_conflicts([bi_a, bi_c, bi_e])
bi_c.set_conflicts([bi_e, bi_a, bi_b, bi_d])
bi_d.set_conflicts([bi_b, bi_f, bi_c, bi_e])
bi_e.set_conflicts([bi_c, bi_f, bi_b, bi_d])
bi_f.set_conflicts([bi_d, bi_e, bi_f])
bi_g.set_conflicts([bi_f])

buffer_info_arr = [bi_a, bi_b, bi_c, bi_d, bi_e, bi_f, bi_g]
fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
buffer_pool_allocations = fusmp_algo(buffer_info_arr)
_check_max_workspace_size(buffer_pool_allocations, global_workspace_pool, workspace_size)


# fmt: off
@tvm.script.ir_module
class LinearStructure:
Expand Down Expand Up @@ -167,22 +362,11 @@ def run_model(input: T.handle, output: T.handle) -> None:
# fmt: on


def print_conflicts(buffer_info_map):
"""_verify_conflicts("sid_8", ["Conv2dOutput_7", "tensor_2"], buffer_info_map)"""

for buffer_info_name, buf_info in buffer_info_map.items():
conflict_str = "["
for conflict in buf_info.conflicts:
conflict_str += f'"{conflict.name_hint}", '
conflict_str += "]"
print(f'_verify_conflicts("{buffer_info_name}", {conflict_str}, buffer_info_map_names)')


@pytest.mark.parametrize(
["algorithm", "fast_memory_size", "slow_memory_size"],
[("greedy_by_size", 200704, 1418528), ("greedy_by_conflicts", 200704, 1418528)],
)
def test_linear(algorithm, fast_memory_size, slow_memory_size):
def test_mobilenet_subgraph(algorithm, fast_memory_size, slow_memory_size):
target = Target("c")
fast_memory_pool = usmp_utils.PoolInfo(
pool_name="fast_memory",
Expand All @@ -194,16 +378,16 @@ def test_linear(algorithm, fast_memory_size, slow_memory_size):
)
tir_mod = LinearStructure
tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
tir_mod = assign_poolinfos_to_allocates_in_irmodule(
tir_mod = _assign_poolinfos_to_allocates_in_irmodule(
tir_mod, [fast_memory_pool, slow_memory_pool]
)
main_func = tir_mod["run_model"]
buffer_info_map = tvm.tir.usmp.analysis.extract_buffer_info(main_func, tir_mod)

fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo")
buffer_info_arr = fcreate_array_bi(buffer_info_map)
fusmp_algo_greedy_by_size = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
buffer_pool_allocations = fusmp_algo_greedy_by_size(buffer_info_arr)
fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
buffer_pool_allocations = fusmp_algo(buffer_info_arr)

buffer_info_map_names = dict()
for buf_info in buffer_info_arr:
Expand Down Expand Up @@ -346,22 +530,22 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast(place
@pytest.mark.parametrize(
["algorithm", "workspace_size"], [("greedy_by_size", 7920256), ("greedy_by_conflicts", 7200256)]
)
def test_fanout(algorithm, workspace_size):
def test_resnet_subgraph(algorithm, workspace_size):
target = Target("c")
global_workspace_pool = usmp_utils.PoolInfo(
pool_name="global_workspace",
target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
)
tir_mod = ResnetStructure
tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
tir_mod = assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_workspace_pool])
tir_mod = _assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_workspace_pool])
main_func = tir_mod["tvmgen_default_run_model"]
buffer_info_map = tvm.tir.usmp.analysis.extract_buffer_info(main_func, tir_mod)

fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo")
buffer_info_arr = fcreate_array_bi(buffer_info_map)
fusmp_algo_greedy_by_size = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
buffer_pool_allocations = fusmp_algo_greedy_by_size(buffer_info_arr)
fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
buffer_pool_allocations = fusmp_algo(buffer_info_arr)

buffer_info_map_names = dict()
for buf_info in buffer_info_arr:
Expand Down

0 comments on commit 5e75ed3

Please sign in to comment.