From 5e75ed35daa721d7b1422ca27dcf932d8c73f850 Mon Sep 17 00:00:00 2001
From: Manupa Karunaratne <manupa.karunaratne@arm.com>
Date: Tue, 30 Nov 2021 19:55:31 +0000
Subject: [PATCH] [TIR][USMP] Greedy algorithms for USMP

This commits removes commented out lines
,few trivial cleanups and few BufferInfo
based tests to check the algorithm.

Change-Id: I1a12b6a424370e9e4c4a55563dde0ad698b07ea3
---
 python/tvm/tir/usmp/utils.py                 |   4 +
 src/tir/usmp/algo/greedy.cc                  |  10 +-
 src/tir/usmp/analysis/extract_buffer_info.cc |   3 -
 tests/python/unittest/test_tir_usmp_algo.py  | 228 +++++++++++++++++--
 4 files changed, 215 insertions(+), 30 deletions(-)
diff --git a/python/tvm/tir/usmp/utils.py b/python/tvm/tir/usmp/utils.py
index 188d4c57810ba..470765174acb0 100644
--- a/python/tvm/tir/usmp/utils.py
+++ b/python/tvm/tir/usmp/utils.py
@@ -114,6 +114,10 @@ def __init__(
             alignment,
         )
 
+    def set_conflicts(self, conflicts: list):
+        """Sets the the conflicting array of buffer info objects"""
+        _ffi_api.BufferInfoSetConflicts(self, conflicts)
+
 
 @register_object("tir.usmp.PoolAllocation")
 class PoolAllocation(Object):
diff --git a/src/tir/usmp/algo/greedy.cc b/src/tir/usmp/algo/greedy.cc
index f0b1581cd616f..78afe0333c992 100644
--- a/src/tir/usmp/algo/greedy.cc
+++ b/src/tir/usmp/algo/greedy.cc
@@ -18,7 +18,7 @@
  */
 
 /*!
- * \file tir/analysis/usmp/algo/greedy_by_size.cc
+ * \file tir/analysis/usmp/algo/greedy.cc
  * \brief This source contains greedy algorithms for planning
  * memory for USMP. There are two algorithms present here :
  * 1) greedy_by_size and 2) greedy_by_conflicts.
@@ -89,17 +89,17 @@ class GreedyBase {
    * \brief Selects a pool for placement in the given set of ordered pool candidates
    */
   PoolInfo SelectPlacementPool(
-      const Array<PoolInfo>& pool_candidates,
+      const BufferInfo& buf_info,
       const std::unordered_map<PoolInfo, size_t, ObjectPtrHash, ObjectPtrEqual>& pool_offsets) {
     // Here the pool candidates are ordered when it is consumed by the algorithm.
     // This could be from order the user has specified. However, schedulers are
     // welcome to change the order for performance reasons.
-    for (const auto& pool_info : pool_candidates) {
+    for (const auto& pool_info : buf_info->pool_candidates) {
       if (pool_offsets.count(pool_info)) {
         return pool_info;
       }
     }
-    ICHECK(false) << "TVM USMP Internal Error: no candidate have been selected!";
+    CHECK(false) << "TVM USMP Error: no candidate have been selected for " << buf_info;
     return PoolInfo();
   }
 
@@ -141,7 +141,7 @@ class GreedyBase {
           }
         }
       }
-      auto selected_pool = SelectPlacementPool(buf_info->pool_candidates, pool_offset_candidates);
+      auto selected_pool = SelectPlacementPool(buf_info, pool_offset_candidates);
       pool_allocations.Set(
           buf_info, PoolAllocation(selected_pool, Integer(pool_offset_candidates[selected_pool])));
     }
diff --git a/src/tir/usmp/analysis/extract_buffer_info.cc b/src/tir/usmp/analysis/extract_buffer_info.cc
index f82db06685f40..3fea7211672f7 100644
--- a/src/tir/usmp/analysis/extract_buffer_info.cc
+++ b/src/tir/usmp/analysis/extract_buffer_info.cc
@@ -454,7 +454,6 @@ Map<BufferInfo, tir::Stmt> BufferInfoExtractor::operator()(const PrimFunc& main_
 
   // Traverse the liveness events using a open set to track what
   // is live while updating the conflicts through out the linear traversal
-  //  std::unordered_set<BufferInfo, ObjectPtrHash, ObjectPtrEqual> open_set;
   std::unordered_map<BufferInfo, int, ObjectPtrHash, ObjectPtrEqual> open_set;
   for (const auto& le_event : le_events_timeline) {
     if (le_event.le_type == START) {
@@ -465,7 +464,6 @@ Map<BufferInfo, tir::Stmt> BufferInfoExtractor::operator()(const PrimFunc& main_
           le_event.buffer_info->conflicts.push_back(open_buffer_info);
         }
       }
-      //      open_set.insert(le_event.buffer_info);
       if (open_set.find(le_event.buffer_info) == open_set.end()) {
         open_set[le_event.buffer_info] = 1;
       } else {
@@ -477,7 +475,6 @@ Map<BufferInfo, tir::Stmt> BufferInfoExtractor::operator()(const PrimFunc& main_
       } else {
         open_set[le_event.buffer_info] -= 1;
       }
-      //      open_set.erase(le_event.buffer_info);
     }
   }
   return this->buffer_info_map_;
diff --git a/tests/python/unittest/test_tir_usmp_algo.py b/tests/python/unittest/test_tir_usmp_algo.py
index 69d52009fefcd..219456ecd6a6c 100644
--- a/tests/python/unittest/test_tir_usmp_algo.py
+++ b/tests/python/unittest/test_tir_usmp_algo.py
@@ -51,7 +51,7 @@ def get_allocate(stmt):
     return allocates
 
 
-def assign_poolinfos_to_allocates_in_primfunc(primfunc, pool_infos):
+def _assign_poolinfos_to_allocates_in_primfunc(primfunc, pool_infos):
     """helper to assing poolinfos to allocate nodes in a tir.PrimFunc"""
 
     def set_poolinfos(stmt):
@@ -68,12 +68,12 @@ def set_poolinfos(stmt):
     return primfunc.with_body(stmt_functor.ir_transform(primfunc.body, None, set_poolinfos))
 
 
-def assign_poolinfos_to_allocates_in_irmodule(mod, pool_infos):
+def _assign_poolinfos_to_allocates_in_irmodule(mod, pool_infos):
     """helper to assing poolinfos to allocate nodes in a IRModule"""
     ret = tvm.IRModule()
     for global_var, basefunc in mod.functions.items():
         if isinstance(basefunc, tvm.tir.PrimFunc):
-            ret[global_var] = assign_poolinfos_to_allocates_in_primfunc(basefunc, pool_infos)
+            ret[global_var] = _assign_poolinfos_to_allocates_in_primfunc(basefunc, pool_infos)
     return ret
 
 
@@ -96,6 +96,201 @@ def _check_max_workspace_size(buffer_pool_allocations, pool_info, size):
     assert max_workspace_size == size
 
 
+def test_no_pool_error():
+    target = Target("c")
+    tiny_workspace_pool = usmp_utils.PoolInfo(
+        pool_name="tiny_workspace",
+        target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
+        size_hint_bytes=10,
+    )
+    bi_a = usmp_utils.BufferInfo(
+        name_hint="bi_a", size_bytes=10, pool_candidates=[tiny_workspace_pool]
+    )
+    bi_b = usmp_utils.BufferInfo(
+        name_hint="bi_b", size_bytes=10, pool_candidates=[tiny_workspace_pool]
+    )
+    bi_c = usmp_utils.BufferInfo(
+        name_hint="bi_c", size_bytes=10, pool_candidates=[tiny_workspace_pool]
+    )
+    bi_a.set_conflicts([bi_b])
+    bi_b.set_conflicts([bi_c])
+    bi_c.set_conflicts([bi_a])
+    buffer_info_arr = [bi_a, bi_b, bi_c]
+    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.greedy_by_size")
+    with pytest.raises(
+        tvm.TVMError, match="TVM USMP Error: no candidate have been selected for BufferInfoNode"
+    ):
+        buffer_pool_allocations = fusmp_algo(buffer_info_arr)
+
+
+@pytest.mark.parametrize("algorithm", ["greedy_by_size", "greedy_by_conflicts"])
+def test_name_based_ordering(algorithm):
+    """ This checks when the size and conlicts are same a stable result is generated"""
+
+    def _test():
+        target = Target("c")
+        global_workspace_pool = usmp_utils.PoolInfo(
+            pool_name="global_workspace",
+            target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
+        )
+        bi_a = usmp_utils.BufferInfo(
+            name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
+        )
+        bi_b = usmp_utils.BufferInfo(
+            name_hint="bi_b", size_bytes=10, pool_candidates=[global_workspace_pool]
+        )
+        bi_c = usmp_utils.BufferInfo(
+            name_hint="bi_c", size_bytes=10, pool_candidates=[global_workspace_pool]
+        )
+        bi_a.set_conflicts([bi_b])
+        bi_b.set_conflicts([bi_c])
+        bi_c.set_conflicts([bi_a])
+
+        buffer_info_arr = [bi_a, bi_b, bi_c]
+        fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
+        buffer_pool_allocations = fusmp_algo(buffer_info_arr)
+        assert buffer_pool_allocations[bi_a].byte_offset == 0
+        assert buffer_pool_allocations[bi_b].byte_offset == 20
+        assert buffer_pool_allocations[bi_c].byte_offset == 10
+
+    # This is tested for several times to check stability
+    for x in range(0, 10):
+        _test()
+
+
+@pytest.mark.parametrize(
+    ["algorithm", "workspace_size"],
+    [("greedy_by_size", 140), ("greedy_by_conflicts", 140)],
+)
+def test_linear(algorithm, workspace_size):
+    """
+    The test case here represent BufferInfo objects
+    that could get generated for a linear sequence
+    such as :
+    (Op A)
+    |
+    bi_a
+    |
+    (Op B)
+    |
+    bi_b
+    |
+    .
+    .
+    .
+    (Op F)
+    |
+    bi_f
+    """
+    target = Target("c")
+    global_workspace_pool = usmp_utils.PoolInfo(
+        pool_name="global_workspace",
+        target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
+    )
+    bi_a = usmp_utils.BufferInfo(
+        name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
+    )
+    bi_b = usmp_utils.BufferInfo(
+        name_hint="bi_b", size_bytes=20, pool_candidates=[global_workspace_pool]
+    )
+    bi_c = usmp_utils.BufferInfo(
+        name_hint="bi_c", size_bytes=100, pool_candidates=[global_workspace_pool]
+    )
+    bi_d = usmp_utils.BufferInfo(
+        name_hint="bi_d", size_bytes=40, pool_candidates=[global_workspace_pool]
+    )
+    bi_e = usmp_utils.BufferInfo(
+        name_hint="bi_e", size_bytes=50, pool_candidates=[global_workspace_pool]
+    )
+    bi_f = usmp_utils.BufferInfo(
+        name_hint="bi_f", size_bytes=50, pool_candidates=[global_workspace_pool]
+    )
+
+    # Creating conflicts for a linear graph
+    bi_a.set_conflicts([bi_b])
+    bi_b.set_conflicts([bi_a, bi_c])
+    bi_c.set_conflicts([bi_b, bi_d])
+    bi_d.set_conflicts([bi_c, bi_e])
+    bi_e.set_conflicts([bi_d, bi_f])
+    bi_f.set_conflicts([bi_e])
+
+    buffer_info_arr = [bi_a, bi_b, bi_c, bi_d, bi_e, bi_f]
+    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
+    buffer_pool_allocations = fusmp_algo(buffer_info_arr)
+    _check_max_workspace_size(buffer_pool_allocations, global_workspace_pool, workspace_size)
+
+
+@pytest.mark.parametrize(
+    ["algorithm", "workspace_size"],
+    [("greedy_by_size", 190), ("greedy_by_conflicts", 320)],
+)
+def test_fanout(algorithm, workspace_size):
+    """
+    The test case here represent BufferInfo objects
+    that could get generated for a fanout topology
+    such as :
+    (Op A)
+    |
+    bi_a ---------
+    |            |
+    (Op B)     (Op C)
+    |            |
+    bi_b        bi_c
+    |            |
+    (Op D)     (Op E)
+    |            |
+    bi_d        bi_e
+    |            |
+    (Op F) ------
+    |
+    bi_f
+    |
+    (Op G)
+    |
+    bi_g
+    """
+    target = Target("c")
+    global_workspace_pool = usmp_utils.PoolInfo(
+        pool_name="global_workspace",
+        target_access={target: usmp_utils.PoolInfo.READ_WRITE_ACCESS},
+    )
+    bi_a = usmp_utils.BufferInfo(
+        name_hint="bi_a", size_bytes=10, pool_candidates=[global_workspace_pool]
+    )
+    bi_b = usmp_utils.BufferInfo(
+        name_hint="bi_b", size_bytes=20, pool_candidates=[global_workspace_pool]
+    )
+    bi_c = usmp_utils.BufferInfo(
+        name_hint="bi_c", size_bytes=100, pool_candidates=[global_workspace_pool]
+    )
+    bi_d = usmp_utils.BufferInfo(
+        name_hint="bi_d", size_bytes=40, pool_candidates=[global_workspace_pool]
+    )
+    bi_e = usmp_utils.BufferInfo(
+        name_hint="bi_e", size_bytes=50, pool_candidates=[global_workspace_pool]
+    )
+    bi_f = usmp_utils.BufferInfo(
+        name_hint="bi_f", size_bytes=60, pool_candidates=[global_workspace_pool]
+    )
+    bi_g = usmp_utils.BufferInfo(
+        name_hint="bi_g", size_bytes=70, pool_candidates=[global_workspace_pool]
+    )
+
+    # Creating conflicts for a linear graph
+    bi_a.set_conflicts([bi_b, bi_c])
+    bi_b.set_conflicts([bi_a, bi_c, bi_e])
+    bi_c.set_conflicts([bi_e, bi_a, bi_b, bi_d])
+    bi_d.set_conflicts([bi_b, bi_f, bi_c, bi_e])
+    bi_e.set_conflicts([bi_c, bi_f, bi_b, bi_d])
+    bi_f.set_conflicts([bi_d, bi_e, bi_f])
+    bi_g.set_conflicts([bi_f])
+
+    buffer_info_arr = [bi_a, bi_b, bi_c, bi_d, bi_e, bi_f, bi_g]
+    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
+    buffer_pool_allocations = fusmp_algo(buffer_info_arr)
+    _check_max_workspace_size(buffer_pool_allocations, global_workspace_pool, workspace_size)
+
+
 # fmt: off
 @tvm.script.ir_module
 class LinearStructure:
@@ -167,22 +362,11 @@ def run_model(input: T.handle, output: T.handle) -> None:
 # fmt: on
 
 
-def print_conflicts(buffer_info_map):
-    """_verify_conflicts("sid_8", ["Conv2dOutput_7", "tensor_2"], buffer_info_map)"""
-
-    for buffer_info_name, buf_info in buffer_info_map.items():
-        conflict_str = "["
-        for conflict in buf_info.conflicts:
-            conflict_str += f'"{conflict.name_hint}", '
-        conflict_str += "]"
-        print(f'_verify_conflicts("{buffer_info_name}", {conflict_str}, buffer_info_map_names)')
-
-
 @pytest.mark.parametrize(
     ["algorithm", "fast_memory_size", "slow_memory_size"],
     [("greedy_by_size", 200704, 1418528), ("greedy_by_conflicts", 200704, 1418528)],
 )
-def test_linear(algorithm, fast_memory_size, slow_memory_size):
+def test_mobilenet_subgraph(algorithm, fast_memory_size, slow_memory_size):
     target = Target("c")
     fast_memory_pool = usmp_utils.PoolInfo(
         pool_name="fast_memory",
@@ -194,7 +378,7 @@ def test_linear(algorithm, fast_memory_size, slow_memory_size):
     )
     tir_mod = LinearStructure
     tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
-    tir_mod = assign_poolinfos_to_allocates_in_irmodule(
+    tir_mod = _assign_poolinfos_to_allocates_in_irmodule(
         tir_mod, [fast_memory_pool, slow_memory_pool]
     )
     main_func = tir_mod["run_model"]
@@ -202,8 +386,8 @@ def test_linear(algorithm, fast_memory_size, slow_memory_size):
 
     fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo")
     buffer_info_arr = fcreate_array_bi(buffer_info_map)
-    fusmp_algo_greedy_by_size = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
-    buffer_pool_allocations = fusmp_algo_greedy_by_size(buffer_info_arr)
+    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
+    buffer_pool_allocations = fusmp_algo(buffer_info_arr)
 
     buffer_info_map_names = dict()
     for buf_info in buffer_info_arr:
@@ -346,7 +530,7 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast(place
 @pytest.mark.parametrize(
     ["algorithm", "workspace_size"], [("greedy_by_size", 7920256), ("greedy_by_conflicts", 7200256)]
 )
-def test_fanout(algorithm, workspace_size):
+def test_resnet_subgraph(algorithm, workspace_size):
     target = Target("c")
     global_workspace_pool = usmp_utils.PoolInfo(
         pool_name="global_workspace",
@@ -354,14 +538,14 @@ def test_fanout(algorithm, workspace_size):
     )
     tir_mod = ResnetStructure
     tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target)
-    tir_mod = assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_workspace_pool])
+    tir_mod = _assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_workspace_pool])
     main_func = tir_mod["tvmgen_default_run_model"]
     buffer_info_map = tvm.tir.usmp.analysis.extract_buffer_info(main_func, tir_mod)
 
     fcreate_array_bi = tvm.get_global_func("tir.usmp.CreateArrayBufferInfo")
     buffer_info_arr = fcreate_array_bi(buffer_info_map)
-    fusmp_algo_greedy_by_size = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
-    buffer_pool_allocations = fusmp_algo_greedy_by_size(buffer_info_arr)
+    fusmp_algo = tvm.get_global_func(f"tir.usmp.algo.{algorithm}")
+    buffer_pool_allocations = fusmp_algo(buffer_info_arr)
 
     buffer_info_map_names = dict()
     for buf_info in buffer_info_arr: