Skip to content

Commit

Permalink
[async] Add allocator async state (#1973)
Browse files Browse the repository at this point in the history
* [async] Add allocator async state

* mask state

* [skip ci] enforce code format

Co-authored-by: Taichi Gardener <taichigardener@gmail.com>
  • Loading branch information
k-ye and taichi-gardener authored Oct 19, 2020
1 parent b8a1bc4 commit 4a56852
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 9 deletions.
26 changes: 25 additions & 1 deletion taichi/program/async_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,19 @@ TaskMeta *get_task_meta(IRBank *ir_bank, const TaskLaunchRecord &t) {
}
}

if (auto *snode_op = stmt->cast<SNodeOpStmt>()) {
if (snode_op->op_type == SNodeOpType::activate ||
snode_op->op_type == SNodeOpType::deactivate) {
auto *sn = snode_op->snode;
if (is_gc_able(sn->type)) {
meta.input_states.emplace(sn, AsyncState::Type::allocator);
meta.input_states.emplace(sn, AsyncState::Type::mask);
meta.output_states.emplace(sn, AsyncState::Type::allocator);
meta.output_states.emplace(sn, AsyncState::Type::mask);
}
}
}

if (auto ptr = stmt->cast<GlobalPtrStmt>()) {
if (ptr->activate) {
for (auto &snode : ptr->snodes.data) {
Expand All @@ -127,6 +140,10 @@ TaskMeta *get_task_meta(IRBank *ir_bank, const TaskLaunchRecord &t) {
if (!s->is_path_all_dense) {
meta.input_states.emplace(s, AsyncState::Type::mask);
meta.output_states.emplace(s, AsyncState::Type::mask);
if (is_gc_able(s->type)) {
meta.input_states.emplace(s, AsyncState::Type::allocator);
meta.output_states.emplace(s, AsyncState::Type::allocator);
}
}
s = s->parent;
}
Expand Down Expand Up @@ -170,6 +187,11 @@ TaskMeta *get_task_meta(IRBank *ir_bank, const TaskLaunchRecord &t) {
} else if (root_stmt->task_type == OffloadedTaskType::struct_for) {
meta.snode = root_stmt->snode;
meta.input_states.emplace(root_stmt->snode, AsyncState::Type::list);
} else if ((root_stmt->task_type == OffloadedTaskType::gc) &&
(is_gc_able(root_stmt->snode->type))) {
meta.snode = root_stmt->snode;
meta.input_states.emplace(meta.snode, AsyncState::Type::allocator);
meta.output_states.emplace(meta.snode, AsyncState::Type::allocator);
}

meta_bank[t.ir_handle] = meta;
Expand Down Expand Up @@ -217,7 +239,9 @@ TaskFusionMeta get_task_fusion_meta(IRBank *bank, const TaskLaunchRecord &t) {
meta.end_value = task->end_value;
} else if (task->task_type != OffloadedTaskType::serial) {
// Do not fuse gc/listgen tasks.
return fusion_meta_bank[t.ir_handle] = TaskFusionMeta();
meta.fusible = false;
meta.snode = task->snode;
return fusion_meta_bank[t.ir_handle] = meta;
}
meta.fusible = true;
return fusion_meta_bank[t.ir_handle] = meta;
Expand Down
5 changes: 4 additions & 1 deletion taichi/program/async_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class TaskLaunchRecord {
};

struct AsyncState {
enum class Type { mask, value, list };
enum class Type { mask, value, list, allocator };

AsyncState(SNode *snode, Type type) : snode(snode), type(type) {
}
Expand Down Expand Up @@ -106,6 +106,9 @@ struct AsyncState {
case Type::list:
type_name = "list";
break;
case Type::allocator:
type_name = "allocator";
break;
}
return snode->get_node_type_name_hinted() + "_" + type_name;
}
Expand Down
8 changes: 5 additions & 3 deletions taichi/program/state_flow_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,10 +450,12 @@ std::unordered_set<int> StateFlowGraph::fuse_range(int begin, int end) {
fusion_meta[a] != fusion_meta[b]) {
return false;
}
if (nodes[a]->meta->type != OffloadedStmt::TaskType::serial) {
if (nodes[a]->meta->type != OffloadedTaskType::serial) {
for (auto &state : nodes[a]->output_edges) {
if (state.first.type != AsyncState::Type::value) {
// No need to check mask/list states as there must be value states.
const auto sty = state.first.type;
if (sty != AsyncState::Type::value && sty != AsyncState::Type::mask) {
// No need to check allocator/list states, as they must be accompanied
// by either value or mask states.
continue;
}
if (state.second.find(nodes[b]) != state.second.end()) {
Expand Down
58 changes: 54 additions & 4 deletions tests/python/test_gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


@ti.test(require=ti.extension.sparse)
def test_block():
def _test_block_gc():
N = 100000

dx = 1 / 128
Expand Down Expand Up @@ -38,14 +38,29 @@ def move():
x[p] += ti.Vector([0.0, 0.1])

assert grid.num_dynamically_allocated == 0
for i in range(100):
for _ in range(100):
grid.deactivate_all()
# Scatter the particles to the sparse grid
build_grid()
# Move the block of particles
move()
# The block of particles can occupy at most two blocks on the sparse grid
assert 1 <= grid.num_dynamically_allocated <= 2

ti.sync()
# The block of particles can occupy at most two blocks on the sparse grid.
# It's fine to run 100 times and do just one final check, because
# num_dynamically_allocated stores the number of slots *ever* allocated.
assert 1 <= grid.num_dynamically_allocated <= 2, grid.num_dynamically_allocated


@ti.test(require=ti.extension.sparse)
def test_block():
    """Run the shared block-GC scenario on backends with the sparse extension.

    No async_mode option is passed to the decorator here; the scenario
    itself lives in _test_block_gc.
    """
    _test_block_gc()


@ti.test(require=[ti.extension.sparse, ti.extension.async_mode],
         async_mode=True)
def test_block_async():
    """Run the shared block-GC scenario with async_mode=True requested.

    Requires both the sparse and async_mode extensions; the scenario itself
    lives in _test_block_gc.
    """
    _test_block_gc()


@ti.test(require=ti.extension.sparse)
Expand Down Expand Up @@ -79,3 +94,38 @@ def test_pointer_gc():

# Note that being inactive doesn't mean it's not allocated.
assert L.num_dynamically_allocated == 1


@ti.test(require=[ti.extension.sparse, ti.extension.async_mode],
         async_mode=True)
def test_fuse_allocator_state():
    """Activate, then deactivate, pointer cells from two separate kernels.

    activate_y writes y[1..N] (activating those cells of y_parent), and
    deactivate_y then deactivates cells 0..N-1. After syncing, only cell N
    should still hold its value, while the allocation counter reports all N
    cells that were allocated.
    """
    N = 16
    x = ti.field(dtype=ti.i32, shape=N)
    y = ti.field(dtype=ti.i32)

    y_parent = ti.root.pointer(ti.i, N * 2)
    y_parent.place(y)

    # https://github.com/taichi-dev/taichi/pull/1973#pullrequestreview-511154376

    @ti.kernel
    def activate_y():
        for i in x:
            # Shift by one so that the last written cell (index N) falls
            # outside the range deactivated below.
            idx = i + 1
            y[idx] = idx

    @ti.kernel
    def deactivate_y():
        for i in x:
            ti.deactivate(y_parent, i)

    activate_y()
    deactivate_y()
    ti.sync()

    # TODO: assert that activate_y and deactivate_y are not fused.
    assert y_parent.num_dynamically_allocated == N
    ys = y.to_numpy()
    # Use a distinct loop variable: rebinding `y` here would clobber the
    # field that the kernels above close over.
    for i, actual in enumerate(ys):
        expected = N if i == N else 0
        assert actual == expected

0 comments on commit 4a56852

Please sign in to comment.