Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[refactor] Remove obsolete AsyncEngine::fuse() #1944

Merged
merged 1 commit into from
Oct 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 0 additions & 100 deletions taichi/program/async_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,106 +256,6 @@ void AsyncEngine::synchronize() {
cur_sync_sfg_debug_per_stage_counts_.clear();
}

// Greedily fuses adjacent tasks in |task_queue|.
//
// Two neighbouring records (i, i + 1) are merged when
//   * both are struct-fors over the same SNode with the same block_dim, or
//   * both are range-fors with constant, identical begin/end values,
// and, if they were launched by different kernels, both kernels have neither
// args nor rets. Serial tasks never satisfy either condition, so they are
// never fused (they can be SNode accessors).
//
// On a successful fusion the body of task B is moved into a clone of task A,
// the clone is simplified, re-id'ed, hashed and stored in |ir_bank_|, and
// record B's IR handle is nulled out. After the pass, every record with a
// null IR handle is dropped from |task_queue|.
//
// Returns true when at least one pair of tasks was fused.
bool AsyncEngine::fuse() {
  // TODO: migrated to SFG...
  bool modified = false;

  // Debug aid, deliberately disabled: flip the condition to dump every task.
  if (false) {
    // (experimental) print tasks
    for (int i = 0; i < (int)task_queue.size(); i++) {
      fmt::print("{}: {}\n", i, task_queue[i].stmt()->task_name());
      irpass::print(task_queue[i].stmt());
    }
  }

  for (int i = 0; i < (int)task_queue.size() - 1; i++) {
    auto &rec_a = task_queue[i];
    auto &rec_b = task_queue[i + 1];
    auto *task_a = rec_a.stmt();
    auto *task_b = rec_b.stmt();
    const bool is_same_struct_for =
        task_a->task_type == OffloadedStmt::struct_for &&
        task_b->task_type == OffloadedStmt::struct_for &&
        task_a->snode == task_b->snode &&
        task_a->block_dim == task_b->block_dim;
    // TODO: a few problems with the range-for test condition:
    // 1. This could incorrectly fuse two range-for kernels that have different
    // sizes, but then the loop ranges get padded to the same power-of-two (E.g.
    // maybe a side effect when a struct-for is demoted to range-for).
    // 2. It has also fused range-fors that have the same linear range, but are
    // of different dimensions of loop indices, e.g. (16, ) and (4, 4).
    const bool is_same_range_for =
        task_a->task_type == OffloadedStmt::range_for &&
        task_b->task_type == OffloadedStmt::range_for &&
        task_a->const_begin && task_b->const_begin &&
        task_a->const_end && task_b->const_end &&
        task_a->begin_value == task_b->begin_value &&
        task_a->end_value == task_b->end_value;

    const bool same_kernel = (rec_a.kernel == rec_b.kernel);
    bool kernel_args_match = true;
    if (!same_kernel) {
      // Merging kernels with different signatures will break invariants. E.g.
      // https://github.com/taichi-dev/taichi/blob/a6575fb97557267e2f550591f43b183076b72ac2/taichi/transforms/type_check.cpp#L326
      //
      // TODO: we could merge different kernels if their args are the same. But
      // we have no way to check that for now.
      auto check = [](const Kernel *k) {
        return (k->args.empty() && k->rets.empty());
      };
      kernel_args_match = (check(rec_a.kernel) && check(rec_b.kernel));
    }
    if (kernel_args_match && (is_same_range_for || is_same_struct_for)) {
      // We are about to change both |task_a| and |task_b|. Clone them first.
      auto cloned_task_a = rec_a.ir_handle.clone();
      auto cloned_task_b = rec_b.ir_handle.clone();
      task_a = cloned_task_a->as<OffloadedStmt>();
      task_b = cloned_task_b->as<OffloadedStmt>();
      // TODO: in certain cases this optimization can be wrong!
      // Fuse task b into task_a
      for (int j = 0; j < (int)task_b->body->size(); j++) {
        task_a->body->insert(std::move(task_b->body->statements[j]));
      }
      task_b->body->statements.clear();

      // replace all reference to the offloaded statement B to A
      irpass::replace_all_usages_with(task_a, task_b, task_a);

      // |rec_a| aliases task_queue[i], so this is that record's kernel.
      auto kernel = rec_a.kernel;
      irpass::full_simplify(task_a, /*after_lower_access=*/false, kernel);
      // For now, re_id is necessary for the hash to be correct.
      irpass::re_id(task_a);

      auto h = ir_bank_.get_hash(task_a);
      rec_a.ir_handle = IRHandle(task_a, h);
      ir_bank_.insert(std::move(cloned_task_a), h);
      // A null IR handle marks record B for removal in the sweep below.
      rec_b.ir_handle = IRHandle(nullptr, 0);

      // TODO: since cloned_task_b->body is empty, can we remove this (i.e.,
      // simply delete cloned_task_b here)?
      ir_bank_.insert_to_trash_bin(std::move(cloned_task_b));

      modified = true;
      i++;  // skip fusing task_queue[i + 1] and task_queue[i + 2]
    }
  }

  auto new_task_queue = std::deque<TaskLaunchRecord>();

  // Eliminate empty tasks
  for (int i = 0; i < (int)task_queue.size(); i++) {
    if (task_queue[i].ir_handle.ir() != nullptr) {
      new_task_queue.push_back(task_queue[i]);
    }
  }

  task_queue = std::move(new_task_queue);

  return modified;
}

void AsyncEngine::debug_sfg(const std::string &stage) {
auto prefix = program->config.async_opt_intermediate_file;
if (prefix.empty())
Expand Down
2 changes: 0 additions & 2 deletions taichi/program/async_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,6 @@ class AsyncEngine {
// Constructs an AsyncEngine from a Program pointer and a
// BackendExecCompilationFunc.
// NOTE(review): |compile_to_backend| is presumably the callback used to
// compile tasks for the target backend — confirm against the definition.
explicit AsyncEngine(Program *program,
const BackendExecCompilationFunc &compile_to_backend);

// Attempts to fuse adjacent compatible tasks in the task queue and drops the
// records that became empty as a result.
bool fuse(); // return true when modified

// Forwards to queue.clear_cache(); takes no arguments and returns nothing.
void clear_cache() {
queue.clear_cache();
}
Expand Down