diff --git a/taichi/program/async_engine.cpp b/taichi/program/async_engine.cpp index 571ee1c05ea89..2a58e42e89bd6 100644 --- a/taichi/program/async_engine.cpp +++ b/taichi/program/async_engine.cpp @@ -256,106 +256,6 @@ void AsyncEngine::synchronize() { cur_sync_sfg_debug_per_stage_counts_.clear(); } -bool AsyncEngine::fuse() { - // TODO: migrated to SFG... - bool modified = false; - std::unordered_map list_dirty; - - if (false) { - // (experimental) print tasks - for (int i = 0; i < (int)task_queue.size(); i++) { - fmt::print("{}: {}\n", i, task_queue[i].stmt()->task_name()); - irpass::print(task_queue[i].stmt()); - } - } - - for (int i = 0; i < (int)task_queue.size() - 1; i++) { - auto &rec_a = task_queue[i]; - auto &rec_b = task_queue[i + 1]; - auto *task_a = rec_a.stmt(); - auto *task_b = rec_b.stmt(); - bool is_same_struct_for = task_a->task_type == OffloadedStmt::struct_for && - task_b->task_type == OffloadedStmt::struct_for && - task_a->snode == task_b->snode && - task_a->block_dim == task_b->block_dim; - // TODO: a few problems with the range-for test condition: - // 1. This could incorrectly fuse two range-for kernels that have different - // sizes, but then the loop ranges get padded to the same power-of-two (E.g. - // maybe a side effect when a struct-for is demoted to range-for). - // 2. It has also fused range-fors that have the same linear range, but are - // of different dimensions of loop indices, e.g. (16, ) and (4, 4). - bool is_same_range_for = task_a->task_type == OffloadedStmt::range_for && - task_b->task_type == OffloadedStmt::range_for && - task_a->const_begin && task_b->const_begin && - task_a->const_end && task_b->const_end && - task_a->begin_value == task_b->begin_value && - task_a->end_value == task_b->end_value; - - // We do not fuse serial kernels for now since they can be SNode accessors - bool are_both_serial = task_a->task_type == OffloadedStmt::serial && - task_b->task_type == OffloadedStmt::serial; - const bool same_kernel = (rec_a.kernel == rec_b.kernel); - bool kernel_args_match = true; - if (!same_kernel) { - // Merging kernels with different signatures will break invariants. E.g. - // https://github.com/taichi-dev/taichi/blob/a6575fb97557267e2f550591f43b183076b72ac2/taichi/transforms/type_check.cpp#L326 - // - // TODO: we could merge different kernels if their args are the same. But - // we have no way to check that for now. - auto check = [](const Kernel *k) { - return (k->args.empty() && k->rets.empty()); - }; - kernel_args_match = (check(rec_a.kernel) && check(rec_b.kernel)); - } - if (kernel_args_match && (is_same_range_for || is_same_struct_for)) { - // We are about to change both |task_a| and |task_b|. Clone them first. - auto cloned_task_a = rec_a.ir_handle.clone(); - auto cloned_task_b = rec_b.ir_handle.clone(); - task_a = cloned_task_a->as(); - task_b = cloned_task_b->as(); - // TODO: in certain cases this optimization can be wrong! - // Fuse task b into task_a - for (int j = 0; j < (int)task_b->body->size(); j++) { - task_a->body->insert(std::move(task_b->body->statements[j])); - } - task_b->body->statements.clear(); - - // replace all reference to the offloaded statement B to A - irpass::replace_all_usages_with(task_a, task_b, task_a); - - auto kernel = task_queue[i].kernel; - irpass::full_simplify(task_a, /*after_lower_access=*/false, kernel); - // For now, re_id is necessary for the hash to be correct. - irpass::re_id(task_a); - - auto h = ir_bank_.get_hash(task_a); - task_queue[i].ir_handle = IRHandle(task_a, h); - ir_bank_.insert(std::move(cloned_task_a), h); - task_queue[i + 1].ir_handle = IRHandle(nullptr, 0); - - // TODO: since cloned_task_b->body is empty, can we remove this (i.e., - // simply delete cloned_task_b here)? - ir_bank_.insert_to_trash_bin(std::move(cloned_task_b)); - - modified = true; - i++; // skip fusing task_queue[i + 1] and task_queue[i + 2] - } - } - - auto new_task_queue = std::deque(); - - // Eliminate empty tasks - for (int i = 0; i < (int)task_queue.size(); i++) { - if (task_queue[i].ir_handle.ir() != nullptr) { - new_task_queue.push_back(task_queue[i]); - } - } - - task_queue = std::move(new_task_queue); - - return modified; -} - void AsyncEngine::debug_sfg(const std::string &stage) { auto prefix = program->config.async_opt_intermediate_file; if (prefix.empty()) diff --git a/taichi/program/async_engine.h b/taichi/program/async_engine.h index 63f336e575dca..d2e205b62fc6d 100644 --- a/taichi/program/async_engine.h +++ b/taichi/program/async_engine.h @@ -140,8 +140,6 @@ class AsyncEngine { explicit AsyncEngine(Program *program, const BackendExecCompilationFunc &compile_to_backend); - bool fuse(); // return true when modified - void clear_cache() { queue.clear_cache(); }