From 068af03eba4d4846508e604a521846da66096426 Mon Sep 17 00:00:00 2001 From: xumingkuan Date: Wed, 25 Mar 2020 19:06:51 -0400 Subject: [PATCH] [misc] Introduced a temporary boolean constant for benchmarking advanced optimizations (#657) * Introduce a temporary boolean constant for benchmarking * [skip ci] enforce code format Co-authored-by: Taichi Gardener --- README.md | 4 ++-- examples/mgpcg_advanced.py | 13 ++++++------- taichi/program/compile_config.cpp | 2 ++ taichi/program/compile_config.h | 4 ++++ taichi/transforms/simplify.cpp | 29 +++++++++++++++++++++++++---- 5 files changed, 39 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index f08a314019b56..14cf760c3368b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@

Docs | Tutorial | DiffTaichi | Examples | Contribute | Forum

-
+ | **Documentations** | **Chat** | taichi-nightly | taichi-nightly-cuda-10-0 | taichi-nightly-cuda-10-1 | |:-----|:-----|:----|:----|:----| @@ -46,7 +46,7 @@ python3 -m pip install taichi-nightly-cuda-10-1 - Experimental support for automatically differentiating through conditional global load/stores (by **Yuanming Hu**) - **Bug fixes** - Fixed stack traceback printing on OS X (#610) (by **Yuanming Hu**) - - **CLI** + - **CLI** - `ti format` now cover all files from upstream/master to the working tree (#629) (by **Ye Kuang**) - `ti test` now uses `argparse` for better customizability (#601) (by **彭于斌**) - **OpenGL backend** diff --git a/examples/mgpcg_advanced.py b/examples/mgpcg_advanced.py index 3b951e93775a3..2d54a4de9416b 100644 --- a/examples/mgpcg_advanced.py +++ b/examples/mgpcg_advanced.py @@ -36,15 +36,14 @@ def __init__(self): shape=(self.N_gui, self.N_gui)) # image buffer indices = ti.ijk if self.dim == 3 else ti.ij - self.grid = ti.root.pointer(indices, - [self.N_tot // 4]).dense(indices, 4).place( - self.x, self.p, self.Ap) + self.grid = ti.root.pointer(indices, [self.N_tot // 4]).dense( + indices, 4).place(self.x, self.p, self.Ap) for l in range(self.n_mg_levels): self.grid = ti.root.pointer(indices, [self.N_tot // (4 * 2**l)]).dense( - indices, 4).place( - self.r[l], self.z[l]) + indices, + 4).place(self.r[l], self.z[l]) ti.root.place(self.alpha, self.beta, self.sum) @@ -55,8 +54,8 @@ def init(self): (self.N_ext, self.N_tot - self.N_ext), ) * self.dim)): self.r[0][I] = 1.0 for k in ti.static(range(self.dim)): - self.r[0][I] *= ti.sin(2.0 * np.pi * - (I[k] - self.N_ext) * 2.0 / self.N_tot) + self.r[0][I] *= ti.sin(2.0 * np.pi * (I[k] - self.N_ext) * + 2.0 / self.N_tot) self.z[0][I] = 0.0 self.Ap[I] = 0.0 self.p[I] = 0.0 diff --git a/taichi/program/compile_config.cpp b/taichi/program/compile_config.cpp index 70adf5dc543de..bc5ccfb0db2a9 100644 --- a/taichi/program/compile_config.cpp +++ b/taichi/program/compile_config.cpp @@ -2,6 +2,8 @@ TLANG_NAMESPACE_BEGIN +const bool advanced_optimization = true; + CompileConfig::CompileConfig() { arch = host_arch(); simd_width = default_simd_width(arch); diff --git a/taichi/program/compile_config.h b/taichi/program/compile_config.h index bc0fa9ac7b188..ec6b4a4f67c5f 100644 --- a/taichi/program/compile_config.h +++ b/taichi/program/compile_config.h @@ -3,6 +3,10 @@ TLANG_NAMESPACE_BEGIN +// TODO(xumingkuan): Temporary variable for benchmarking. +// TODO(xumingkuan): Will be removed in the future. +extern const bool advanced_optimization; + struct CompileConfig { Arch arch; bool debug; diff --git a/taichi/transforms/simplify.cpp b/taichi/transforms/simplify.cpp index be74f80a22e7a..fa6113726e078 100644 --- a/taichi/transforms/simplify.cpp +++ b/taichi/transforms/simplify.cpp @@ -667,8 +667,10 @@ class BasicBlockSimplify : public IRVisitor { } void visit(LinearizeStmt *stmt) override { - // if (is_done(stmt)) - // return; + if (!advanced_optimization) { + if (is_done(stmt)) + return; + } if (stmt->inputs.size() && stmt->inputs.back()->is()) { auto previous_offset = stmt->inputs.back()->as(); @@ -681,7 +683,25 @@ class BasicBlockSimplify : public IRVisitor { offset_stmt->as()->input = stmt; throw IRModified(); } - // set_done(stmt); + if (!advanced_optimization) { + for (int i = 0; i < current_stmt_id; i++) { + auto &bstmt = block->statements[i]; + if (stmt->ret_type == bstmt->ret_type) { + auto &bstmt_data = *bstmt; + if (typeid(bstmt_data) == typeid(*stmt)) { + auto bstmt_ = bstmt->as(); + if (identical_vectors(bstmt_->inputs, stmt->inputs) && + identical_vectors(bstmt_->strides, stmt->strides)) { + stmt->replace_with(bstmt.get()); + stmt->parent->erase(current_stmt_id); + throw IRModified(); + } + } + } + } + set_done(stmt); + return; + } // Lower into a series of adds and muls. auto sum = Stmt::make(LaneAttribute(0)); @@ -1014,7 +1034,8 @@ void simplify(IRNode *root) { void full_simplify(IRNode *root, const CompileConfig &config) { constant_fold(root); - alg_simp(root, config); + if (advanced_optimization) + alg_simp(root, config); die(root); simplify(root); }