From 4eea1ec19429bf457bf117a752ae4479cabfd7ac Mon Sep 17 00:00:00 2001 From: zhengxianli Date: Thu, 13 Apr 2023 11:32:57 +0800 Subject: [PATCH] [perf] Fix Taichi CPU backend compile parameter to pair performance with Numba. (#7731) Issue: #7442 ### Brief Summary In the linked issue, Numba is an order of magnitude faster than Taichi because Taichi's generated code is not automatically vectorized. The root cause is that the `fast_flag` was not passed correctly. To solve this problem, `fast_flag` is now added to the initialization of the CPU codegen. Numba and Taichi now show comparable performance. Here is the performance comparison: numba: 13052.542478 MFlops taichi(master): 6544.274409 MFlops taichi(this pr): 12778.240179 MFlops --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- taichi/codegen/llvm/codegen_llvm.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/taichi/codegen/llvm/codegen_llvm.cpp b/taichi/codegen/llvm/codegen_llvm.cpp index a3133e4c4b2c2..468a501df927b 100644 --- a/taichi/codegen/llvm/codegen_llvm.cpp +++ b/taichi/codegen/llvm/codegen_llvm.cpp @@ -2542,6 +2542,13 @@ void TaskCodeGenLLVM::initialize_context() { TI_ASSERT(tlctx != nullptr); llvm_context = tlctx->get_this_thread_context(); builder = std::make_unique>(*llvm_context); + if (compile_config.fast_math) { + llvm::FastMathFlags fast_flags; + fast_flags.setNoInfs(); + fast_flags.setNoSignedZeros(); + fast_flags.setAllowReassoc(); + builder->setFastMathFlags(fast_flags); + } } llvm::Value *TaskCodeGenLLVM::get_arg(int i) {