diff --git a/CMakeLists.txt b/CMakeLists.txt
index 93aaab654c02e..bc18b82fd9070 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,7 +8,7 @@ project(taichi)
 
 SET(TI_VERSION_MAJOR 0)
 SET(TI_VERSION_MINOR 5)
-SET(TI_VERSION_PATCH 0)
+SET(TI_VERSION_PATCH 1)
 
 execute_process(
   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} 
diff --git a/README.md b/README.md
index 1b495cb0d3e58..192e15ccbe83b 100644
--- a/README.md
+++ b/README.md
@@ -20,23 +20,13 @@ python3 -m pip install taichi-nightly-cuda-10-0
 python3 -m pip install taichi-nightly-cuda-10-1
 ```
 
+## [Contribution Guidelines](https://taichi.readthedocs.io/en/latest/contributor_guide.html)
+
 || **Linux (CUDA)** | **OS X (10.14+)** | **Windows** |
 |:------|:-----|:-----|:-----|
 |**Build**|[![Build Status](http://f11.csail.mit.edu:8080/job/taichi/badge/icon)](http://f11.csail.mit.edu:8080/job/taichi/)| [![Build Status](https://travis-ci.com/taichi-dev/taichi.svg?branch=master)](https://travis-ci.com/taichi-dev/taichi) | [![Build status](https://ci.appveyor.com/api/projects/status/yxm0uniin8xty4j7/branch/master?svg=true)](https://ci.appveyor.com/project/yuanming-hu/taichi/branch/master)|
 |**PyPI**|[![Build Status](https://travis-ci.com/yuanming-hu/taichi-wheels-test.svg?branch=master)](https://travis-ci.com/yuanming-hu/taichi-wheels-test)|[![Build Status](https://travis-ci.com/yuanming-hu/taichi-wheels-test.svg?branch=master)](https://travis-ci.com/yuanming-hu/taichi-wheels-test)|[![Build status](https://ci.appveyor.com/api/projects/status/39ar9wa8yd49je7o?svg=true)](https://ci.appveyor.com/project/IteratorAdvance/taichi-wheels-test)|
 
-## [Contribution Guidelines](https://taichi.readthedocs.io/en/latest/contributor_guide.html)
-
-## Related papers
-- [**(SIGGRAPH Asia 2019) High-Performance Computation on Sparse Data Structures**](http://taichi.graphics/wp-content/uploads/2019/09/taichi_lang.pdf) [[Video]](https://youtu.be/wKw8LMF3Djo) [[BibTex]](https://raw.githubusercontent.com/yuanming-hu/taichi/master/misc/taichi_bibtex.txt)
-  - by *Yuanming Hu, Tzu-Mao Li, Luke Anderson, Jonathan Ragan-Kelley, and Frédo Durand*
-- [**(ICLR 2020) Differentiable Programming for Physical Simulation**](https://arxiv.org/abs/1910.00935) [[Video]](https://www.youtube.com/watch?v=Z1xvAZve9aE) [[BibTex]](https://raw.githubusercontent.com/yuanming-hu/taichi/master/misc/difftaichi_bibtex.txt) [[Code]](https://github.com/yuanming-hu/difftaichi)
-  - by *Yuanming Hu, Luke Anderson, Tzu-Mao Li, Qi Sun, Nathan Carr, Jonathan Ragan-Kelley, and Frédo Durand*
-
-<div align="center">
-  <img width="800px" src="https://github.com/taichi-dev/taichi/blob/master/docs/life_of_kernel_lowres.jpg">
-</div>        
-
 ## Short-term goals
 - (Done) Fully implement the LLVM backend to replace the legacy source-to-source C++/CUDA backends (By Dec 2019)
   - The only missing features compared to the old source-to-source backends:
@@ -46,6 +36,14 @@ python3 -m pip install taichi-nightly-cuda-10-1
 - (WIP) Tune the performance of the LLVM backend to match that of the legacy source-to-source backends (Hopefully by mid Feb, 2020. Current progress: setting up/tuning for final benchmarks)
 
 ## Updates
+- (Feb  16, 2020) v0.5.1 released
+   - Keyboard and mouse events supported in the GUI system. Check out [mpm128.py](https://github.com/taichi-dev/taichi/blob/master/examples/mpm128.py) for an interactive demo! (by **Yubin Peng [archibate] and Ye Kuang [k-ye]**)
+   - Basic algebraic simplification passes (by **Mingkuan Xu [xumingkuan]**)
+   - (For developers) `ti` (`ti.exe`) command supported on Windows after setting `%PATH%` correctly (by **Mingkuan Xu [xumingkuan]**)
+   - General power operator `x ** y` now supported in Taichi kernels (by **Yubin Peng [archibate]**)
+   - `.dense(...).pointer()` now abbreviated as `.pointer(...)`. `pointer` now stands for a dense pointer array. This leads to cleaner code and better performance. (by **Kenneth Lozes [KLozes]**)
+   - (Advanced struct-fors only) `for i in X` now iterates all child instances of `X` instead of `X` itself. Skip this if you only use `X=leaf node` such as `ti.f32/i32/Vector/Matrix`.
+   - Fixed CUDA random number generator race conditions
 - (Feb  14, 2020) **v0.5.0 released with a new Apple Metal GPU backend for Mac OS X users!** (by **Ye Kuang [k-ye]**)
    - Just initialize your program with `ti.init(..., arch=ti.metal)` and run Taichi on your Mac GPUs!
    - A few takeaways if you do want to use the Metal backend:
@@ -103,3 +101,14 @@ python3 -m pip install taichi-nightly-cuda-10-1
    - Doc updated
 
 - [Full changelog](changelog.md)
+
+
+## Related papers
+- [**(SIGGRAPH Asia 2019) High-Performance Computation on Sparse Data Structures**](http://taichi.graphics/wp-content/uploads/2019/09/taichi_lang.pdf) [[Video]](https://youtu.be/wKw8LMF3Djo) [[BibTex]](https://raw.githubusercontent.com/yuanming-hu/taichi/master/misc/taichi_bibtex.txt)
+  - by *Yuanming Hu, Tzu-Mao Li, Luke Anderson, Jonathan Ragan-Kelley, and Frédo Durand*
+- [**(ICLR 2020) Differentiable Programming for Physical Simulation**](https://arxiv.org/abs/1910.00935) [[Video]](https://www.youtube.com/watch?v=Z1xvAZve9aE) [[BibTex]](https://raw.githubusercontent.com/yuanming-hu/taichi/master/misc/difftaichi_bibtex.txt) [[Code]](https://github.com/yuanming-hu/difftaichi)
+  - by *Yuanming Hu, Luke Anderson, Tzu-Mao Li, Qi Sun, Nathan Carr, Jonathan Ragan-Kelley, and Frédo Durand*
+
+<div align="center">
+  <img width="800px" src="https://github.com/taichi-dev/taichi/blob/master/docs/life_of_kernel_lowres.jpg">
+</div>
diff --git a/appveyor.yml b/appveyor.yml
index 7fdcd2d22d168..2813e3619df61 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -33,6 +33,8 @@ skip_commits:
     - '.*'
 
 build_script:
+  - set TAICHI_REPO_DIR=C:\taichi
+  # - 'if %PYTHON% %TAICHI_REPO_DIR%\misc\appveyor_filter.py; then exit 0; fi'
   - cd C:\
   - curl --retry 10 --retry-delay 5 https://github.com/yuanming-hu/taichi_assets/releases/download/llvm8/taichi-llvm-8.0.1-msvc2017.zip -LO
   - 7z x taichi-llvm-8.0.1-msvc2017.zip -otaichi_llvm
@@ -40,7 +42,6 @@ build_script:
   - 7z x clang-7.0.1-win.zip -otaichi_clang
   - set PATH=C:\taichi_llvm\bin;%PATH%;
   - set PATH=C:\taichi_clang\bin;%PATH%
-  - set TAICHI_REPO_DIR=C:\taichi
   - set PYTHONPATH=%TAICHI_REPO_DIR%/python
   - set PATH=%TAICHI_REPO_DIR%\bin;%PATH%
   - clang --version
diff --git a/benchmarks/fill_sparse.py b/benchmarks/fill_sparse.py
index 0e4d72533f362..0ba80f6f8d31a 100644
--- a/benchmarks/fill_sparse.py
+++ b/benchmarks/fill_sparse.py
@@ -6,7 +6,7 @@ def benchmark_nested_struct():
 
   @ti.layout
   def place():
-    ti.root.dense(ti.ij, [N, N]).pointer().dense(ti.ij, [8, 8]).place(a)
+    ti.root.pointer(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)
 
   @ti.kernel
   def fill():
@@ -24,7 +24,7 @@ def benchmark_nested_struct_fill_and_clear():
 
   @ti.layout
   def place():
-    ti.root.dense(ti.ij, [N, N]).pointer().dense(ti.ij, [8, 8]).place(a)
+    ti.root.pointer(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)
 
   @ti.kernel
   def fill():
diff --git a/ci_setup.py b/ci_setup.py
index 5b91d49d6c34b..dbe0637cc8748 100644
--- a/ci_setup.py
+++ b/ci_setup.py
@@ -47,7 +47,7 @@ def get_shell_rc_name():
 
 def get_username():
   if build_type == 'ci':
-    os.environ['TC_CI'] = '1'
+    os.environ['TI_CI'] = '1'
     username = 'travis'
   else:
     assert get_os_name() != 'win'
diff --git a/cmake/TaichiCXXFlags.cmake b/cmake/TaichiCXXFlags.cmake
index 588ca9ba6db76..b03a90593404c 100644
--- a/cmake/TaichiCXXFlags.cmake
+++ b/cmake/TaichiCXXFlags.cmake
@@ -1,6 +1,6 @@
 message("Using C++ compiler: " ${CMAKE_CXX_COMPILER})
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTC_ISE_NONE")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_ISE_NONE")
 
 option(BUILD_WITH_ADDRESS_SANITIZER "Build with clang address sanitizer" OFF)
 
@@ -49,19 +49,19 @@ if (USE_STDCPP)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
 endif()
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTC_PASS_EXCEPTION_TO_PYTHON")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_PASS_EXCEPTION_TO_PYTHON")
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTC_INCLUDED")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_INCLUDED")
 
-if ($ENV{TC_USE_DOUBLE})
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTC_USE_DOUBLE")
+if ($ENV{TI_USE_DOUBLE})
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_USE_DOUBLE")
     message("Using float64 (double) precision as real")
 else()
     message("Using float32 (single) precision as real")
 endif()
 
-if (TC_USE_MPI)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTC_USE_MPI")
+if (TI_USE_MPI)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_USE_MPI")
     message("Using MPI")
 endif ()
 
diff --git a/cmake/TaichiCore.cmake b/cmake/TaichiCore.cmake
index b831df4405943..803b7c79fc601 100644
--- a/cmake/TaichiCore.cmake
+++ b/cmake/TaichiCore.cmake
@@ -40,7 +40,7 @@ if (TI_WITH_CUDA)
         message("Building with CUDA ${CUDA_VERSION}")
         set(CUDA_ARCH 61)
         message("Found CUDA. Arch = ${CUDA_ARCH}")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCUDA_FOUND -DTI_WITH_CUDA")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTI_WITH_CUDA")
         if (MSVC)
             include_directories(${CUDA_TOOLKIT_ROOT_DIR}/include)
             target_link_libraries(${LIBRARY_NAME} ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64/cudart.lib ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64/cuda.lib)
@@ -83,7 +83,7 @@ llvm_map_components_to_libnames(llvm_libs
         )
 target_link_libraries(${LIBRARY_NAME} ${llvm_libs})
 
-if (CUDA_FOUND)
+if (TI_WITH_CUDA)
     llvm_map_components_to_libnames(llvm_ptx_libs NVPTX)
     target_link_libraries(${LIBRARY_NAME} ${llvm_ptx_libs})
 endif()
diff --git a/docs/cpp_style.rst b/docs/cpp_style.rst
index b9c8295bac726..6104b5b861124 100644
--- a/docs/cpp_style.rst
+++ b/docs/cpp_style.rst
@@ -5,7 +5,7 @@ Naming
 --------------------------------------------------------------------------
 - Variable names should consist of lowercase words connected by underscores, e.g. ``llvm_context``.
 - Class and struct names should consist of words with first letters capitalized, e.g. ``CodegenLLVM``.
-- Macros should be capital start with ``TC``, such as ``TC_INFO``, ``TC_IMPLEMENTATION``.
+- Macros should be capitalized and start with ``TI``, such as ``TI_INFO``, ``TI_IMPLEMENTATION``.
 
    - We do not encourage the use of macro, although there are cases where macros are inevitable.
 
diff --git a/docs/utilities.rst b/docs/utilities.rst
index 55642ac85f719..3e14f5fe53e56 100644
--- a/docs/utilities.rst
+++ b/docs/utilities.rst
@@ -47,45 +47,45 @@ Serialization
 
 The serialization module of taichi allows you to serialize/deserialize objects into/from binary strings.
 
-You can use ``TC_IO`` macros to explicit define fields necessary in Taichi.
+You can use ``TI_IO`` macros to explicitly define fields necessary in Taichi.
 
 .. code-block:: cpp
 
-    // TC_IO_DEF
+    // TI_IO_DEF
     struct Particle {
         Vector3f position, velocity;
         real mass;
         string name;
 
-        TC_IO_DEF(position, velocity, mass, name);
+        TI_IO_DEF(position, velocity, mass, name);
     }
 
-    // TC_IO_DECL
+    // TI_IO_DECL
     struct Particle {
         Vector3f position, velocity;
         real mass;
         bool has_name
         string name;
 
-        TC_IO_DECL() {
-            TC_IO(position);
-            TC_IO(velocity);
-            TC_IO(mass);
-            TC_IO(has_name);
+        TI_IO_DECL() {
+            TI_IO(position);
+            TI_IO(velocity);
+            TI_IO(mass);
+            TI_IO(has_name);
             // More flexibility:
             if (has_name) {
-                TC_IO(name);
+                TI_IO(name);
             }
         }
     }
 
-    // TC_IO_DEF_VIRT();
+    // TI_IO_DEF_VIRT();
 
 
 Progress Notification
 ----------------------------------
 
-The taichi messager can send an email to ``$TC_MONITOR_EMAIL`` when the task finished or crashed.
+The taichi messager can send an email to ``$TI_MONITOR_EMAIL`` when the task finishes or crashes.
 To enable:
 
 .. code-block:: python
diff --git a/docs/version b/docs/version
index 8f0916f768f04..4b9fcbec101a6 100644
--- a/docs/version
+++ b/docs/version
@@ -1 +1 @@
-0.5.0
+0.5.1
diff --git a/examples/cpp/cnn.cpp b/examples/cpp/cnn.cpp
index 457fdbe0b7adb..04e2faeb12d5b 100644
--- a/examples/cpp/cnn.cpp
+++ b/examples/cpp/cnn.cpp
@@ -3,7 +3,7 @@
 #include <taichi/visual/gui.h>
 #include <taichi/system/profiler.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 using namespace Tlang;
 
@@ -12,10 +12,10 @@ constexpr int n = 256;
 constexpr int num_ch1 = 16, num_ch2 = 16;
 
 auto cnn = [](std::vector<std::string> cli_param) {
-  TC_WARN(
+  TI_WARN(
       "After refactoring the Texture class from Taichi is removed. Need to "
       "read from the raw bunny binary.");
-  TC_NOT_IMPLEMENTED
+  TI_NOT_IMPLEMENTED
 #if (0)
   CoreState::set_trigger_gdb_when_crash(true);
   auto param = parse_param(cli_param);
@@ -24,7 +24,7 @@ auto cnn = [](std::vector<std::string> cli_param) {
   auto cache_l1 = param.get("cache_l1", true);
   auto use_dense = param.get("use_dense", false);
   auto write_input_voxel = param.get("write_input", true);
-  TC_P(use_dense);
+  TI_P(use_dense);
 
   Program prog(gpu ? Arch::gpu : Arch::x86_64);
   prog.config.simplify_before_lower_access = opt;
@@ -228,6 +228,6 @@ auto cnn = [](std::vector<std::string> cli_param) {
 #endif
 #endif
 };
-TC_REGISTER_TASK(cnn);
+TI_REGISTER_TASK(cnn);
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/examples/cpp/diff_conv.cpp b/examples/cpp/diff_conv.cpp
index 46466ef5756e5..e35cee5ef5a8b 100644
--- a/examples/cpp/diff_conv.cpp
+++ b/examples/cpp/diff_conv.cpp
@@ -1,23 +1,23 @@
 #include <taichi/util.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 auto diff_conv = [](const std::vector<std::string> args) {
   int grid_resolution = 254;
-  TC_ASSERT(args.size() == 3);
+  TI_ASSERT(args.size() == 3);
   float th = std::stof(args[2]);
-  TC_P(th);
+  TI_P(th);
   auto f = fopen(args[0].c_str(), "rb");
 
   int n = pow<3>(grid_resolution);
-  TC_ASSERT(f);
+  TI_ASSERT(f);
 
   std::vector<float32> ret1(n);
   trash(std::fread(ret1.data(), sizeof(float32), ret1.size(), f));
   std::fclose(f);
 
   f = fopen(args[1].c_str(), "rb");
-  TC_ASSERT(f);
+  TI_ASSERT(f);
   std::vector<float32> ret2(n);
   trash(std::fread(ret2.data(), sizeof(float32), ret2.size(), f));
   std::fclose(f);
@@ -53,19 +53,19 @@ auto diff_conv = [](const std::vector<std::string> args) {
     // fprintf(stderr, "ret1:%f, ret2:%f\n", ret1[i], ret2[i]);
     //}
   }
-  TC_INFO("same {} {}%", counter[0], 100.0f * counter[0] / n);
-  TC_INFO("non zero same {} {}%", counter[0],
+  TI_INFO("same {} {}%", counter[0], 100.0f * counter[0] / n);
+  TI_INFO("non zero same {} {}%", counter[0],
           100.0f * counter[1] / total_non_zero);
-  TC_P(sum1 / n);
-  TC_P(sum2 / n);
-  TC_P(sum1 / total_non_zero);
-  TC_P(sum2 / total_non_zero);
-  TC_P(max1);
-  TC_P(max2);
-  TC_P(non_zero1);
-  TC_P(non_zero2);
+  TI_P(sum1 / n);
+  TI_P(sum2 / n);
+  TI_P(sum1 / total_non_zero);
+  TI_P(sum2 / total_non_zero);
+  TI_P(max1);
+  TI_P(max2);
+  TI_P(non_zero1);
+  TI_P(non_zero2);
 };
 
-TC_REGISTER_TASK(diff_conv);
+TI_REGISTER_TASK(diff_conv);
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/examples/cpp/fem.cpp b/examples/cpp/fem.cpp
index 45ddc351d8e29..df1a89b5b08c6 100644
--- a/examples/cpp/fem.cpp
+++ b/examples/cpp/fem.cpp
@@ -5,7 +5,7 @@
 #include <taichi/visual/texture.h>
 #include <deque>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 #include "fem_coeff.h"
 
@@ -30,30 +30,30 @@ auto fem = [](std::vector<std::string> cli_param) {
   auto param = parse_param(cli_param);
 
   bool gpu = param.get("gpu", false);
-  TC_P(gpu);
+  TI_P(gpu);
   bool vectorization = param.get("vec", true);
-  TC_P(vectorization);
+  TI_P(vectorization);
   int threads = param.get("threads", 8);
-  TC_P(threads);
+  TI_P(threads);
   bool use_cache = param.get("cache", true);
-  TC_P(use_cache);
+  TI_P(use_cache);
   bool compute_gt = param.get("compute_gt", false);
-  TC_P(compute_gt);
+  TI_P(compute_gt);
   Program prog(gpu ? Arch::gpu : Arch::x86_64);
   prog.config.simplify_before_lower_access = param.get("simp1", true);
-  TC_P(prog.config.simplify_before_lower_access);
+  TI_P(prog.config.simplify_before_lower_access);
   prog.config.lower_access = param.get("lower_access", true);
-  TC_P(prog.config.lower_access);
+  TI_P(prog.config.lower_access);
   prog.config.print_ir = param.get("print_ir", false);
-  TC_P(prog.config.print_ir);
+  TI_P(prog.config.print_ir);
   prog.config.simplify_after_lower_access = param.get("simp2", true);
-  TC_P(prog.config.simplify_after_lower_access);
+  TI_P(prog.config.simplify_after_lower_access);
   prog.config.attempt_vectorized_load_cpu = param.get("vec_load_cpu", true);
-  TC_P(prog.config.attempt_vectorized_load_cpu);
+  TI_P(prog.config.attempt_vectorized_load_cpu);
   bool use_pointer = param.get("use_pointer", true);
-  TC_P(use_pointer);
+  TI_P(use_pointer);
   bool block_soa = param.get("block_soa", true);
-  TC_P(block_soa);
+  TI_P(block_soa);
   prog.config.lazy_compilation = false;
 
   Vector x(DataType::f32, dim), r(DataType::f32, dim), p(DataType::f32, dim),
@@ -261,7 +261,7 @@ auto fem = [](std::vector<std::string> cli_param) {
   for (int i = 0; i < n - 1; i++) {
     for (int j = 0; j < n - 1; j++) {
       for (int k = 0; k < n - 1; k++) {
-        TC_ASSERT(!active[i][j][k]);
+        TI_ASSERT(!active[i][j][k]);
       }
     }
   }
@@ -275,16 +275,16 @@ auto fem = [](std::vector<std::string> cli_param) {
   auto old_rTr = sum.val<float32>();
 
   for (int i = 0; i < 1000; i++) {
-    TC_P(i);
+    TI_P(i);
     compute_Ap();
     sum.val<float32>() = 0;
     reduce_pAp();
     auto pAp = sum.val<float32>();
     // alpha = rTr / pTAp
     alpha.val<float32>() = old_rTr / pAp;
-    TC_P(old_rTr);
-    // TC_P(pAp);
-    // TC_P(alpha.val<float32>());
+    TI_P(old_rTr);
+    // TI_P(pAp);
+    // TI_P(alpha.val<float32>());
     // x = x + alpha p
     update_x();
     // r = r - alpha Ap
@@ -293,12 +293,12 @@ auto fem = [](std::vector<std::string> cli_param) {
     sum.val<float32>() = 0;
     reduce_r();
     auto new_rTr = sum.val<float32>();
-    // TC_P(new_rTr);
+    // TI_P(new_rTr);
     if (new_rTr < 1e-5f)
       break;
     // beta = new rTr / old rTr
     beta.val<float32>() = new_rTr / old_rTr;
-    // TC_P(beta.val<float32>());
+    // TI_P(beta.val<float32>());
     // p = r + beta p
     update_p();
     old_rTr = new_rTr;
@@ -326,7 +326,7 @@ auto fem = [](std::vector<std::string> cli_param) {
       }
     }
   }
-  TC_P(residual);
+  TI_P(residual);
   auto difference = 0.0f;
   auto difference_max = 0.0f;
   for (int i = 0; i < n; i++) {
@@ -341,8 +341,8 @@ auto fem = [](std::vector<std::string> cli_param) {
       }
     }
   }
-  TC_P(difference);
-  TC_P(difference_max);
+  TI_P(difference);
+  TI_P(difference_max);
 
   int gui_res = 512;
   GUI gui("FEM", Vector2i(gui_res + 200, gui_res), false);
@@ -371,6 +371,6 @@ auto fem = [](std::vector<std::string> cli_param) {
     gui.update();
   }
 };
-TC_REGISTER_TASK(fem);
+TI_REGISTER_TASK(fem);
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/examples/cpp/mgpcg.cpp b/examples/cpp/mgpcg.cpp
index 1276e9cd58d32..b1a5268d654eb 100644
--- a/examples/cpp/mgpcg.cpp
+++ b/examples/cpp/mgpcg.cpp
@@ -4,7 +4,7 @@
 #include <taichi/system/profiler.h>
 #include <taichi/visual/texture.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 using namespace Tlang;
 
@@ -19,15 +19,15 @@ auto mgpcg_poisson = [](std::vector<std::string> cli_param) {
   int block_size = 8;
 
   int threads = param.get("threads", 8);
-  TC_P(threads);
+  TI_P(threads);
   bool vec_option = param.get("vec", true);
   int vec = vec_option ? block_size : 1;
-  TC_ASSERT(vec == 1 || vec == block_size);
-  TC_P(vec);
+  TI_ASSERT(vec == 1 || vec == block_size);
+  TI_P(vec);
   bool load_gt = param.get("load_gt", false);
-  TC_P(load_gt)
+  TI_P(load_gt)
   bool gpu = param.get("gpu", false);
-  TC_P(gpu)
+  TI_P(gpu)
 
   CoreState::set_trigger_gdb_when_crash(true);
 
@@ -35,15 +35,15 @@ auto mgpcg_poisson = [](std::vector<std::string> cli_param) {
   // prog.config.print_ir = true;
 
   prog.config.simplify_before_lower_access = param.get("simp1", true);
-  TC_P(prog.config.simplify_before_lower_access);
+  TI_P(prog.config.simplify_before_lower_access);
   prog.config.lower_access = param.get("lower_access", true);
-  TC_P(prog.config.lower_access);
+  TI_P(prog.config.lower_access);
   prog.config.print_ir = param.get("print_ir", false);
-  TC_P(prog.config.print_ir);
+  TI_P(prog.config.print_ir);
   prog.config.simplify_after_lower_access = param.get("simp2", true);
-  TC_P(prog.config.simplify_after_lower_access);
+  TI_P(prog.config.simplify_after_lower_access);
   prog.config.attempt_vectorized_load_cpu = param.get("vec_load_cpu", true);
-  TC_P(prog.config.attempt_vectorized_load_cpu);
+  TI_P(prog.config.attempt_vectorized_load_cpu);
 
   prog.config.lazy_compilation = false;
 
@@ -278,7 +278,7 @@ auto mgpcg_poisson = [](std::vector<std::string> cli_param) {
   sum.val<float32>() = 0;
   reduce_r();
   auto initial_rTr = sum.val<float32>();
-  TC_P(initial_rTr);
+  TI_P(initial_rTr);
 
   // r = b - Ax = b    since x = 0
   // p = r = r + 0 p
@@ -291,16 +291,16 @@ auto mgpcg_poisson = [](std::vector<std::string> cli_param) {
   // CG
   auto t = Time::get_time();
   for (int i = 0; i < 400; i++) {
-    TC_P(i);
+    TI_P(i);
     compute_Ap();
     sum.val<float32>() = 0;
     reduce_pAp();
     auto pAp = sum.val<float32>();
     // alpha = rTr / pTAp
     alpha.val<float32>() = old_zTr / pAp;
-    // TC_P(old_zTr);
-    // TC_P(pAp);
-    // TC_P(alpha.val<float32>());
+    // TI_P(old_zTr);
+    // TI_P(pAp);
+    // TI_P(alpha.val<float32>());
     // x = x + alpha p
     update_x();
     // r = r - alpha Ap
@@ -311,21 +311,21 @@ auto mgpcg_poisson = [](std::vector<std::string> cli_param) {
     sum.val<float32>() = 0;
     reduce_zTr();
     auto new_zTr = sum.val<float32>();
-    // TC_P(new_zTr);
+    // TI_P(new_zTr);
     sum.val<float32>() = 0;
     reduce_r();
     auto rTr = sum.val<float32>();
-    TC_P(rTr);
+    TI_P(rTr);
     if (rTr < initial_rTr * 1e-12f)
       break;
     // beta = new rTr / old rTr
     beta.val<float32>() = new_zTr / old_zTr;
-    // TC_P(beta.val<float32>());
+    // TI_P(beta.val<float32>());
     // p = z + beta p
     update_p();
     old_zTr = new_zTr;
   }
-  TC_P(Time::get_time() - t);
+  TI_P(Time::get_time() - t);
   get_current_program().profiler_print();
 
   compute_Ap();
@@ -339,8 +339,8 @@ auto mgpcg_poisson = [](std::vector<std::string> cli_param) {
       }
     }
   }
-  TC_P(residual);
-  // TC_P(difference_max);
+  TI_P(residual);
+  // TI_P(difference_max);
 
   std::vector<float32> ref_input(pow<3>(n / 2));
   if (load_gt) {
@@ -351,7 +351,7 @@ auto mgpcg_poisson = [](std::vector<std::string> cli_param) {
   for (auto r : ref_input) {
     absmax = std::max(std::abs(absmax), r);
   }
-  TC_P(absmax);
+  TI_P(absmax);
 
   int gui_res = 512;
   GUI gui("MGPCG Poisson", Vector2i(gui_res + 200, gui_res), false);
@@ -376,6 +376,6 @@ auto mgpcg_poisson = [](std::vector<std::string> cli_param) {
     gui.update();
   }
 };
-TC_REGISTER_TASK(mgpcg_poisson);
+TI_REGISTER_TASK(mgpcg_poisson);
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/examples/cpp/mpm_benchmark.cpp b/examples/cpp/mpm_benchmark.cpp
index 618492974513f..74d34a4ce40b1 100644
--- a/examples/cpp/mpm_benchmark.cpp
+++ b/examples/cpp/mpm_benchmark.cpp
@@ -6,7 +6,7 @@
 #include <taichi/common/bit.h>
 #include <taichi/system/profiler.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 using namespace Tlang;
 
@@ -15,18 +15,18 @@ auto mpm_benchmark = [](std::vector<std::string> cli_param) {
 
   auto param = parse_param(cli_param);
   bool particle_soa = param.get("particle_soa", false);
-  TC_P(particle_soa);
+  TI_P(particle_soa);
   bool block_soa = param.get("block_soa", true);
-  TC_P(block_soa);
+  TI_P(block_soa);
   bool use_cache = param.get("use_cache", true);
-  TC_P(use_cache);
+  TI_P(use_cache);
   bool initial_reorder = param.get("initial_reorder", true);
-  TC_P(initial_reorder);
+  TI_P(initial_reorder);
   bool initial_shuffle = param.get("initial_shuffle", false);
-  TC_P(initial_shuffle);
+  TI_P(initial_shuffle);
   prog.config.lower_access = param.get("lower_access", false);
   int stagger = param.get("stagger", true);
-  TC_P(stagger);
+  TI_P(stagger);
 
   constexpr int dim = 3, n = 256, grid_block_size = 4, n_particles = 775196;
   const real dt = 1e-5_f * 256 / n, dx = 1.0_f / n, inv_dx = 1.0_f / dx;
@@ -45,7 +45,7 @@ auto mpm_benchmark = [](std::vector<std::string> cli_param) {
   p_x.resize(n_particles);
   std::vector<float> benchmark_particles;
   auto f = fopen("dragon_particles.bin", "rb");
-  TC_ASSERT_INFO(f, "./dragon_particles.bin not found");
+  TI_ASSERT_INFO(f, "./dragon_particles.bin not found");
   benchmark_particles.resize(n_particles * 3);
   if (std::fread(benchmark_particles.data(), sizeof(float), n_particles * 3,
                  f)) {
@@ -81,7 +81,7 @@ auto mpm_benchmark = [](std::vector<std::string> cli_param) {
       place(particle_x(i));
     for (int i = 0; i < dim; i++)
       place(particle_v(i));
-    TC_ASSERT(n % grid_block_size == 0);
+    TI_ASSERT(n % grid_block_size == 0);
     auto &block = root.dense({i, j, k}, n / grid_block_size).pointer();
     if (block_soa) {
       block.dense({i, j, k}, grid_block_size).place(grid_v(0));
@@ -253,7 +253,7 @@ auto mpm_benchmark = [](std::vector<std::string> cli_param) {
     }
     prog.profiler_print();
     auto ms_per_substep = (Time::get_time() - t) / 200 * 1000;
-    TC_P(ms_per_substep);
+    TI_P(ms_per_substep);
   };
 
 #if (0)
@@ -323,6 +323,6 @@ auto mpm_benchmark = [](std::vector<std::string> cli_param) {
   }
 #endif
 };
-TC_REGISTER_TASK(mpm_benchmark);
+TI_REGISTER_TASK(mpm_benchmark);
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/examples/cpp/mpm_full.cpp b/examples/cpp/mpm_full.cpp
index bcfa7aa495ca4..7272bf62d59b3 100644
--- a/examples/cpp/mpm_full.cpp
+++ b/examples/cpp/mpm_full.cpp
@@ -5,7 +5,7 @@
 #include <taichi/system/profiler.h>
 #include "volume_renderer.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 using namespace Tlang;
 
@@ -37,16 +37,16 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
   int seeding_frames = param.get("seeding_frames", 300);
 
   real G = -1000.0f;
-  TC_P(total_frames);
-  TC_P(seeding_frames);
-  TC_P(dt);
+  TI_P(total_frames);
+  TI_P(seeding_frames);
+  TI_P(dt);
   dt = frame_dt / std::ceil(frame_dt / dt - 1e-5f);
-  TC_P(dt);
-  TC_P(frame_dt);
+  TI_P(dt);
+  TI_P(frame_dt);
   int visualize_interval = param.get<int32>("visualize_interval", 5);
-  TC_P(visualize_interval);
+  TI_P(visualize_interval);
   real ground_friction = param.get<real>("ground_friction", 0.2f);
-  TC_P(ground_friction);
+  TI_P(ground_friction);
   auto particle_mass = 1.0_f, vol = 1.0_f;
   auto E = param.get<real>("E", 1e4), nu = 0.3f;
   real mu_0 = E / (2 * (1 + nu)), lambda_0 = E * nu / ((1 + nu) * (1 - 2 * nu));
@@ -61,7 +61,7 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
   bool bbox = param.get("bbox", false);
   int scene = param.get("scene", 0);
   if (scene == 0) {
-    TC_INFO(
+    TI_INFO(
         "Scene: [1] ball smash    [2] one jet    [3] two static jets   [4] two "
         "moving jets  [5] particle curtain");
     exit(0);
@@ -78,12 +78,12 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
   } else if (material_name == "jelly") {
     material = MPMMaterial::jelly;
   } else {
-    TC_ERROR("Unknown material {}", material_name);
+    TI_ERROR("Unknown material {}", material_name);
   }
 
-  TC_P(material_name);
-  TC_P(bbox);
-  TC_P(scene);
+  TI_P(material_name);
+  TI_P(bbox);
+  TI_P(scene);
 
   Vector particle_x("x", f32, dim), particle_v("v", f32, dim);
   Global(particle_color, i32);
@@ -105,9 +105,9 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
   if (benchmark_dragon) {
     n_particles = 775196;
     p_x.resize(n_particles);
-    TC_ASSERT(n_particles <= max_n_particles);
+    TI_ASSERT(n_particles <= max_n_particles);
     auto f = fopen("dragon_particles.bin", "rb");
-    TC_ASSERT_INFO(f, "./dragon_particles.bin not found");
+    TI_ASSERT_INFO(f, "./dragon_particles.bin not found");
     benchmark_particles.resize(n_particles * 3);
     if (std::fread(benchmark_particles.data(), sizeof(float), n_particles * 3,
                    f)) {
@@ -141,7 +141,7 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
     }
   }
 
-  TC_ASSERT(n_particles <= max_n_particles);
+  TI_ASSERT(n_particles <= max_n_particles);
 
   auto sample_unit_sphere = [&] {
     Vector3 offset = Vector3::rand() - Vector3(0.5_f);
@@ -186,7 +186,7 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
     place(particle_J);
     place(particle_color);
 
-    TC_ASSERT(n % grid_block_size == 0);
+    TI_ASSERT(n % grid_block_size == 0);
     auto &block = root.dense({i, j, k}, grid_n / 4 / grid_block_size)
                       .pointer()
                       .dense({i, j, k}, 4)
@@ -215,7 +215,7 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
 
   // prog.visualize_layout("layout.tex");
 
-  TC_ASSERT(bit::is_power_of_two(n));
+  TI_ASSERT(bit::is_power_of_two(n));
 
   Kernel(summarize).def([&] {
     BlockDim(512);
@@ -560,15 +560,15 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
         ->clear_data_and_deactivate();
     auto t = Time::get_time();
     for (int f = 0; f < std::round(frame_dt / dt); f++) {
-      TC_PROFILE("p2g", p2g());
-      TC_PROFILE("grid_op", grid_op());
-      TC_PROFILE("g2p", g2p());
+      TI_PROFILE("p2g", p2g());
+      TI_PROFILE("grid_op", grid_op());
+      TI_PROFILE("g2p", g2p());
     }
     prog.profiler_print();
     auto ms_per_substep = (Time::get_time() - t) / 200 * 1000;
-    TC_P(ms_per_substep);
+    TI_P(ms_per_substep);
     auto sec_per_frame = Time::get_time() - t;
-    TC_P(sec_per_frame);
+    TI_P(sec_per_frame);
   };
 
   Kernel(set_renderer_volume).def([&] {
@@ -584,7 +584,7 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
   for (int frame = 0; frame < total_frames; frame++) {
     float32 current_t = frame_dt * frame;
     if (frame < seeding_frames) {
-      TC_P(scene);
+      TI_P(scene);
       if (scene == 2) {
         int N = 10000;
         if (n_particles + N <= max_n_particles) {
@@ -625,7 +625,7 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
         n_particles += N;
       } else if (scene == 5) {
         int N = 10000;
-        TC_P(N);
+        TI_P(N);
         if (n_particles + N <= max_n_particles) {
           for (int i = 0; i < N; i++) {
             Vector3 color(0.5, 0.6, 0.4);
@@ -640,7 +640,7 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
         }
       }
     }
-    TC_P(n_particles)
+    TI_P(n_particles)
     simulate_frame();
     // auto res = canvas.img.get_res();
 
@@ -656,11 +656,11 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
     auto ot = Time::get_time();
     std::vector<float32> particle_data(max_n_particles * 7);
     summarize();
-#if defined(CUDA_FOUND)
+#if defined(TI_WITH_CUDA)
     cudaMemcpy(particle_data.data(), &particle_buffer.val<float32>(0),
                particle_data.size() * sizeof(float), cudaMemcpyDeviceToHost);
 #else
-    TC_ERROR("No CUDA.");
+    TI_ERROR("No CUDA.");
 #endif
 
     for (int i = 0; i < n_particles; i++) {
@@ -672,7 +672,7 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
             std::max(renderer.parameters.box_max[k], v);
       }
     }
-    TC_P(Time::get_time() - ot);
+    TI_P(Time::get_time() - ot);
     create_directories(fmt::format("final_particles/{}", output));
     /*
     write_to_binary_file(
@@ -714,9 +714,9 @@ auto mpm_full = [](std::vector<std::string> cli_param) {
     print_profile_info();
   }
 };
-TC_REGISTER_TASK(mpm_full);
+TI_REGISTER_TASK(mpm_full);
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
 
 // demos:
 // two sand jets:
diff --git a/examples/cpp/ray_march.cpp b/examples/cpp/ray_march.cpp
index 86fd8e9a52c2d..bf697c0c4637e 100644
--- a/examples/cpp/ray_march.cpp
+++ b/examples/cpp/ray_march.cpp
@@ -137,6 +137,6 @@ auto ray_march = [] {
     gui.update();
   }
 };
-TC_REGISTER_TASK(ray_march);
+TI_REGISTER_TASK(ray_march);
 
 TLANG_NAMESPACE_END
diff --git a/examples/cpp/smoke_renderer.cpp b/examples/cpp/smoke_renderer.cpp
index d3e61645fd6c1..74db4fdfb9cd6 100644
--- a/examples/cpp/smoke_renderer.cpp
+++ b/examples/cpp/smoke_renderer.cpp
@@ -9,10 +9,10 @@ extern bool use_gui;
 auto smoke_renderer = [](std::vector<std::string> cli_param_) {
   auto cli_param = parse_param(cli_param_);
   bool gpu = cli_param.get("gpu", true);
-  TC_P(gpu);
+  TI_P(gpu);
   Program prog(gpu ? Arch::gpu : Arch::x86_64);
   bool benchmark = true;  // benchmark the bunny cloud against tungsten?
-  TC_ASSERT(benchmark);
+  TI_ASSERT(benchmark);
   // CoreState::set_trigger_gdb_when_crash(true);
   // prog.config.print_ir = true;
   float32 target_max_density = 50.f;
@@ -37,7 +37,7 @@ auto smoke_renderer = [](std::vector<std::string> cli_param_) {
 
   if (benchmark) {
     auto f = fopen("bunny_cloud.bin", "rb");
-    TC_ASSERT_INFO(f, "./bunny_cloud.bin not found");
+    TI_ASSERT_INFO(f, "./bunny_cloud.bin not found");
     int box_sizes[3]{584, 576, 440};
     int total_voxels = box_sizes[0] * box_sizes[1] * box_sizes[2];
     std::vector<float32> density_field(total_voxels);
@@ -50,7 +50,7 @@ auto smoke_renderer = [](std::vector<std::string> cli_param_) {
       max_density = std::max(max_density, density_field[i]);
     }
 
-    TC_P(max_density);
+    TI_P(max_density);
 
     for (int i = 0; i < total_voxels; i++) {
       density_field[i] /= max_density;         // normalize to 1 first
@@ -125,13 +125,13 @@ auto smoke_renderer = [](std::vector<std::string> cli_param_) {
     }
   }
 };
-TC_REGISTER_TASK(smoke_renderer);
+TI_REGISTER_TASK(smoke_renderer);
 
 auto smoke_renderer_gui = [](std::vector<std::string> cli_param) {
   use_gui = true;
   smoke_renderer(cli_param);
 };
 
-TC_REGISTER_TASK(smoke_renderer_gui);
+TI_REGISTER_TASK(smoke_renderer_gui);
 
 TLANG_NAMESPACE_END
diff --git a/examples/cpp/smoke_renderer.h b/examples/cpp/smoke_renderer.h
index be7399f9ac63f..4673eee5211f7 100644
--- a/examples/cpp/smoke_renderer.h
+++ b/examples/cpp/smoke_renderer.h
@@ -29,8 +29,8 @@ class SmokeRenderer {
     depth_limit = param.get("depth_limit", 128);
     output_res = param.get("output_res", Vector2i(1024, 512));
 
-    TC_ASSERT(bit::is_power_of_two(output_res.x));
-    TC_ASSERT(bit::is_power_of_two(output_res.y));
+    TI_ASSERT(bit::is_power_of_two(output_res.x));
+    TI_ASSERT(bit::is_power_of_two(output_res.y));
 
     sky_map_size = Vector2i(512, 128);
     n_sky_samples = 1024;
@@ -373,12 +373,12 @@ class SmokeRenderer {
     std::FILE *f;
     if (use_sky_map) {
       f = fopen("sky_map.bin", "rb");
-      TC_ASSERT_INFO(f, "./sky_map.bin not found");
+      TI_ASSERT_INFO(f, "./sky_map.bin not found");
       std::vector<uint32> sky_map_data(sky_map_size.prod() * 3);
       std::fread(sky_map_data.data(), sizeof(uint32), sky_map_data.size(), f);
 
       f = fopen("sky_samples.bin", "rb");
-      TC_ASSERT_INFO(f, "./sky_samples.bin not found");
+      TI_ASSERT_INFO(f, "./sky_samples.bin not found");
       std::vector<uint32> sky_sample_data(n_sky_samples * 5);
       trash(std::fread(sky_sample_data.data(), sizeof(uint32),
                        sky_sample_data.size(), f));
diff --git a/examples/cpp/volume_renderer.cpp b/examples/cpp/volume_renderer.cpp
index b4f1608a93001..ea5fe7e1e8f3a 100644
--- a/examples/cpp/volume_renderer.cpp
+++ b/examples/cpp/volume_renderer.cpp
@@ -4,7 +4,7 @@ TLANG_NAMESPACE_BEGIN
 bool use_gui = false;
 TLANG_NAMESPACE_END
 
-#if defined(CUDA_FOUND)
+#if defined(TI_WITH_CUDA)
 #include <cuda_runtime_api.h>
 
 TLANG_NAMESPACE_BEGIN
@@ -13,9 +13,9 @@ auto volume_renderer = [](std::vector<std::string> cli_param) {
   auto param = parse_param(cli_param);
 
   bool gpu = param.get("gpu", true);
-  TC_P(gpu);
+  TI_P(gpu);
   std::string fn = param.get("fn", "snow_density_256.bin");
-  TC_P(fn);
+  TI_P(fn);
   CoreState::set_trigger_gdb_when_crash(true);
   Program prog(gpu ? Arch::gpu : Arch::x86_64);
   TRenderer renderer((Dict()));
@@ -71,7 +71,7 @@ auto volume_renderer = [](std::vector<std::string> cli_param) {
     last_voxel_level = voxel_level;
     std::vector<Vector3> particles;
     auto f = fopen(fn.c_str(), "rb");
-    TC_WARN_IF(!f, "{} not found", fn);
+    TI_WARN_IF(!f, "{} not found", fn);
 
     if (!f)
       return;
@@ -91,7 +91,7 @@ auto volume_renderer = [](std::vector<std::string> cli_param) {
         max_density = std::max(max_density, density_field[i]);
       }
 
-      TC_P(max_density);
+      TI_P(max_density);
 
       for (int i = 0; i < pow<3>(grid_resolution); i++) {
         density_field[i] /= max_density;             // normalize to 1 first
@@ -135,11 +135,11 @@ auto volume_renderer = [](std::vector<std::string> cli_param) {
       } else if (voxel_level == 4) {
         coarsening = 64;
       } else {
-        TC_ASSERT(false);
+        TI_ASSERT(false);
       }
       renderer.parameters.grid_resolution = 256 / coarsening;
       renderer.check_param_update();
-      TC_P(particles.size());
+      TI_P(particles.size());
       rasterize();
     }
     for (int d = 0; d < 3; d++) {
@@ -267,17 +267,17 @@ auto volume_renderer = [](std::vector<std::string> cli_param) {
       frame += video_step - 1;
     gui->canvas->img.write_as_image(fmt::format("gui/{:05d}.png", frame));
     Time::sleep(0.03);
-    TC_P(Time::get_time() - ft);
+    TI_P(Time::get_time() - ft);
   }
 };
-TC_REGISTER_TASK(volume_renderer);
+TI_REGISTER_TASK(volume_renderer);
 
 auto volume_renderer_gui = [](std::vector<std::string> cli_param) {
   use_gui = true;
   volume_renderer(cli_param);
 };
 
-TC_REGISTER_TASK(volume_renderer_gui);
+TI_REGISTER_TASK(volume_renderer_gui);
 
 TLANG_NAMESPACE_END
 #endif
diff --git a/examples/cpp/volume_renderer.h b/examples/cpp/volume_renderer.h
index 853a68eadee32..a21c404137f6a 100644
--- a/examples/cpp/volume_renderer.h
+++ b/examples/cpp/volume_renderer.h
@@ -67,7 +67,7 @@ class TRenderer {
   }
 
   void place_data() {
-    TC_ASSERT(output_res == Vector2i(1280, 720));
+    TI_ASSERT(output_res == Vector2i(1280, 720));
     root.dense(Index(0), 1024 * 1024).place(buffer(0), buffer(1), buffer(2));
 
     root.dense(Indices(0, 1, 2), 4)
@@ -669,14 +669,14 @@ class TRenderer {
     std::FILE *f;
     if (use_sky_map) {
       f = fopen("sky_map.bin", "rb");
-      TC_ASSERT_INFO(f, "./sky_map.bin not found");
+      TI_ASSERT_INFO(f, "./sky_map.bin not found");
       std::vector<uint32> sky_map_data(sky_map_size.prod() * 3);
       if (std::fread(sky_map_data.data(), sizeof(uint32), sky_map_data.size(),
                      f)) {
       }
 
       f = fopen("sky_samples.bin", "rb");
-      TC_ASSERT_INFO(f, "./sky_samples.bin not found");
+      TI_ASSERT_INFO(f, "./sky_samples.bin not found");
       std::vector<uint32> sky_sample_data(n_sky_samples * 5);
       if (std::fread(sky_sample_data.data(), sizeof(uint32),
                      (int)sky_sample_data.size(), f)) {
diff --git a/examples/cpp/voxel_renderer.cpp b/examples/cpp/voxel_renderer.cpp
index 99ffb0e5b7054..bf0122d6042fc 100644
--- a/examples/cpp/voxel_renderer.cpp
+++ b/examples/cpp/voxel_renderer.cpp
@@ -9,7 +9,7 @@ auto voxel_renderer = [](const std::vector<std::string> &params) {
   int n = 512;
 
   if (params.size() < 2) {
-    TC_INFO("Usage: ti voxel renderer filename.bin resolution");
+    TI_INFO("Usage: ti voxel renderer filename.bin resolution");
     exit(-1);
   }
 
@@ -183,7 +183,7 @@ auto voxel_renderer = [](const std::vector<std::string> &params) {
       fn = fmt::format(params[0], frame);
     }
     auto f = fopen(fn.c_str(), "rb");
-    TC_ERROR_UNLESS(f, "File {} not found", params[0]);
+    TI_ERROR_UNLESS(f, "File {} not found", params[0]);
     std::vector<char> density_field(pow<3>(grid_resolution), 0);
     trash(std::fread(density_field.data(), sizeof(char), density_field.size(),
                      f));
@@ -214,6 +214,6 @@ auto voxel_renderer = [](const std::vector<std::string> &params) {
     gui.canvas->img.write_as_image(fn + ".png");
   }
 };
-TC_REGISTER_TASK(voxel_renderer);
+TI_REGISTER_TASK(voxel_renderer);
 
 TLANG_NAMESPACE_END
diff --git a/examples/mpm128.py b/examples/mpm128.py
index 2e7f207080b9e..ffdfdacf3da4a 100644
--- a/examples/mpm128.py
+++ b/examples/mpm128.py
@@ -98,7 +98,7 @@ def reset():
   Jp.fill(1)
   C.fill(0)
   
-print("[Hint] Use WSAD/arrow keys to control gravity. Use left/right mouse bottons to attract/repel. (OS X not yet supported)")
+print("[Hint] Use WSAD/arrow keys to control gravity. Use left/right mouse buttons to attract/repel.")
 gui = ti.GUI("Taichi MLS-MPM-128", res=512, background_color=0x112F41)
 reset()
 
diff --git a/examples/renderer.py b/examples/renderer.py
index ab56aa2a5f975..4fd8862328872 100644
--- a/examples/renderer.py
+++ b/examples/renderer.py
@@ -60,8 +60,7 @@ def buffers():
 
   ti.root.dense(ti.ijk, 2).dense(ti.ijk, particle_grid_res // 8).dense(
       ti.ijk, 8).place(voxel_has_particle)
-  ti.root.dense(ti.ijk, 4).dense(
-      ti.ijk, particle_grid_res // 8).pointer().dense(ti.ijk, 8).dynamic(
+  ti.root.dense(ti.ijk, 4).pointer(ti.ijk, particle_grid_res // 8).dense(ti.ijk, 8).dynamic(
           ti.l, max_num_particles_per_cell, 512).place(pid)
 
   ti.root.dense(ti.l, max_num_particles).place(particle_x, particle_v,
diff --git a/examples/taichi_sparse.py b/examples/taichi_sparse.py
index c4bd2a53d04ef..be8abd2c2c03b 100644
--- a/examples/taichi_sparse.py
+++ b/examples/taichi_sparse.py
@@ -7,9 +7,9 @@
 res = n + n // 4 + n // 16 + n // 64
 img = ti.var(ti.f32, shape=(res, res))
 
-block1 = ti.root.dense(ti.ij, n // 64).pointer()
-block2 = block1.dense(ti.ij, 4).pointer()
-block3 = block2.dense(ti.ij, 4).pointer()
+block1 = ti.root.pointer(ti.ij, n // 64)
+block2 = block1.pointer(ti.ij, 4)
+block3 = block2.pointer(ti.ij, 4)
 block3.dense(ti.ij, 4).place(x)
 
 @ti.func
diff --git a/external/include/miniz.h b/external/include/miniz.h
index 2ef07e3cd77a5..68cd8717d3153 100644
--- a/external/include/miniz.h
+++ b/external/include/miniz.h
@@ -4297,7 +4297,7 @@ static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream)
 #define MZ_FFLUSH fflush
 #define MZ_FREOPEN(f, m, s) freopen(f, m, s)
 #define MZ_DELETE_FILE remove
-#elif defined(__GNUC__) && _LARGEFILE64_SOURCE && !defined(TC_PLATFORM_OSX)
+#elif defined(__GNUC__) && _LARGEFILE64_SOURCE && !defined(TI_PLATFORM_OSX)
 #ifndef MINIZ_NO_TIME
 #include <utime.h>
 #endif
diff --git a/external/include/stb_image.h b/external/include/stb_image.h
index de04259da53b9..aa053383b4b8e 100644
--- a/external/include/stb_image.h
+++ b/external/include/stb_image.h
@@ -1,4 +1,4 @@
-#if defined(TC_IMAGE_IO)
+#if defined(TI_IMAGE_IO)
 /* stb_image - v2.16 - public domain image loader - http://nothings.org/stb_image.h
                                      no warranty implied; use at your own risk
 
diff --git a/external/include/stb_image_write.h b/external/include/stb_image_write.h
index b53ae306d57b7..7b4c1c3df6dc0 100644
--- a/external/include/stb_image_write.h
+++ b/external/include/stb_image_write.h
@@ -1,4 +1,4 @@
-#if defined(TC_IMAGE_IO)
+#if defined(TI_IMAGE_IO)
 /* stb_image_write - v1.07 - public domain - http://nothings.org/stb/stb_image_write.h
    writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015
                                      no warranty implied; use at your own risk
diff --git a/misc/amalgamate.py b/misc/amalgamate.py
index d6dfe56f571a4..893dd1e76c130 100644
--- a/misc/amalgamate.py
+++ b/misc/amalgamate.py
@@ -66,7 +66,7 @@ def include(self, fn):
           if l == '#endif':
             protected = False
           continue
-        if l == '#if !defined(TC_AMALGAMATED)':
+        if l == '#if !defined(TI_AMALGAMATED)':
           protected = True
           continue
         match = re.search(include_template, l)
@@ -109,9 +109,9 @@ def run(self):
     print(
         "// DO NOT EDIT MANUALLY, unless you know that you are doing.",
         file=self.output_f)
-    print("#define TC_INCLUDED", file=self.output_f)
-    print("#define TC_AMALGAMATED", file=self.output_f)
-    print("#define TC_ISE_NONE", file=self.output_f)
+    print("#define TI_INCLUDED", file=self.output_f)
+    print("#define TI_AMALGAMATED", file=self.output_f)
+    print("#define TI_ISE_NONE", file=self.output_f)
     for f in self.files:
       self.include(f)
     print("Included files:")
@@ -130,7 +130,7 @@ def test(self):
   Vector4 v(21);
   auto x = v + v;
   fmt::print("{}\\n", x.x);
-  TC_P(x);
+  TI_P(x);
 }
 ''')
     t = time.time()
diff --git a/misc/appveyor_filter.py b/misc/appveyor_filter.py
new file mode 100644
index 0000000000000..289daaf781f34
--- /dev/null
+++ b/misc/appveyor_filter.py
@@ -0,0 +1,8 @@
+import os
+import sys
+
+# AppVeyor build filter (see appveyor.yml): exit 0 to build this configuration, 1 to skip it.
+msg = os.environ.get("APPVEYOR_REPO_COMMIT_MESSAGE", "")  # unset variable -> empty message
+if msg.startswith('[release]') or sys.version_info[:2] == (3, 6):
+  sys.exit(0)  # Build: commit message starts with '[release]', or Python version is 3.6
+sys.exit(1)  # Do not build this configuration
diff --git a/misc/obsolete_cpp_tests/allocator.cpp b/misc/obsolete_cpp_tests/allocator.cpp
index 0e9184ae2ec2a..7cac37e5191c6 100644
--- a/misc/obsolete_cpp_tests/allocator.cpp
+++ b/misc/obsolete_cpp_tests/allocator.cpp
@@ -4,7 +4,7 @@
 
 TLANG_NAMESPACE_BEGIN
 
-TC_TEST("gpu_gc_basics") {
+TI_TEST("gpu_gc_basics") {
   for (auto arch : {Arch::gpu}) {
     int n = 32;
     Program prog(arch);
@@ -27,26 +27,26 @@ TC_TEST("gpu_gc_basics") {
     })();
 
     auto stat = x.parent().parent().snode()->stat();
-    TC_CHECK(stat.num_resident_blocks == n - 1);
+    TI_CHECK(stat.num_resident_blocks == n - 1);
     for (int i = 0; i < n; i++) {
       for (int j = 0; j < i; j++) {
-        TC_CHECK(x.val<int>(i, j) == i + j);
+        TI_CHECK(x.val<int>(i, j) == i + j);
       }
     }
     x.parent().parent().snode()->clear_data_and_deactivate();
     stat = x.parent().parent().snode()->stat();
-    TC_CHECK(stat.num_resident_blocks == 0);
-    TC_CHECK(stat.num_recycled_blocks == 0);
+    TI_CHECK(stat.num_resident_blocks == 0);
+    TI_CHECK(stat.num_recycled_blocks == 0);
 
     for (int i = 0; i < n; i++) {
       for (int j = 0; j < i; j++) {
-        TC_CHECK(x.val<int>(i, j) == 0);
+        TI_CHECK(x.val<int>(i, j) == 0);
       }
     }
   }
 };
 
-TC_TEST("parallel_particle_sort") {
+TI_TEST("parallel_particle_sort") {
   Program prog(Arch::gpu);
   CoreState::set_trigger_gdb_when_crash(true);
 
@@ -76,7 +76,7 @@ TC_TEST("parallel_particle_sort") {
     p_x[i] = Vector3(0.5_f) + offset * 0.7f;
   }
 
-  TC_ASSERT(n_particles <= max_n_particles);
+  TI_ASSERT(n_particles <= max_n_particles);
 
   auto i = Index(0), j = Index(1), k = Index(2);
   auto p = Index(3);
@@ -89,14 +89,14 @@ TC_TEST("parallel_particle_sort") {
 
     root.dense(i, max_n_particles).place(flag);
 
-    TC_ASSERT(n % grid_block_size == 0);
+    TI_ASSERT(n % grid_block_size == 0);
     root.dense({i, j, k}, n / grid_block_size)
         .pointer()
         .dense({i, j, k}, grid_block_size)
         .place(grid_m);
   });
 
-  TC_ASSERT(bit::is_power_of_two(n));
+  TI_ASSERT(bit::is_power_of_two(n));
 
   Kernel(sort).def([&] {
     BlockDim(256);
@@ -121,7 +121,7 @@ TC_TEST("parallel_particle_sort") {
     sort();
     prog.synchronize();
     for (int k = 0; k < max_n_particles; k++) {
-      TC_CHECK(flag.val<int32>(k) == 1);
+      TI_CHECK(flag.val<int32>(k) == 1);
     }
     auto stat = grid_m.parent().parent().snode()->stat();
     int nb = stat.num_resident_blocks;
@@ -129,14 +129,14 @@ TC_TEST("parallel_particle_sort") {
       last_nb = nb;
     } else {
       if (last_nb != nb) {
-        TC_P(i);
+        TI_P(i);
       }
-      TC_CHECK(last_nb == nb);
+      TI_CHECK(last_nb == nb);
     }
   }
 };
 
-TC_TEST("struct_for") {
+TI_TEST("struct_for") {
   Program prog(Arch::gpu);
   CoreState::set_trigger_gdb_when_crash(true);
 
@@ -166,7 +166,7 @@ TC_TEST("struct_for") {
     p_x[i] = Vector3(0.5_f) + offset * 0.7f;
   }
 
-  TC_ASSERT(n_particles <= max_n_particles);
+  TI_ASSERT(n_particles <= max_n_particles);
 
   auto i = Index(0), j = Index(1), k = Index(2);
   auto p = Index(3);
@@ -201,8 +201,8 @@ TC_TEST("struct_for") {
         big_count += 1;
       }
     }
-    TC_CHECK(zero_count == 0);
-    TC_CHECK(big_count == 0);
+    TI_CHECK(zero_count == 0);
+    TI_CHECK(big_count == 0);
   }
 };
 
diff --git a/misc/obsolete_cpp_tests/atomics.cpp b/misc/obsolete_cpp_tests/atomics.cpp
index 055f8e606555d..5183caf3fe523 100644
--- a/misc/obsolete_cpp_tests/atomics.cpp
+++ b/misc/obsolete_cpp_tests/atomics.cpp
@@ -4,7 +4,7 @@
 
 TLANG_NAMESPACE_BEGIN
 
-TC_TEST("atomics") {
+TI_TEST("atomics") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 10000000;
   Program prog(Arch::x86_64);
@@ -23,11 +23,11 @@ TC_TEST("atomics") {
 
   func();
 
-  TC_CHECK(sum.val<int>() == n);
-  TC_CHECK(fsum.val<float32>() == 0);
+  TI_CHECK(sum.val<int>() == n);
+  TI_CHECK(fsum.val<float32>() == 0);
 };
 
-TC_TEST("atomics2") {
+TI_TEST("atomics2") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 1000;
   Program prog(Arch::x86_64);
@@ -42,10 +42,10 @@ TC_TEST("atomics2") {
 
   func();
 
-  TC_CHECK(fsum.val<float32>() == 1000);
+  TI_CHECK(fsum.val<float32>() == 1000);
 };
 
-TC_TEST("parallel_reduce") {
+TI_TEST("parallel_reduce") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 1024 * 1024 * 32;
   Program prog(Arch::x86_64);
@@ -70,7 +70,7 @@ TC_TEST("parallel_reduce") {
     reduce();
   prog.profiler_print();
 
-  TC_CHECK(fsum.val<int32>() == (n / 2) * (n - 1) * 10);
+  TI_CHECK(fsum.val<int32>() == (n / 2) * (n - 1) * 10);
 };
 
 TLANG_NAMESPACE_END
diff --git a/misc/obsolete_cpp_tests/compiler_basics.cpp b/misc/obsolete_cpp_tests/compiler_basics.cpp
index 5c91df12fac10..349924776b8fe 100644
--- a/misc/obsolete_cpp_tests/compiler_basics.cpp
+++ b/misc/obsolete_cpp_tests/compiler_basics.cpp
@@ -5,7 +5,7 @@
 
 TLANG_NAMESPACE_BEGIN
 
-TC_TEST("compiler_linalg") {
+TI_TEST("compiler_linalg") {
   CoreState::set_trigger_gdb_when_crash(true);
   Program prog(Arch::x86_64);
 
@@ -33,7 +33,7 @@ TC_TEST("compiler_linalg") {
   })();
 };
 
-TC_TEST("select") {
+TI_TEST("select") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 128;
   Program prog(Arch::x86_64);
@@ -53,11 +53,11 @@ TC_TEST("select") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int32>(i) == (2 - i % 2) * i);
+    TI_CHECK(a.val<int32>(i) == (2 - i % 2) * i);
   }
 };
 
-TC_TEST("compiler_basics") {
+TI_TEST("compiler_basics") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 128;
   Program prog(Arch::x86_64);
@@ -79,11 +79,11 @@ TC_TEST("compiler_basics") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int32>(i) == (2 - i % 2) * i);
+    TI_CHECK(a.val<int32>(i) == (2 - i % 2) * i);
   }
 };
 
-TC_TEST("simplify_access") {
+TI_TEST("simplify_access") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 128;
   Program prog(Arch::x86_64);
@@ -96,7 +96,7 @@ TC_TEST("simplify_access") {
   kernel([&]() { For(a, [&](Expr i) { a[i] = b[i] + 1; }); })();
 };
 
-TC_TEST("fancy_for") {
+TI_TEST("fancy_for") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 128;
   Program prog(Arch::x86_64);
@@ -118,11 +118,11 @@ TC_TEST("fancy_for") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int32>(i) == (2 - i % 2) * i);
+    TI_CHECK(a.val<int32>(i) == (2 - i % 2) * i);
   }
 };
 
-TC_TEST("simd_if") {
+TI_TEST("simd_if") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 128;
   Program prog(Arch::x86_64);
@@ -144,11 +144,11 @@ TC_TEST("simd_if") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int32>(i) == (2 - i % 2) * i);
+    TI_CHECK(a.val<int32>(i) == (2 - i % 2) * i);
   }
 };
 
-TC_TEST("simd_if2") {
+TI_TEST("simd_if2") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 32;
   Program prog(Arch::x86_64);
@@ -170,7 +170,7 @@ TC_TEST("simd_if2") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int32>(i) == (1 + i % 3) * i);
+    TI_CHECK(a.val<int32>(i) == (1 + i % 3) * i);
   }
 };
 
@@ -206,9 +206,9 @@ auto test_circle = [] {
     gui.update();
   }
 };
-TC_REGISTER_TASK(test_circle);
+TI_REGISTER_TASK(test_circle);
 
-TC_TEST("vectorize") {
+TI_TEST("vectorize") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 128;
   Program prog(Arch::x86_64);
@@ -223,11 +223,11 @@ TC_TEST("vectorize") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int>(i) == i);
+    TI_CHECK(a.val<int>(i) == i);
   }
 };
 
-TC_TEST("rand") {
+TI_TEST("rand") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 4;
   Program prog(Arch::x86_64);
@@ -239,7 +239,7 @@ TC_TEST("rand") {
   kernel([&]() { For(0, n, [&](Expr i) { Print(Rand<float>()); }); })();
 };
 
-TC_TEST("while") {
+TI_TEST("while") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 4096;
   Program prog(Arch::x86_64);
@@ -262,11 +262,11 @@ TC_TEST("while") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int>(i) == (i - 1) * i / 2);
+    TI_CHECK(a.val<int>(i) == (i - 1) * i / 2);
   }
 };
 
-TC_TEST("slp") {
+TI_TEST("slp") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 16;
   Program prog(Arch::x86_64);
@@ -290,14 +290,14 @@ TC_TEST("slp") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int>(i) == 1);
-    TC_CHECK(b.val<int>(i) == 2);
-    TC_CHECK(c.val<int>(i) == 3);
-    TC_CHECK(d.val<int>(i) == 4);
+    TI_CHECK(a.val<int>(i) == 1);
+    TI_CHECK(b.val<int>(i) == 2);
+    TI_CHECK(c.val<int>(i) == 3);
+    TI_CHECK(d.val<int>(i) == 4);
   }
 };
 
-TC_TEST("slp1") {
+TI_TEST("slp1") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 16;
   for (auto slp1 : {true, false}) {
@@ -322,13 +322,13 @@ TC_TEST("slp1") {
 
     for (int i = 0; i < n; i++) {
       for (int d = 0; d < 4; d++) {
-        TC_CHECK(grid(d).val<float32>(i) == d);
+        TI_CHECK(grid(d).val<float32>(i) == d);
       }
     }
   }
 };
 
-TC_TEST("slp2") {
+TI_TEST("slp2") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 16;
   Program prog(Arch::x86_64);
@@ -348,12 +348,12 @@ TC_TEST("slp2") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int>(i) == 1 + i * 7);
-    TC_CHECK(b.val<int>(i) == 2 + i * 9);
+    TI_CHECK(a.val<int>(i) == 1 + i * 7);
+    TI_CHECK(b.val<int>(i) == 2 + i * 9);
   }
 };
 
-TC_TEST("slp3") {
+TI_TEST("slp3") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 16;
   Program prog(Arch::x86_64);
@@ -375,12 +375,12 @@ TC_TEST("slp3") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int>(i) == 1 + i * 7);
-    TC_CHECK(b.val<int>(i) == 2 + i * 9);
+    TI_CHECK(a.val<int>(i) == 1 + i * 7);
+    TI_CHECK(b.val<int>(i) == 2 + i * 9);
   }
 };
 
-TC_TEST("slpmatvecmul") {
+TI_TEST("slpmatvecmul") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 16;
   Program prog(Arch::x86_64);
@@ -414,14 +414,14 @@ TC_TEST("slpmatvecmul") {
 
   /*
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int>(i) == 1 + i * 7);
-    TC_CHECK(b.val<int>(i) == 2 + i * 9);
+    TI_CHECK(a.val<int>(i) == 1 + i * 7);
+    TI_CHECK(b.val<int>(i) == 2 + i * 9);
   }
   */
 };
 
 // scalar a * scalar b * vec c
-TC_TEST("mixed_simd1") {
+TI_TEST("mixed_simd1") {
   for (auto vec_size : {4, 8, 16}) {
     Program prog;
 
@@ -461,14 +461,14 @@ TC_TEST("mixed_simd1") {
       for (int j = 0; j < vec_size; j++) {
         auto val = v(j).val<float32>(i);
         float32 gt = i * j * 2;
-        TC_CHECK_EQUAL(gt, val, 1e-3_f);
+        TI_CHECK_EQUAL(gt, val, 1e-3_f);
       }
     }
   }
 }
 
 // Vec<vec_size> reduction
-TC_TEST("mixed_simd2") {
+TI_TEST("mixed_simd2") {
   int n = 64;
 
   for (auto vec_size : {4, 8, 16}) {
@@ -514,13 +514,13 @@ TC_TEST("mixed_simd2") {
     for (int i = 0; i < n; i++) {
       auto val = sum.val<float32>(i);
       float32 gt = vec_size * (vec_size - 1) / 2 + i * vec_size;
-      TC_CHECK_EQUAL(gt, val, 1e-5_f);
+      TI_CHECK_EQUAL(gt, val, 1e-5_f);
     }
   }
 }
 
 // reduce(vec_a<n> ** 2 - vec_b<n> ** 2) * vec_c<2n>
-TC_TEST("mixed_simd3_slp") {
+TI_TEST("mixed_simd3_slp") {
   for (auto vec_size : {16}) {
     // why vec_size = 16 fails??
     Program prog;
@@ -581,13 +581,13 @@ TC_TEST("mixed_simd3_slp") {
       for (int j = 0; j < vec_size * 2; j++) {
         auto val = c(j).val<float32>(i);
         auto gt = s * (i - 2 + j);
-        TC_CHECK_EQUAL(gt, val, 1e-3_f);
+        TI_CHECK_EQUAL(gt, val, 1e-3_f);
       }
     }
   }
 }
 
-TC_TEST("vector_split1") {
+TI_TEST("vector_split1") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 32;
   Program prog(Arch::x86_64);
@@ -603,11 +603,11 @@ TC_TEST("vector_split1") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int>(i) == i);
+    TI_CHECK(a.val<int>(i) == i);
   }
 };
 
-TC_TEST("vector_split_slp") {
+TI_TEST("vector_split_slp") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 256;
   Program prog(Arch::x86_64);
@@ -632,14 +632,14 @@ TC_TEST("vector_split_slp") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int>(i) == 1 + i);
-    TC_CHECK(b.val<int>(i) == 2 + i);
-    TC_CHECK(c.val<int>(i) == 3 + i);
-    TC_CHECK(d.val<int>(i) == 4 + i);
+    TI_CHECK(a.val<int>(i) == 1 + i);
+    TI_CHECK(b.val<int>(i) == 2 + i);
+    TI_CHECK(c.val<int>(i) == 3 + i);
+    TI_CHECK(d.val<int>(i) == 4 + i);
   }
 };
 
-TC_TEST("union_cast") {
+TI_TEST("union_cast") {
   CoreState::set_trigger_gdb_when_crash(true);
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
     int n = 16;
@@ -660,13 +660,13 @@ TC_TEST("union_cast") {
     })();
 
     for (int i = 0; i < n; i++) {
-      TC_CHECK(a.val<int>(i) ==
+      TI_CHECK(a.val<int>(i) ==
                union_cast<int32>(union_cast<float32>(i * 1000) + 1234.0f));
     }
   }
 };
 
-TC_TEST("logic_not") {
+TI_TEST("logic_not") {
   CoreState::set_trigger_gdb_when_crash(true);
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
     int n = 16;
@@ -685,14 +685,14 @@ TC_TEST("logic_not") {
     })();
 
     for (int i = 0; i < n; i++) {
-      TC_CHECK(a.val<int>() == 0);
-      TC_CHECK(b.val<int>() != 0);
-      TC_CHECK(c.val<int>() == 0);
+      TI_CHECK(a.val<int>() == 0);
+      TI_CHECK(b.val<int>() != 0);
+      TI_CHECK(c.val<int>() == 0);
     }
   }
 };
 
-TC_TEST("simd_if_5") {
+TI_TEST("simd_if_5") {
   CoreState::set_trigger_gdb_when_crash(true);
   for (auto arch : {Arch::x86_64}) {
     for (auto vec : {1, 4, 8}) {
@@ -713,13 +713,13 @@ TC_TEST("simd_if_5") {
         });
       })();
       for (int i = 0; i < n; i++) {
-        TC_CHECK(c.val<int32>(i) == 1);
+        TI_CHECK(c.val<int32>(i) == 1);
       }
     }
   }
 };
 
-TC_TEST("point_inside_box") {
+TI_TEST("point_inside_box") {
   CoreState::set_trigger_gdb_when_crash(true);
   for (auto arch : {Arch::x86_64}) {
     for (auto vec : {1, 4, 8}) {
@@ -746,13 +746,13 @@ TC_TEST("point_inside_box") {
         });
       })();
       for (int i = 0; i < n; i++) {
-        TC_CHECK(bool(c.val<int32>(i)) == true);
+        TI_CHECK(bool(c.val<int32>(i)) == true);
       }
     }
   }
 };
 
-TC_TEST("while_in_while") {
+TI_TEST("while_in_while") {
   CoreState::set_trigger_gdb_when_crash(true);
   for (auto arch : {Arch::x86_64}) {
     for (auto vec : {1, 4, 8}) {
@@ -782,13 +782,13 @@ TC_TEST("while_in_while") {
         });
       })();
       for (int i = 0; i < n; i++) {
-        TC_CHECK(c.val<int32>(i) == ((i + 1) % 2) * 100);
+        TI_CHECK(c.val<int32>(i) == ((i + 1) % 2) * 100);
       }
     }
   }
 };
 
-TC_TEST("cmp") {
+TI_TEST("cmp") {
   CoreState::set_trigger_gdb_when_crash(true);
   for (auto arch : {Arch::x86_64}) {
     for (auto vec : {1, 4, 8}) {
@@ -812,7 +812,7 @@ TC_TEST("cmp") {
     For(0, n, [&](Expr i) { c[i] = a[i] OP b[i]; });        \
   })();                                                     \
   for (int i = 0; i < n; i++) {                             \
-    TC_CHECK(bool(c.val<int32>(i)) ==                       \
+    TI_CHECK(bool(c.val<int32>(i)) ==                       \
              bool(a.val<float32>(i) OP b.val<float32>(i))); \
   }
 
diff --git a/misc/obsolete_cpp_tests/dynamic.cpp b/misc/obsolete_cpp_tests/dynamic.cpp
index 2d7c1e832c9a2..6d866ff4f6e75 100644
--- a/misc/obsolete_cpp_tests/dynamic.cpp
+++ b/misc/obsolete_cpp_tests/dynamic.cpp
@@ -4,7 +4,7 @@
 
 TLANG_NAMESPACE_BEGIN
 
-TC_TEST("append_and_probe") {
+TI_TEST("append_and_probe") {
   CoreState::set_trigger_gdb_when_crash(true);
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
     int n = 32;
@@ -31,12 +31,12 @@ TC_TEST("append_and_probe") {
     })();
 
     for (int i = 0; i < n; i++)
-      TC_CHECK(x.val<int>(i) == i);
-    TC_CHECK(len.val<int>() == n);
+      TI_CHECK(x.val<int>(i) == i);
+    TI_CHECK(len.val<int>() == n);
   }
 };
 
-TC_TEST("activate") {
+TI_TEST("activate") {
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
     int n = 32;
     Program prog(arch);
@@ -66,13 +66,13 @@ TC_TEST("activate") {
 
     for (int i = 0; i < n; i++) {
       for (int j = 0; j < i; j++) {
-        TC_CHECK(x.val<int>(i, j) == i + j);
+        TI_CHECK(x.val<int>(i, j) == i + j);
       }
     }
   }
 };
 
-TC_TEST("task_list") {
+TI_TEST("task_list") {
   for (auto arch : {Arch::gpu}) {
     int n = 262144;
     int m = 64;
@@ -105,13 +105,13 @@ TC_TEST("task_list") {
     for (int i = 0; i < n; i++) {
       if (i % 5 == 4)
         for (int j = 0; j < m; j++) {
-          TC_CHECK(x.val<int>(i, j) == i + j + P);
+          TI_CHECK(x.val<int>(i, j) == i + j + P);
         }
     }
   }
 };
 
-TC_TEST("task_list_dynamic") {
+TI_TEST("task_list_dynamic") {
   for (auto arch : {Arch::gpu}) {
     int n = 262144;
     int m = 64;
@@ -142,13 +142,13 @@ TC_TEST("task_list_dynamic") {
     for (int i = 0; i < n; i++) {
       if (i % 5 == 4)
         for (int j = 0; j < 1; j++) {
-          TC_CHECK(x.val<int>(i, j) == i + j + P);
+          TI_CHECK(x.val<int>(i, j) == i + j + P);
         }
     }
   }
 };
 
-TC_TEST("parallel_append") {
+TI_TEST("parallel_append") {
   for (auto arch : {Arch::gpu}) {
     int n = 32;
     Program prog(arch);
@@ -178,18 +178,18 @@ TC_TEST("parallel_append") {
       else
         append();
       auto stat = x.parent().parent().snode()->stat();
-      TC_CHECK(stat.num_resident_blocks == n);
+      TI_CHECK(stat.num_resident_blocks == n);
       if (i % 2)
         for (int i = 0; i < n; i++) {
           for (int j = 0; j < n; j++) {
-            TC_CHECK(x.val<int>(i, j) == 0);
+            TI_CHECK(x.val<int>(i, j) == 0);
           }
         }
     }
   }
 };
 
-TC_TEST("append_2d") {
+TI_TEST("append_2d") {
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
     int n = 32;
     Program prog(arch);
@@ -211,13 +211,13 @@ TC_TEST("append_2d") {
 
     for (int i = 0; i < n; i++) {
       for (int j = 0; j < i; j++) {
-        TC_CHECK(x.val<int>(i, j) == i + j);
+        TI_CHECK(x.val<int>(i, j) == i + j);
       }
     }
   }
 };
 
-TC_TEST("clear") {
+TI_TEST("clear") {
   CoreState::set_trigger_gdb_when_crash(true);
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
     int n = 32;
@@ -249,15 +249,15 @@ TC_TEST("clear") {
     for (int i = 0; i < n; i++) {
       for (int j = 0; j < n; j++) {
         if (j < i)
-          TC_CHECK(x.val<int>(i, j) == i + j);
+          TI_CHECK(x.val<int>(i, j) == i + j);
         else
-          TC_CHECK(x.val<int>(i, j) == 0);
+          TI_CHECK(x.val<int>(i, j) == 0);
       }
     }
   }
 };
 
-TC_TEST("sort") {
+TI_TEST("sort") {
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
     int n = 4;
     Program prog(arch);
@@ -273,7 +273,7 @@ TC_TEST("sort") {
     Global(c, i32);
     Global(coord, i32);
     Global(p, i32);
-    TC_P(particles);
+    TI_P(particles);
     layout([&]() {
       auto i = Index(0);
       auto j = Index(1);
@@ -309,12 +309,12 @@ TC_TEST("sort") {
     })();
 
     for (int i = 0; i < n * n; i++) {
-      TC_CHECK(c.val<int>(i) == count[i]);
+      TI_CHECK(c.val<int>(i) == count[i]);
     }
   }
 };
 
-TC_TEST("dilate") {
+TI_TEST("dilate") {
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
     for (auto ds : {1}) {
       int n = 32;
@@ -350,16 +350,16 @@ TC_TEST("dilate") {
 
       for (int i = 0; i < n; i++) {
         int bid = i / bs;
-        TC_CHECK(x.val<int32>(i) == (1 <= bid && bid < 4));
+        TI_CHECK(x.val<int32>(i) == (1 <= bid && bid < 4));
       }
       for (int i = 0; i < n / bs; i++) {
-        TC_CHECK(y.val<int32>(i) == (1 <= i && i < 4));
+        TI_CHECK(y.val<int32>(i) == (1 <= i && i < 4));
       }
     }
   }
 };
 
-TC_TEST("dynamic_sort") {
+TI_TEST("dynamic_sort") {
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
     int n = 4;
     Program prog(arch);
@@ -375,7 +375,7 @@ TC_TEST("dynamic_sort") {
     Global(c, i32);
     Global(coord, i32);
     Global(p, i32);
-    TC_P(particles);
+    TI_P(particles);
     layout([&]() {
       auto i = Index(0);
       auto j = Index(1);
@@ -414,7 +414,7 @@ TC_TEST("dynamic_sort") {
     })();
 
     for (int i = 0; i < n * n; i++) {
-      TC_CHECK(c.val<int>(i) == count[i]);
+      TI_CHECK(c.val<int>(i) == count[i]);
     }
   }
 };
@@ -434,7 +434,7 @@ auto reset_grid_benchmark = []() {
   auto i = Index(0), j = Index(1), k = Index(2);
 
   layout([&]() {
-    TC_ASSERT(n % grid_block_size == 0);
+    TI_ASSERT(n % grid_block_size == 0);
     auto &block = root.dense({i, j, k}, n / grid_block_size);
     constexpr bool block_soa = false;
     if (block_soa) {
@@ -449,7 +449,7 @@ auto reset_grid_benchmark = []() {
     }
   });
 
-  TC_ASSERT(bit::is_power_of_two(n));
+  TI_ASSERT(bit::is_power_of_two(n));
 
   auto &reset_grid = kernel([&]() {
     Declare(i);
@@ -464,8 +464,8 @@ auto reset_grid_benchmark = []() {
   });
 
   while (1)
-    TC_TIME(reset_grid());
+    TI_TIME(reset_grid());
 };
-TC_REGISTER_TASK(reset_grid_benchmark);
+TI_REGISTER_TASK(reset_grid_benchmark);
 
 TLANG_NAMESPACE_END
diff --git a/misc/obsolete_cpp_tests/gpu.cpp b/misc/obsolete_cpp_tests/gpu.cpp
index 3b34b3cdab72b..ea7d6a46e4222 100644
--- a/misc/obsolete_cpp_tests/gpu.cpp
+++ b/misc/obsolete_cpp_tests/gpu.cpp
@@ -4,7 +4,7 @@
 
 TLANG_NAMESPACE_BEGIN
 
-TC_TEST("compiler_basics_gpu") {
+TI_TEST("compiler_basics_gpu") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 128;
   Program prog(Arch::gpu);
@@ -24,18 +24,18 @@ TC_TEST("compiler_basics_gpu") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(a.val<int32>(i) == (2 - i % 2) * i);
+    TI_CHECK(a.val<int32>(i) == (2 - i % 2) * i);
   }
 };
 
-#if defined(CUDA_FOUND)
-TC_TEST("cuda_malloc_managed") {
+#if defined(TI_WITH_CUDA)
+TI_TEST("cuda_malloc_managed") {
   void *ptr;
   cudaMallocManaged(&ptr, 1LL << 40);
 
   int *data = (int *)ptr;
   for (int i = 0; i < 100000; i++) {
-    TC_CHECK(data[i * 749] == 0);
+    TI_CHECK(data[i * 749] == 0);
   }
   cudaFree(ptr);
 }
diff --git a/misc/obsolete_cpp_tests/hash.cpp b/misc/obsolete_cpp_tests/hash.cpp
index 6c8bf4e16bbb0..a8c1aaec439ee 100644
--- a/misc/obsolete_cpp_tests/hash.cpp
+++ b/misc/obsolete_cpp_tests/hash.cpp
@@ -3,7 +3,7 @@
 
 TLANG_NAMESPACE_BEGIN
 
-TC_TEST("hash") {
+TI_TEST("hash") {
   for (auto arch : {Arch::gpu, Arch::x86_64}) {
     Program prog(arch);
     CoreState::set_trigger_gdb_when_crash(true);
@@ -22,10 +22,10 @@ TC_TEST("hash") {
     })();
 
     for (int i = 0; i < n * n / 2; i++) {
-      TC_CHECK(u.val<int32>(i) == i * 2);
+      TI_CHECK(u.val<int32>(i) == i * 2);
     }
     for (int i = n * n / 2; i < n * n; i++) {
-      TC_CHECK(u.val<int32>(i) == 0);
+      TI_CHECK(u.val<int32>(i) == 0);
     }
   }
 }
diff --git a/misc/obsolete_cpp_tests/micro_access_ops.cpp b/misc/obsolete_cpp_tests/micro_access_ops.cpp
index 031c033895cee..8f38699acd1b1 100644
--- a/misc/obsolete_cpp_tests/micro_access_ops.cpp
+++ b/misc/obsolete_cpp_tests/micro_access_ops.cpp
@@ -3,7 +3,7 @@
 
 TLANG_NAMESPACE_BEGIN
 
-TC_TEST("lower_access_basics") {
+TI_TEST("lower_access_basics") {
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
     for (auto vec : {1, 4, 8}) {
       CoreState::set_trigger_gdb_when_crash(true);
@@ -21,7 +21,7 @@ TC_TEST("lower_access_basics") {
       func();
 
       for (int i = 0; i < n; i++) {
-        TC_CHECK(a.val<int32>(i) == i);
+        TI_CHECK(a.val<int32>(i) == i);
       }
     }
   }
diff --git a/misc/obsolete_cpp_tests/opt.cpp b/misc/obsolete_cpp_tests/opt.cpp
index 39d539321f0f6..0fe75557e3779 100644
--- a/misc/obsolete_cpp_tests/opt.cpp
+++ b/misc/obsolete_cpp_tests/opt.cpp
@@ -4,7 +4,7 @@
 
 TLANG_NAMESPACE_BEGIN
 
-TC_TEST("access_simp") {
+TI_TEST("access_simp") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 16;
   Program prog(Arch::x86_64);
@@ -29,10 +29,10 @@ TC_TEST("access_simp") {
     });
   })();
 
-  TC_CHECK(sum.val<int32>() == 16);
+  TI_CHECK(sum.val<int32>() == 16);
 };
 
-TC_TEST("root_leaf_path_weakening") {
+TI_TEST("root_leaf_path_weakening") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 16;
   Program prog(Arch::x86_64);
diff --git a/misc/obsolete_cpp_tests/scalar_svd.h b/misc/obsolete_cpp_tests/scalar_svd.h
index 6dc470a7de91f..925f75c4e42b8 100644
--- a/misc/obsolete_cpp_tests/scalar_svd.h
+++ b/misc/obsolete_cpp_tests/scalar_svd.h
@@ -37,7 +37,7 @@ A. McAdams, A. Selle, R. Tamstorf, J. Teran and E. Sifakis
 // POSSIBILITY OF SUCH DAMAGE.
 //#####################################################################
 
-TC_FORCE_INLINE float rsqrt(const float f) {
+TI_FORCE_INLINE float rsqrt(const float f) {
   return 1.0f / std::sqrt(f);
 }
 
@@ -48,7 +48,7 @@ constexpr float Cosine_Pi_Over_Eight =
     0.9238795325112867f;  //.5 * sqrt(2. + sqrt(2.));
 
 template <int sweeps = 4>
-TC_FORCE_INLINE void svd(const float a11,
+TI_FORCE_INLINE void svd(const float a11,
                          const float a12,
                          const float a13,
                          const float a21,
diff --git a/misc/obsolete_cpp_tests/scratch_pad.cpp b/misc/obsolete_cpp_tests/scratch_pad.cpp
index 5a7e2e1a17d65..99ef7a3596c64 100644
--- a/misc/obsolete_cpp_tests/scratch_pad.cpp
+++ b/misc/obsolete_cpp_tests/scratch_pad.cpp
@@ -4,7 +4,7 @@
 
 TLANG_NAMESPACE_BEGIN
 
-TC_TEST("scratch_pad_bounds") {
+TI_TEST("scratch_pad_bounds") {
   Program prog;
 
   int N = 8;
@@ -22,26 +22,26 @@ TC_TEST("scratch_pad_bounds") {
 
   pad.access({1, 2, -3}, ScratchPad::AccessFlag::read);
 
-  TC_CHECK(pad.bounds[0][0] == 1);
-  TC_CHECK(pad.bounds[0][1] == 2);
-  TC_CHECK(pad.bounds[0][2] == -3);
+  TI_CHECK(pad.bounds[0][0] == 1);
+  TI_CHECK(pad.bounds[0][1] == 2);
+  TI_CHECK(pad.bounds[0][2] == -3);
 
-  TC_CHECK(pad.bounds[1][0] == 2);
-  TC_CHECK(pad.bounds[1][1] == 3);
-  TC_CHECK(pad.bounds[1][2] == -2);
+  TI_CHECK(pad.bounds[1][0] == 2);
+  TI_CHECK(pad.bounds[1][1] == 3);
+  TI_CHECK(pad.bounds[1][2] == -2);
 
   pad.access({4, -2, 5}, ScratchPad::AccessFlag::read);
 
-  TC_CHECK(pad.bounds[0][0] == 1);
-  TC_CHECK(pad.bounds[0][1] == -2);
-  TC_CHECK(pad.bounds[0][2] == -3);
+  TI_CHECK(pad.bounds[0][0] == 1);
+  TI_CHECK(pad.bounds[0][1] == -2);
+  TI_CHECK(pad.bounds[0][2] == -3);
 
-  TC_CHECK(pad.bounds[1][0] == 5);
-  TC_CHECK(pad.bounds[1][1] == 3);
-  TC_CHECK(pad.bounds[1][2] == 6);
+  TI_CHECK(pad.bounds[1][0] == 5);
+  TI_CHECK(pad.bounds[1][1] == 3);
+  TI_CHECK(pad.bounds[1][2] == 6);
 }
 
-TC_TEST("range_assumption") {
+TI_TEST("range_assumption") {
   CoreState::set_trigger_gdb_when_crash(true);
   Program prog(Arch::gpu);
 
@@ -70,7 +70,7 @@ TC_TEST("range_assumption") {
   })();
 };
 
-TC_TEST("scratch_pad_3d") {
+TI_TEST("scratch_pad_3d") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 10000000;
   Program prog(Arch::gpu);
@@ -157,14 +157,14 @@ TC_TEST("scratch_pad_3d") {
                     x_val(i, j + 1, k) - x_val(i - 1, j, k) -
                     x_val(i + 1, j, k);
           if (std::abs(gt - y.val<float32>(i, j, k)) > 1) {
-            TC_P(d);
-            TC_P(gt);
-            TC_P(y.val<float32>(i, j, k));
-            TC_P(i);
-            TC_P(j);
-            TC_P(k);
+            TI_P(d);
+            TI_P(gt);
+            TI_P(y.val<float32>(i, j, k));
+            TI_P(i);
+            TI_P(j);
+            TI_P(k);
           }
-          TC_CHECK_EQUAL(gt, y.val<float32>(i, j, k),
+          TI_CHECK_EQUAL(gt, y.val<float32>(i, j, k),
                          1e-1f / domain_size / domain_size);
         }
       }
diff --git a/misc/obsolete_cpp_tests/stencil1d.cpp b/misc/obsolete_cpp_tests/stencil1d.cpp
index e71fefcb38355..82a071527fac5 100644
--- a/misc/obsolete_cpp_tests/stencil1d.cpp
+++ b/misc/obsolete_cpp_tests/stencil1d.cpp
@@ -197,7 +197,7 @@ void benchmark_layers() {
     }
   };
 
-  TC_P(measure_cpe(test_hash_table, n));
+  TI_P(measure_cpe(test_hash_table, n));
 
   auto &tile = data.begin()->second;
 
@@ -210,13 +210,13 @@ void benchmark_layers() {
     }
   };
 
-  TC_P(measure_cpe(test_block, n));
-  TC_P(cnt);
+  TI_P(measure_cpe(test_block, n));
+  TI_P(cnt);
 }
 
 TLANG_NAMESPACE_BEGIN
 
-TC_TEST("stencil1d") {
+TI_TEST("stencil1d") {
   CoreState::set_trigger_gdb_when_crash(true);
   Program prog;
 
@@ -277,24 +277,24 @@ TC_TEST("stencil1d") {
         }
       }
     }
-    TC_P(total_tiles);
-    TC_P(total_blocks);
-    TC_P(total_nodes);
+    TI_P(total_tiles);
+    TI_P(total_blocks);
+    TI_P(total_nodes);
   }
   // benchmark_layers();
-  // TC_P(measure_cpe(stencil, total_nodes));
+  // TI_P(measure_cpe(stencil, total_nodes));
 
   for (int i = 0; i < 10; i++)
-    TC_TIME(copy_ref());
+    TI_TIME(copy_ref());
 
   for (int i = 0; i < 10; i++)
-    TC_TIME(copy_optimized());
+    TI_TIME(copy_optimized());
 
   for (int i = 0; i < 10; i++)
-    TC_TIME(copy());
+    TI_TIME(copy());
 
   for (int i = 0; i < 10; i++)
-    TC_TIME(copy_parallelized());
+    TI_TIME(copy_parallelized());
 
   // test copy to x
   for (auto &it : data) {
@@ -305,25 +305,25 @@ TC_TEST("stencil1d") {
         continue;
       for (int n = 0; n < ::Block::size; n++) {
         int i = it.first * dim0 + b * dim1 + n;
-        TC_CHECK(block->nodes[n].y == y.val<float32>(i));
+        TI_CHECK(block->nodes[n].y == y.val<float32>(i));
       }
     }
   }
 
   for (int i = 0; i < 10; i++)
-    TC_TIME(stencil_ref());
+    TI_TIME(stencil_ref());
 
   for (int i = 0; i < 10; i++)
-    TC_TIME(stencil_optimized());
+    TI_TIME(stencil_optimized());
 
   for (int i = 0; i < 10; i++)
-    TC_TIME(stencil_optimized2());
+    TI_TIME(stencil_optimized2());
 
   for (int i = 0; i < 10; i++)
-    TC_TIME(stencil_optimized3());
+    TI_TIME(stencil_optimized3());
 
   for (int i = 0; i < 10; i++)
-    TC_TIME(stencil());
+    TI_TIME(stencil());
 
   // test stencil to x
   for (auto &it : data) {
@@ -334,7 +334,7 @@ TC_TEST("stencil1d") {
         continue;
       for (int n = 0; n < ::Block::size; n++) {
         int i = it.first * dim0 + b * dim1 + n;
-        TC_CHECK_EQUAL(block->nodes[n].y, y.val<float32>(i), 1e-5f);
+        TI_CHECK_EQUAL(block->nodes[n].y, y.val<float32>(i), 1e-5f);
       }
     }
   }
diff --git a/misc/obsolete_cpp_tests/struct.cpp b/misc/obsolete_cpp_tests/struct.cpp
index 139572d07c24d..48432e732d77c 100644
--- a/misc/obsolete_cpp_tests/struct.cpp
+++ b/misc/obsolete_cpp_tests/struct.cpp
@@ -3,7 +3,7 @@
 
 TLANG_NAMESPACE_BEGIN
 
-TC_TEST("snode") {
+TI_TEST("snode") {
   Program prog(Arch::x86_64);
 
   auto i = Index(0);
@@ -19,11 +19,11 @@ TC_TEST("snode") {
   }
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK_EQUAL(u.val<int32>(i), i + 1, 0);
+    TI_CHECK_EQUAL(u.val<int32>(i), i + 1, 0);
   }
 }
 
-TC_TEST("snode_loop") {
+TI_TEST("snode_loop") {
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
     Program prog(arch);
     CoreState::set_trigger_gdb_when_crash(true);
@@ -43,12 +43,12 @@ TC_TEST("snode_loop") {
     })();
 
     for (int i = 0; i < n; i++) {
-      TC_CHECK_EQUAL(u.val<int32>(i), i * 2, 0);
+      TI_CHECK_EQUAL(u.val<int32>(i), i * 2, 0);
     }
   }
 }
 
-TC_TEST("snode_loop2") {
+TI_TEST("snode_loop2") {
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
     Program prog(arch);
     CoreState::set_trigger_gdb_when_crash(true);
@@ -65,10 +65,10 @@ TC_TEST("snode_loop2") {
       root.dense(j, n).place(v);
     });
 
-    TC_ASSERT(
+    TI_ASSERT(
         u.cast<GlobalVariableExpression>()->snode->physical_index_position[0] ==
         0);
-    TC_ASSERT(
+    TI_ASSERT(
         v.cast<GlobalVariableExpression>()->snode->physical_index_position[0] ==
         1);
 
@@ -83,13 +83,13 @@ TC_TEST("snode_loop2") {
     })();
 
     for (int i = 0; i < n; i++) {
-      TC_CHECK_EQUAL(u.val<int32>(i), i * 2, 0);
-      TC_CHECK_EQUAL(v.val<int32>(i), i * 3, 0);
+      TI_CHECK_EQUAL(u.val<int32>(i), i * 2, 0);
+      TI_CHECK_EQUAL(v.val<int32>(i), i * 3, 0);
     }
   }
 }
 
-TC_TEST("2d_blocked_array") {
+TI_TEST("2d_blocked_array") {
   int n = 8, block_size = 4;
 
   for (auto arch : {Arch::x86_64, Arch::gpu})
@@ -103,7 +103,7 @@ TC_TEST("2d_blocked_array") {
         auto i = Index(0);
         auto j = Index(1);
         if (blocked) {
-          TC_ASSERT(n % block_size == 0);
+          TI_ASSERT(n % block_size == 0);
           root.dense({i, j}, {n / block_size, n * 2 / block_size})
               .dense({i, j}, {block_size, block_size})
               .place(a, b);
@@ -124,14 +124,14 @@ TC_TEST("2d_blocked_array") {
 
       for (int i = 0; i < n; i++) {
         for (int j = 0; j < n * 2; j++) {
-          TC_CHECK(a.val<int32>(i, j) == i + j * 3);
-          TC_CHECK(b.val<int32>(i, j) == i * 2 + j * 3);
+          TI_CHECK(a.val<int32>(i, j) == i + j * 3);
+          TI_CHECK(b.val<int32>(i, j) == i * 2 + j * 3);
         }
       }
     }
 }
 
-TC_TEST("2d_blocked_array_morton") {
+TI_TEST("2d_blocked_array_morton") {
   int n = 16, block_size = 4;
 
   for (auto arch : {Arch::x86_64}) {
@@ -143,7 +143,7 @@ TC_TEST("2d_blocked_array_morton") {
     layout([&] {
       auto i = Index(0);
       auto j = Index(1);
-      TC_ASSERT(n % block_size == 0);
+      TI_ASSERT(n % block_size == 0);
       root.dense({i, j}, {n / block_size, n / block_size})
           .morton()
           .dense({i, j}, {block_size, block_size})
@@ -160,14 +160,14 @@ TC_TEST("2d_blocked_array_morton") {
 
     for (int i = 0; i < n; i++) {
       for (int j = 0; j < n; j++) {
-        TC_CHECK(a.val<int32>(i, j) == i + j * 3);
-        TC_CHECK(b.val<int32>(i, j) == i * 2 + j * 3);
+        TI_CHECK(a.val<int32>(i, j) == i + j * 3);
+        TI_CHECK(b.val<int32>(i, j) == i * 2 + j * 3);
       }
     }
   }
 }
 
-TC_TEST("bitmask_clear") {
+TI_TEST("bitmask_clear") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 256, block_size = 16;
 
@@ -180,7 +180,7 @@ TC_TEST("bitmask_clear") {
     layout([&] {
       auto i = Index(0);
       auto j = Index(1);
-      TC_ASSERT(n % block_size == 0);
+      TI_ASSERT(n % block_size == 0);
       root.dense({i, j}, {n / block_size, n / block_size})
           .bitmasked()
           .dense({i, j}, {block_size, block_size})
@@ -197,11 +197,11 @@ TC_TEST("bitmask_clear") {
 
     for (int i = 0; i < n / 2; i++) {
       for (int j = n / 2; j < n; j++) {
-        TC_CHECK(a.val<int32>(i, j) == i + j * 3);
-        TC_CHECK(b.val<int32>(i, j) == i + j * 3 + 1);
+        TI_CHECK(a.val<int32>(i, j) == i + j * 3);
+        TI_CHECK(b.val<int32>(i, j) == i + j * 3 + 1);
       }
     }
-    TC_CHECK(b.val<int32>(1, 1) == 0);
+    TI_CHECK(b.val<int32>(1, 1) == 0);
 
     a.parent().snode()->clear_data();
 
@@ -209,11 +209,11 @@ TC_TEST("bitmask_clear") {
 
     for (int i = 0; i < n / 2; i++) {
       for (int j = n / 2; j < n; j++) {
-        TC_CHECK(a.val<int32>(i, j) == 0);
-        TC_CHECK(b.val<int32>(i, j) == i);
+        TI_CHECK(a.val<int32>(i, j) == 0);
+        TI_CHECK(b.val<int32>(i, j) == i);
       }
     }
-    TC_CHECK(b.val<int32>(block_size + 1, 1) == 0);
+    TI_CHECK(b.val<int32>(block_size + 1, 1) == 0);
 
     a.parent().snode()->clear_data_and_deactivate();
 
@@ -221,15 +221,15 @@ TC_TEST("bitmask_clear") {
 
     for (int i = 0; i < n / 2; i++) {
       for (int j = n / 2; j < n; j++) {
-        TC_CHECK(a.val<int32>(i, j) == 0);
-        TC_CHECK(b.val<int32>(i, j) == 0);
+        TI_CHECK(a.val<int32>(i, j) == 0);
+        TI_CHECK(b.val<int32>(i, j) == 0);
       }
     }
-    TC_CHECK(b.val<int32>(block_size + 1, 1) == 0);
+    TI_CHECK(b.val<int32>(block_size + 1, 1) == 0);
   }
 }
 
-TC_TEST("2d_blocked_array_bitmasked") {
+TI_TEST("2d_blocked_array_bitmasked") {
   int n = 16, block_size = 4;
 
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
@@ -245,7 +245,7 @@ TC_TEST("2d_blocked_array_bitmasked") {
       layout([&] {
         auto i = Index(0);
         auto j = Index(1);
-        TC_ASSERT(n % block_size == 0);
+        TI_ASSERT(n % block_size == 0);
         root.dense({i, j}, {n / block_size, n / block_size})
             .morton(morton)
             .bitmasked()
@@ -264,16 +264,16 @@ TC_TEST("2d_blocked_array_bitmasked") {
 
       for (int i = 0; i < n / 2; i++) {
         for (int j = n / 2; j < n; j++) {
-          TC_CHECK(a.val<int32>(i, j) == i + j * 3);
-          TC_CHECK(b.val<int32>(i, j) == i * 2 + j * 3);
+          TI_CHECK(a.val<int32>(i, j) == i + j * 3);
+          TI_CHECK(b.val<int32>(i, j) == i * 2 + j * 3);
         }
       }
-      TC_CHECK(b.val<int32>(1, 1) == 0);
+      TI_CHECK(b.val<int32>(1, 1) == 0);
     }
   }
 }
 
-TC_TEST("2d_blocked_array_vec") {
+TI_TEST("2d_blocked_array_vec") {
   int n = 8, block_size = 4;
 
   for (auto arch : {Arch::x86_64})
@@ -287,7 +287,7 @@ TC_TEST("2d_blocked_array_vec") {
         auto i = Index(0);
         auto j = Index(1);
         if (blocked) {
-          TC_ASSERT(n % block_size == 0);
+          TI_ASSERT(n % block_size == 0);
           root.dense({i, j}, {n / block_size, n * 2 / block_size})
               .dense({i, j}, {block_size, block_size})
               .place(a, b);
@@ -310,14 +310,14 @@ TC_TEST("2d_blocked_array_vec") {
 
       for (int i = 0; i < n; i++) {
         for (int j = 0; j < n * 2; j++) {
-          TC_CHECK(a.val<int32>(i, j) == i + j * 3);
-          TC_CHECK(b.val<int32>(i, j) == i * 2 + j * 3);
+          TI_CHECK(a.val<int32>(i, j) == i + j * 3);
+          TI_CHECK(b.val<int32>(i, j) == i * 2 + j * 3);
         }
       }
     }
 }
 
-TC_TEST("loop_over_blocks") {
+TI_TEST("loop_over_blocks") {
   int n = 64, block_size = 4;
 
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
@@ -329,7 +329,7 @@ TC_TEST("loop_over_blocks") {
 
     layout([&] {
       auto ij = Indices(0, 1);
-      TC_ASSERT(n % block_size == 0);
+      TI_ASSERT(n % block_size == 0);
       root.dense(ij, n / block_size)
           .dense(ij, {block_size, block_size * 2})
           .place(a);
@@ -351,18 +351,18 @@ TC_TEST("loop_over_blocks") {
         sum_j_gt += j;
       }
     }
-    TC_CHECK(sum_i.val<int32>() == sum_i_gt);
-    TC_CHECK(sum_j.val<int32>() == sum_j_gt);
+    TI_CHECK(sum_i.val<int32>() == sum_i_gt);
+    TI_CHECK(sum_j.val<int32>() == sum_j_gt);
   }
 }
 
 #if (0)
-TC_TEST("spmv") {
+TI_TEST("spmv") {
   initialize_benchmark();
   int n = 8192;
   int band = 256;
   int k = 128;
-  TC_ASSERT(k <= band);
+  TI_ASSERT(k <= band);
   int m = n * k;
 
   Eigen::SparseMatrix<float32, Eigen::RowMajor> M(n, n);
@@ -430,31 +430,31 @@ TC_TEST("spmv") {
     vec_val.val<float32>(i) = val;
   }
 
-  TC_TIME(M.setFromTriplets(entries.begin(), entries.end()));
+  TI_TIME(M.setFromTriplets(entries.begin(), entries.end()));
 
-  TC_TIME(populate());
+  TI_TIME(populate());
 
   int T = 1;
   for (int i = 0; i < T; i++) {
-    TC_TIME(matvecmul());
+    TI_TIME(matvecmul());
   }
 
-  TC_P(n = Eigen::nbThreads());
+  TI_P(n = Eigen::nbThreads());
   for (int i = 0; i < T; i++) {
-    TC_TIME(Vret = M * V);
+    TI_TIME(Vret = M * V);
   }
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK_EQUAL(Vret(i), result.val<float32>(i) / T, 1e-3_f);
+    TI_CHECK_EQUAL(Vret(i), result.val<float32>(i) / T, 1e-3_f);
   }
 }
 
-TC_TEST("spmv_dynamic") {
+TI_TEST("spmv_dynamic") {
   initialize_benchmark();
   int n = 8192;
   int band = 256;
   int k = 128;
-  TC_ASSERT(k <= band);
+  TI_ASSERT(k <= band);
   int m = n * k;
 
   Eigen::SparseMatrix<float32, Eigen::RowMajor> M(n, n);
@@ -524,27 +524,27 @@ TC_TEST("spmv_dynamic") {
     vec_val.val<float32>(i) = val;
   }
 
-  TC_TIME(M.setFromTriplets(entries.begin(), entries.end()));
+  TI_TIME(M.setFromTriplets(entries.begin(), entries.end()));
 
-  TC_TIME(populate());
+  TI_TIME(populate());
 
   int T = 30;
   for (int i = 0; i < T; i++) {
-    TC_TIME(matvecmul());
+    TI_TIME(matvecmul());
   }
 
-  TC_P(n = Eigen::nbThreads());
+  TI_P(n = Eigen::nbThreads());
   for (int i = 0; i < T; i++) {
-    TC_TIME(Vret = M * V);
+    TI_TIME(Vret = M * V);
   }
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK_EQUAL(Vret(i), result.val<float32>(i) / T, 1e-3_f);
+    TI_CHECK_EQUAL(Vret(i), result.val<float32>(i) / T, 1e-3_f);
   }
 }
 
 // array of linked list
-TC_TEST("indirect") {
+TI_TEST("indirect") {
   Program prog;
 
   int n = 4;
@@ -586,12 +586,12 @@ TC_TEST("indirect") {
 
   for (int i = 0; i < n; i++) {
     auto reduced = sum.val<int32>(i);
-    TC_CHECK(reduced == (i * k + (i + 1) * k + 1) * k / 2);
+    TI_CHECK(reduced == (i * k + (i + 1) * k + 1) * k / 2);
   }
 }
 #endif
 
-TC_TEST("leaf_context") {
+TI_TEST("leaf_context") {
   Program prog;
 
   int n = 64;
@@ -620,10 +620,10 @@ TC_TEST("leaf_context") {
     For(i, a, [&] { sum[Expr(0)] += a[i]; });
   })();
 
-  TC_CHECK(sum.val<int32>() == sum_gt);
+  TI_CHECK(sum.val<int32>() == sum_gt);
 }
 
-TC_TEST("pointer") {
+TI_TEST("pointer") {
   for (auto arch : {Arch::x86_64, Arch::gpu}) {
     Program prog(arch);
 
@@ -651,11 +651,11 @@ TC_TEST("pointer") {
     kernel([&]() { For(a, [&](Expr i) { Atomic(sum[Expr(0)]) += a[i]; }); })();
 
     auto reduced = sum.val<int32>();
-    TC_CHECK(reduced == sum_gt);
+    TI_CHECK(reduced == sum_gt);
   }
 }
 
-TC_TEST("gpu_listgen") {
+TI_TEST("gpu_listgen") {
   for (auto arch : {Arch::gpu}) {
     for (auto level : {1, 2, 3}) {
       Program prog(arch);
@@ -691,12 +691,12 @@ TC_TEST("gpu_listgen") {
           [&]() { For(a, [&](Expr i) { Atomic(sum[Expr(0)]) += a[i]; }); })();
 
       auto reduced = sum.val<int32>();
-      TC_CHECK(reduced == sum_gt);
+      TI_CHECK(reduced == sum_gt);
     }
   }
 }
 
-TC_TEST("misaligned") {
+TI_TEST("misaligned") {
   // On the same tree, x has indices i while y has indices i & j
   Program prog;
 
@@ -730,11 +730,11 @@ TC_TEST("misaligned") {
   })();
 
   for (int i = 0; i < n; i++) {
-    TC_CHECK(x_gt[i] == x.val<int32>(i));
+    TI_CHECK(x_gt[i] == x.val<int32>(i));
   }
 }
 
-TC_TEST("hashed") {
+TI_TEST("hashed") {
   Program prog;
 
   int n = 64;
@@ -766,10 +766,10 @@ TC_TEST("hashed") {
   })();
 
   auto reduced = sum.val<int32>();
-  TC_CHECK(reduced == sum_gt);
+  TI_CHECK(reduced == sum_gt);
 }
 
-TC_TEST("mpm_layout") {
+TI_TEST("mpm_layout") {
   Program prog(Arch::gpu);
   // Program prog(Arch::x86_64);
   // prog.config.print_ir = true;
@@ -823,7 +823,7 @@ TC_TEST("mpm_layout") {
       place(particle_v(i));
     place(particle_J);
 
-    TC_ASSERT(n % grid_block_size == 0);
+    TI_ASSERT(n % grid_block_size == 0);
     auto &block = root.dense({i, j, k}, grid_n / 4 / grid_block_size)
                       .pointer()
                       .dense({i, j, k}, 4)
diff --git a/misc/obsolete_cpp_tests/svd.cpp b/misc/obsolete_cpp_tests/svd.cpp
index c763568619efa..f54dbabf387fb 100644
--- a/misc/obsolete_cpp_tests/svd.cpp
+++ b/misc/obsolete_cpp_tests/svd.cpp
@@ -43,7 +43,7 @@ void sifakis_svd_gt(Matrix3 &a, Matrix3 &u, Matrix3 &v, Vector3 &sig) {
   // clang-format on
 }
 
-TC_TEST("svd_scalar") {
+TI_TEST("svd_scalar") {
   using Matrix = TMatrix<float32, 3>;
   using Vector = TVector<float32, 3>;
   float32 tolerance = 2e-3_f32;
@@ -54,30 +54,30 @@ TC_TEST("svd_scalar") {
 
     sifakis_svd_gt<6>(m, U, V, sig_vec);
     sig = Matrix(sig_vec);
-    TC_CHECK_EQUAL(m, U * sig * transposed(V), tolerance);
-    TC_CHECK_EQUAL(Matrix(1), U * transposed(U), tolerance);
-    TC_CHECK_EQUAL(Matrix(1), V * transposed(V), tolerance);
-    TC_CHECK_EQUAL(sig, Matrix(sig.diag()), tolerance);
+    TI_CHECK_EQUAL(m, U * sig * transposed(V), tolerance);
+    TI_CHECK_EQUAL(Matrix(1), U * transposed(U), tolerance);
+    TI_CHECK_EQUAL(Matrix(1), V * transposed(V), tolerance);
+    TI_CHECK_EQUAL(sig, Matrix(sig.diag()), tolerance);
 
     /*
     if (dim == 2) {
       qr_decomp(m, Q, R);
-      TC_CHECK_EQUAL(m, Q * R, tolerance);
-      TC_CHECK_EQUAL(Q * transposed(Q), Matrix(1), tolerance);
+      TI_CHECK_EQUAL(m, Q * R, tolerance);
+      TI_CHECK_EQUAL(Q * transposed(Q), Matrix(1), tolerance);
       CHECK(abs(R[0][1]) < 1e-6_f);
       CHECK(R[0][0] > -1e-6_f);
       CHECK(R[1][1] > -1e-6_f);
     }
 
     polar_decomp(m, R, S);
-    TC_CHECK_EQUAL(m, R * S, tolerance);
-    TC_CHECK_EQUAL(Matrix(1), R * transposed(R), tolerance);
-    TC_CHECK_EQUAL(S, transposed(S), tolerance);
+    TI_CHECK_EQUAL(m, R * S, tolerance);
+    TI_CHECK_EQUAL(Matrix(1), R * transposed(R), tolerance);
+    TI_CHECK_EQUAL(S, transposed(S), tolerance);
     */
   }
 }
 
-TC_TEST("svd_dsl") {
+TI_TEST("svd_dsl") {
   for (auto vec : {1}) {
     CoreState::set_trigger_gdb_when_crash(true);
     using TMat = TMatrix<float32, 3>;
@@ -136,15 +136,15 @@ TC_TEST("svd_dsl") {
         sig(p, p) = gSigma(p).val<float32>(i);
       }
 
-      TC_CHECK_EQUAL(m, U * sig * transposed(V), tolerance);
-      TC_CHECK_EQUAL(TMat(1), U * transposed(U), tolerance);
-      TC_CHECK_EQUAL(TMat(1), V * transposed(V), tolerance);
-      TC_CHECK_EQUAL(sig, TMat(sig.diag()), tolerance);
+      TI_CHECK_EQUAL(m, U * sig * transposed(V), tolerance);
+      TI_CHECK_EQUAL(TMat(1), U * transposed(U), tolerance);
+      TI_CHECK_EQUAL(TMat(1), V * transposed(V), tolerance);
+      TI_CHECK_EQUAL(sig, TMat(sig.diag()), tolerance);
     }
   }
 }
 
-TC_TEST("svd_dsl_float64") {
+TI_TEST("svd_dsl_float64") {
   for (auto vec : {1}) {
     CoreState::set_trigger_gdb_when_crash(true);
     using TMat = TMatrix<float32, 3>;
@@ -204,10 +204,10 @@ TC_TEST("svd_dsl_float64") {
         sig(p, p) = gSigma(p).val<float64>(i);
       }
 
-      TC_CHECK_EQUAL(m, U * sig * transposed(V), tolerance);
-      TC_CHECK_EQUAL(TMat(1), U * transposed(U), tolerance);
-      TC_CHECK_EQUAL(TMat(1), V * transposed(V), tolerance);
-      TC_CHECK_EQUAL(sig, TMat(sig.diag()), tolerance);
+      TI_CHECK_EQUAL(m, U * sig * transposed(V), tolerance);
+      TI_CHECK_EQUAL(TMat(1), U * transposed(U), tolerance);
+      TI_CHECK_EQUAL(TMat(1), V * transposed(V), tolerance);
+      TI_CHECK_EQUAL(sig, TMat(sig.diag()), tolerance);
     }
   }
 }
diff --git a/misc/obsolete_cpp_tests/test_linalg.cpp b/misc/obsolete_cpp_tests/test_linalg.cpp
index f211e3b6bd845..a24c94f74b658 100644
--- a/misc/obsolete_cpp_tests/test_linalg.cpp
+++ b/misc/obsolete_cpp_tests/test_linalg.cpp
@@ -6,7 +6,7 @@
 #include <taichi/util.h>
 #include <taichi/testing.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 template <int dim, typename T>
 void test_matrix() {
@@ -16,16 +16,16 @@ void test_matrix() {
     Matrix m = Matrix::rand();
     if (determinant(m) > tolerance * 1e3_f) {
       if (!math::equal(m * inversed(m), Matrix(1), tolerance)) {
-        TC_P(m * inversed(m) - Matrix(1));
-        TC_P(math::abs(m * inversed(m) - Matrix(1)));
-        TC_P(math::maximum(math::abs(m * inversed(m) - Matrix(1))));
+        TI_P(m * inversed(m) - Matrix(1));
+        TI_P(math::abs(m * inversed(m) - Matrix(1)));
+        TI_P(math::maximum(math::abs(m * inversed(m) - Matrix(1))));
       }
-      TC_CHECK_EQUAL(m * inversed(m), Matrix(1), tolerance);
+      TI_CHECK_EQUAL(m * inversed(m), Matrix(1), tolerance);
     }
   }
 }
 
-TC_TEST("vector arith") {
+TI_TEST("vector arith") {
   Vector3 a(1, 2, 3), b(4, 2, 5);
   CHECK(a + b == Vector3(5, 4, 8));
   CHECK(b - a == Vector3(3, 0, 2));
@@ -46,7 +46,7 @@ TC_TEST("vector arith") {
   CHECK(c + d == Vector2(3, 7));
 
   CHECK(Vector4(1, 2, 3, 1).length2() == 15.0_f);
-#if !defined(TC_USE_DOUBLE) && !defined(TC_ISE_NONE)
+#if !defined(TI_USE_DOUBLE) && !defined(TI_ISE_NONE)
   CHECK(Vector3(1, 2, 3, 1).length2() == 14.0_f);
 #endif
   CHECK(dot(Vector2(1, 2), Vector2(3, 2)) == 7.0_f);
@@ -88,4 +88,4 @@ TC_TEST("vector arith") {
                 "std::string is not VectorND.");
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/misc/obsolete_cpp_tests/test_system.cpp b/misc/obsolete_cpp_tests/test_system.cpp
index 4e8f5d1a429dd..a00958c211d32 100644
--- a/misc/obsolete_cpp_tests/test_system.cpp
+++ b/misc/obsolete_cpp_tests/test_system.cpp
@@ -7,9 +7,9 @@
 #include <taichi/testing.h>
 #include <taichi/system/virtual_memory.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
-TC_TEST("Virtual Memory") {
+TI_TEST("Virtual Memory") {
   for (int i = 0; i < 3; i++) {
     // Allocate 1 TB of virtual memory
     std::size_t size = 1LL << 40;
@@ -23,4 +23,4 @@ TC_TEST("Virtual Memory") {
   }
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/misc/obsolete_cpp_tests/types.cpp b/misc/obsolete_cpp_tests/types.cpp
index 78b7eb83bcbb3..5dde418c4d2b7 100644
--- a/misc/obsolete_cpp_tests/types.cpp
+++ b/misc/obsolete_cpp_tests/types.cpp
@@ -4,7 +4,7 @@
 
 TLANG_NAMESPACE_BEGIN
 
-TC_TEST("float64") {
+TI_TEST("float64") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 32;
   Program prog(Arch::gpu);
@@ -20,12 +20,12 @@ TC_TEST("float64") {
 
   func();
 
-  // TC_CHECK(sum.val<int>() == n);
+  // TI_CHECK(sum.val<int>() == n);
   for (int i = 0; i < n; i++)
-    TC_CHECK(a.val<float64>(i) == i * 2);
+    TI_CHECK(a.val<float64>(i) == i * 2);
 };
 
-TC_TEST("llvm_exception") {
+TI_TEST("llvm_exception") {
   CoreState::set_trigger_gdb_when_crash(true);
   int n = 1;
   for (int i = 0; i < 2; i++) {
@@ -41,7 +41,7 @@ TC_TEST("llvm_exception") {
   try {
     throw IRModified();
   } catch (IRModified) {
-    TC_TAG;
+    TI_TAG;
   }
 };
 
diff --git a/python/taichi/core/util.py b/python/taichi/core/util.py
index 7a3f31090803d..64a56e83d2638 100644
--- a/python/taichi/core/util.py
+++ b/python/taichi/core/util.py
@@ -50,7 +50,7 @@ def locale_encode(s):
 
 
 def is_ci():
-  return os.environ.get('TC_CI', '') == '1'
+  return os.environ.get('TI_CI', '') == '1'
 
 
 def package_root():
diff --git a/python/taichi/lang/__init__.py b/python/taichi/lang/__init__.py
index 5040d1c268820..62592a39f0cae 100644
--- a/python/taichi/lang/__init__.py
+++ b/python/taichi/lang/__init__.py
@@ -45,7 +45,7 @@ def reset():
   global runtime
   runtime = get_runtime()
 
-def init(default_fp=None, default_ip=None, print_processed=None, debug=None, **kwargs):
+def init(default_fp=None, default_ip=None, print_preprocessed=None, debug=None, **kwargs):
   if debug is None:
     debug = bool(int(os.environ.get('TI_DEBUG', '0')))
 
@@ -61,8 +61,8 @@ def init(default_fp=None, default_ip=None, print_processed=None, debug=None, **k
     ti.get_runtime().set_default_fp(default_fp)
   if default_ip is not None:
     ti.get_runtime().set_default_ip(default_ip)
-  if print_processed is not None:
-    ti.get_runtime().print_preprocessed = print_processed
+  if print_preprocessed is not None:
+    ti.get_runtime().print_preprocessed = print_preprocessed
   if debug:
     ti.set_logging_level(ti.DEBUG)
   ti.cfg.debug = debug
diff --git a/python/taichi/lang/snode.py b/python/taichi/lang/snode.py
index 0f12bd9413c07..348732f2e6023 100644
--- a/python/taichi/lang/snode.py
+++ b/python/taichi/lang/snode.py
@@ -8,15 +8,22 @@ def dense(self, indices, dimensions):
       dimensions = [dimensions] * len(indices)
     return SNode(self.ptr.dense(indices, dimensions))
 
+  def pointer(self, indices, dimensions):
+    if isinstance(dimensions, int):
+      dimensions = [dimensions] * len(indices)
+    return SNode(self.ptr.pointer(indices, dimensions))
+
+  def hash(self, indices, dimensions):
+    if isinstance(dimensions, int):
+      dimensions = [dimensions] * len(indices)
+    return SNode(self.ptr.hash(indices, dimensions))
+
   def dynamic(self, index, dimension, chunk_size=None):
     assert len(index) == 1
     if chunk_size is None:
       chunk_size = dimension
     return SNode(self.ptr.dynamic(index[0], dimension, chunk_size))
 
-  def pointer(self):
-    return SNode(self.ptr.pointer())
-
   def bitmasked(self, val=True):
     self.ptr.bitmasked(val)
     return self
@@ -51,10 +58,10 @@ def get_shape(self, i):
   def loop_range(self):
     import taichi as ti
     return ti.Expr(ti.core.global_var_expr_from_snode(self.ptr))
-  
+
   def snode(self):
     return self
-  
+
   def get_children(self):
     children = []
     for i in range(self.ptr.get_num_ch()):
diff --git a/python/taichi/main.py b/python/taichi/main.py
index af98e5ec7162b..aab2ac686d372 100644
--- a/python/taichi/main.py
+++ b/python/taichi/main.py
@@ -6,16 +6,24 @@
 from taichi.tools.video import make_video, interpolate_frames, mp4_to_gif, scale_video, crop_video, accelerate_video
 
 
-def test_python(verbose=False):
+def test_python(test_files=None, verbose=False):
   print("\nRunning python tests...\n")
   import taichi as ti
   import pytest
+  test_dir = None
   if ti.is_release():
-    test_dir = os.path.join(ti.package_root(), 'tests')
+    test_dir = ti.package_root()
   else:
-    test_dir = os.path.join(ti.get_repo_directory(), 'tests')
-
-  args = [test_dir]
+    test_dir = ti.get_repo_directory()
+  test_dir = os.path.join(test_dir, 'tests', 'python')
+  args = []
+  if test_files:
+    # run individual tests
+    for f in test_files:
+      args.append(os.path.join(test_dir, f))
+  else:
+    # run all the tests
+    args = [test_dir]
   if verbose:
     args += ['-s']
 
@@ -92,7 +100,7 @@ def main(debug=False):
       script = script.read()
     exec(script, {'__name__': '__main__'})
   elif mode == "test_python":
-    return test_python()
+    return test_python(test_files=sys.argv[2:])
   elif mode == "test_cpp":
     return test_cpp()
   elif mode == "test":
@@ -100,7 +108,7 @@ def main(debug=False):
       return -1
     return test_cpp()
   elif mode == "test_verbose":
-    if test_python(True) != 0:
+    if test_python(verbose=True) != 0:
       return -1
     return test_cpp()
   elif mode == "build":
diff --git a/python/taichi/tools/messager.py b/python/taichi/tools/messager.py
index eebbd533e1100..069ae610e0c44 100644
--- a/python/taichi/tools/messager.py
+++ b/python/taichi/tools/messager.py
@@ -16,9 +16,9 @@ def send_crash_report(message, receiver=None):
     return
   emailed = True
   if receiver is None:
-    receiver = os.environ.get('TC_MONITOR_EMAIL', None)
+    receiver = os.environ.get('TI_MONITOR_EMAIL', None)
   if receiver is None:
-    tc.warning('No receiver in $TC_MONITOR_EMAIL')
+    tc.warning('No receiver in $TI_MONITOR_EMAIL')
     return
   tc.warning('Emailing {}'.format(receiver))
   TO = receiver
diff --git a/taichi/analysis/value_diff.cpp b/taichi/analysis/value_diff.cpp
index 672d210a4dd1a..6b1a071ed23e4 100644
--- a/taichi/analysis/value_diff.cpp
+++ b/taichi/analysis/value_diff.cpp
@@ -48,7 +48,7 @@ class ValueDiff : public IRVisitor {
 
   void visit(ElementShuffleStmt *stmt) override {
     int old_lane = lane;
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     auto src = stmt->elements[lane].stmt;
     lane = stmt->elements[lane].index;
     src->accept(this);
diff --git a/taichi/arch.h b/taichi/arch.h
index aa0e12c09f5c0..24e941b6afb46 100644
--- a/taichi/arch.h
+++ b/taichi/arch.h
@@ -21,7 +21,7 @@ inline std::string arch_name(Arch arch) {
 #include "inc/archs.inc.h"
 #undef PER_ARCH
     default:
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
   }
 }
 
diff --git a/taichi/backends/base.cpp b/taichi/backends/base.cpp
index 701e863dcab7e..26bd63900eb09 100644
--- a/taichi/backends/base.cpp
+++ b/taichi/backends/base.cpp
@@ -1,7 +1,7 @@
 // The code generator base class
 
 #include "base.h"
-#if !defined(TC_PLATFORM_WINDOWS)
+#if !defined(TI_PLATFORM_WINDOWS)
 #include <xxhash.h>
 #endif
 #include <sstream>
@@ -14,7 +14,7 @@ std::string CodeGenBase::get_source_path() {
 }
 
 std::string CodeGenBase::get_library_path() {
-#if defined(TC_PLATFORM_OSX)
+#if defined(TI_PLATFORM_OSX)
   // Note: use .so here will lead to wired behavior...
   return fmt::format("{}/tmp{:04d}.dylib", folder, id);
 #else
@@ -28,7 +28,7 @@ void CodeGenBase::write_source() {
     std::string firstline;
     std::getline(ifs, firstline);
     if (firstline.find("debug") != firstline.npos) {
-      TC_WARN("Debugging file {}. Code overridden.", get_source_path());
+      TI_WARN("Debugging file {}. Code overridden.", get_source_path());
       return;
     }
   }
@@ -42,24 +42,24 @@ void CodeGenBase::write_source() {
 }
 
 std::string CodeGenBase::get_source() {
-  TC_NOT_IMPLEMENTED
+  TI_NOT_IMPLEMENTED
   return "";
 }
 
 void CodeGenBase::load_dll() {
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
   dll = dlopen((get_library_path()).c_str(), RTLD_LAZY);
   if (dll == nullptr) {
-    TC_ERROR("{}", dlerror());
+    TI_ERROR("{}", dlerror());
   }
-  TC_ASSERT(dll != nullptr);
+  TI_ASSERT(dll != nullptr);
 #else
-  TC_NOT_IMPLEMENTED
+  TI_NOT_IMPLEMENTED
 #endif
 }
 
 void CodeGenBase::disassemble() {
-#if defined(TC_PLATFORM_LINUX)
+#if defined(TI_PLATFORM_LINUX)
   auto objdump_ret = system(fmt::format("objdump {} -d > {}.s",
                                         get_library_path(), get_library_path())
                                 .c_str());
@@ -91,16 +91,16 @@ void CodeGenBase::generate_binary(std::string extra_flags) {
         get_current_program()
             .config.preprocess_cmd(get_source_path(), pp_fn, extra_flags, true)
             .c_str()));
-    TC_ERROR("Preprocessing failed.");
+    TI_ERROR("Preprocessing failed.");
   }
   std::ifstream ifs(pp_fn);
-  TC_ASSERT(ifs);
+  TI_ASSERT(ifs);
   auto hash_input =
       preprocess_cmd + std::string(std::istreambuf_iterator<char>(ifs),
                                    std::istreambuf_iterator<char>());
-  // TC_P(preprocess_cmd);
+  // TI_P(preprocess_cmd);
   auto hash = XXH64(hash_input.data(), hash_input.size(), 0);
-  // TC_P(hash);
+  // TI_P(hash);
 
   std::string cached_binary_fn = db_folder() + fmt::format("/{}.so", hash);
   std::ifstream key_file(cached_binary_fn);
@@ -117,11 +117,11 @@ void CodeGenBase::generate_binary(std::string extra_flags) {
         get_source_path(), get_library_path(), extra_flags);
     auto compile_ret = std::system(cmd.c_str());
     if (compile_ret != 0) {
-      TC_WARN("Compilation cmd: {}", cmd);
+      TI_WARN("Compilation cmd: {}", cmd);
       auto cmd = get_current_program().config.compile_cmd(
           get_source_path(), get_library_path(), extra_flags, true);
       trash(std::system(cmd.c_str()));
-      TC_ERROR("Source {} compilation failed.", get_source_path());
+      TI_ERROR("Source {} compilation failed.", get_source_path());
     } else {
       trash(std::system(
           fmt::format("cp {} {}", get_library_path(), cached_binary_fn)
@@ -129,9 +129,9 @@ void CodeGenBase::generate_binary(std::string extra_flags) {
     }
   }
   trash(std::system(fmt::format("rm {}", pp_fn).c_str()));
-  TC_INFO("Compilation time: {:.1f} ms", 1000 * (Time::get_time() - t));
+  TI_INFO("Compilation time: {:.1f} ms", 1000 * (Time::get_time() - t));
 #else
-  TC_NOT_IMPLEMENTED
+  TI_NOT_IMPLEMENTED
 #endif
 }
 
diff --git a/taichi/backends/base.h b/taichi/backends/base.h
index b83dfee9323ac..7d57eb2ec0dae 100644
--- a/taichi/backends/base.h
+++ b/taichi/backends/base.h
@@ -5,7 +5,7 @@
 #include "../snode.h"
 #include "../ir.h"
 #include "../program.h"
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
 #include <dlfcn.h>
 #endif
 
@@ -68,7 +68,7 @@ class CodeGenBase {
 
   static int get_kernel_id() {
     static int id = 0;
-    TC_ASSERT(id < 10000);
+    TI_ASSERT(id < 10000);
     return id++;
   }
 
@@ -112,16 +112,16 @@ class CodeGenBase {
 
   template <typename T>
   T load_function(std::string name) {
-#if !defined(TC_PLATFORM_WINDOWS)
+#if !defined(TI_PLATFORM_WINDOWS)
     using FP = decltype(function_pointer_helper(std::declval<T>()));
     if (dll == nullptr) {
       load_dll();
     }
     auto ret = dlsym(dll, name.c_str());
-    TC_ASSERT(ret != nullptr);
+    TI_ASSERT(ret != nullptr);
     return T((FP)ret);
 #else
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
 #endif
   }
 
diff --git a/taichi/backends/codegen_cuda.cpp b/taichi/backends/codegen_cuda.cpp
index 45e4010a4b103..289aad2ed1023 100644
--- a/taichi/backends/codegen_cuda.cpp
+++ b/taichi/backends/codegen_cuda.cpp
@@ -41,7 +41,7 @@ class GPUIRCodeGen : public IRVisitor {
   }
 
   void struct_for(Stmt *for_stmt_) {
-    TC_ASSERT_INFO(current_struct_for == nullptr,
+    TI_ASSERT_INFO(current_struct_for == nullptr,
                    "Struct for cannot be nested.");
     auto for_stmt = for_stmt_->as<StructForStmt>();
     current_struct_for = for_stmt;
@@ -52,7 +52,7 @@ class GPUIRCodeGen : public IRVisitor {
           std::min(1 << leaf->total_num_bits, max_gpu_block_dim);
     }
 
-    TC_ASSERT((1 << leaf->total_num_bits) % for_stmt->block_dim == 0);
+    TI_ASSERT((1 << leaf->total_num_bits) % for_stmt->block_dim == 0);
     int block_division = (1 << leaf->total_num_bits) / for_stmt->block_dim;
 
     std::vector<SNode *> path;
@@ -148,7 +148,7 @@ class GPUIRCodeGen : public IRVisitor {
         for (auto &pad : scratch_pads->pads) {
           emit("__shared__ {}::val_type {}[{}];", pad.first->node_type_name,
                pad.second.name(), pad.second.linear_size());
-          TC_ASSERT(pad.second.is_pure());
+          TI_ASSERT(pad.second.is_pure());
           if (pad.second.total_flags == AccessFlag::read ||
               pad.second.total_flags == AccessFlag::accumulate) {
             // read & accumulate case
@@ -302,7 +302,7 @@ class GPUIRCodeGen : public IRVisitor {
     for (auto &o : opt) {
       if (o.first == 1) {
         ldg.insert(o.second);
-        TC_INFO("Caching to L1: {}", o.second->node_type_name);
+        TI_INFO("Caching to L1: {}", o.second->node_type_name);
       } else {
         new_opt.push_back(o);
       }
@@ -321,7 +321,7 @@ class GPUIRCodeGen : public IRVisitor {
     for_stmt_counter += 1;
     if (for_stmt_->is<RangeForStmt>()) {
       auto range_for = for_stmt_->as<RangeForStmt>();
-      TC_ASSERT(range_for->vectorize == 1);
+      TI_ASSERT(range_for->vectorize == 1);
 
       int begin = range_for->begin->as<ConstStmt>()->val[0].val_int32();
       int end = range_for->end->as<ConstStmt>()->val[0].val_int32();
@@ -347,7 +347,7 @@ class GPUIRCodeGen : public IRVisitor {
       emit("{{");
       int block_dim = range_for->block_dim;
       if (block_dim == 0) {
-        TC_WARN("Using default block size = 256");
+        TI_WARN("Using default block size = 256");
         block_dim = 256;
       }
       emit("gpu_runtime_init();");
@@ -361,7 +361,7 @@ class GPUIRCodeGen : public IRVisitor {
       emit("}}");
     } else {
       auto for_stmt = for_stmt_->as<StructForStmt>();
-      TC_ASSERT(for_stmt->vectorize == 1);
+      TI_ASSERT(for_stmt->vectorize == 1);
       extract_ldg(for_stmt->scratch_opt);
       struct_for(for_stmt_);
     }
@@ -451,7 +451,7 @@ class GPUIRCodeGen : public IRVisitor {
   }
 
   void visit(RandStmt *stmt) override {
-    TC_ASSERT(stmt->ret_type.data_type == DataType::f32);
+    TI_ASSERT(stmt->ret_type.data_type == DataType::f32);
     emit("const auto {} = randf();", stmt->raw_name(),
          stmt->ret_data_type_name());
   }
@@ -483,7 +483,7 @@ class GPUIRCodeGen : public IRVisitor {
   }
 
   void visit(TernaryOpStmt *tri) override {
-    TC_ASSERT(tri->op_type == TernaryOpType::select);
+    TI_ASSERT(tri->op_type == TernaryOpType::select);
     emit("const {} {} = {} ? {} : {};", tri->ret_data_type_name(),
          tri->raw_name(), tri->op1->raw_name(), tri->op2->raw_name(),
          tri->op3->raw_name());
@@ -511,7 +511,7 @@ class GPUIRCodeGen : public IRVisitor {
       emit("printf(\"[debug] {}\" \" = %f\\n\", {});", print_stmt->str,
            print_stmt->stmt->raw_name());
     } else {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
     }
   }
 
@@ -533,7 +533,7 @@ class GPUIRCodeGen : public IRVisitor {
 
   void visit(StructForStmt *for_stmt) override {
     // generate_loop_header(for_stmt->snode, for_stmt, true);
-    TC_ASSERT_INFO(current_struct_for == nullptr,
+    TI_ASSERT_INFO(current_struct_for == nullptr,
                    "StructFor cannot be nested.");
     current_struct_for = for_stmt;
     for_stmt->body->accept(this);
@@ -560,7 +560,7 @@ class GPUIRCodeGen : public IRVisitor {
         emit("{} = {}_;", loop_var->raw_name(), loop_var->raw_name());
       }
     } else {
-      TC_ASSERT(!for_stmt->reversed);
+      TI_ASSERT(!for_stmt->reversed);
       emit("for ({} {} = {}; {} < {}; {} = {} + {}({})) {{",
            loop_var->ret_data_type_name(), loop_var->raw_name(),
            for_stmt->begin->raw_name(), loop_var->raw_name(),
@@ -594,8 +594,8 @@ class GPUIRCodeGen : public IRVisitor {
   }
 
   void visit(ExternalPtrStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
-    TC_ASSERT(stmt->indices.size() == 1);
+    TI_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->indices.size() == 1);
     auto dt = stmt->ret_type.data_type;
     emit("const {} *{}[1] = {{&{}[{}]}};", data_type_name(dt), stmt->raw_name(),
          stmt->base_ptrs[0]->raw_name(), stmt->indices[0]->raw_name());
@@ -607,7 +607,7 @@ class GPUIRCodeGen : public IRVisitor {
       return;
     }
 
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     emit("{} *{}[{}];", data_type_name(stmt->ret_type.data_type),
          stmt->raw_name(), stmt->ret_type.width);
     for (int l = 0; l < stmt->ret_type.width; l++) {
@@ -618,7 +618,7 @@ class GPUIRCodeGen : public IRVisitor {
       std::vector<std::string> indices(max_num_indices, "0");  // = "(root, ";
       for (int i = 0; i < (int)stmt->indices.size(); i++) {
         if (snode->physical_index_position[i] != -1) {
-          // TC_ASSERT(snode->physical_index_position[i] != -1);
+          // TI_ASSERT(snode->physical_index_position[i] != -1);
           indices[snode->physical_index_position[i]] =
               stmt->indices[i]->raw_name();
         }
@@ -634,7 +634,7 @@ class GPUIRCodeGen : public IRVisitor {
 
   void visit(SNodeOpStmt *stmt) override {
     /*
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     auto snode = stmt->snodes[0];
     auto indices = indices_str(snode, -1, stmt->indices);
 
@@ -650,7 +650,7 @@ class GPUIRCodeGen : public IRVisitor {
            make_list(indices, ""));
 
     if (stmt->op_type == SNodeOpType::append) {
-      TC_ASSERT(stmt->val->width() == 1);
+      TI_ASSERT(stmt->val->width() == 1);
       emit("{}_tmp->append({}({}));", snode->node_type_name,
            snode->ch[0]->node_type_name, stmt->val->raw_name());
     } else if (stmt->op_type == SNodeOpType::clear) {
@@ -670,7 +670,7 @@ class GPUIRCodeGen : public IRVisitor {
       emit("activate_{}(root, {});", snode->node_type_name,
            make_list(indices, ""));
     } else {
-      TC_NOT_IMPLEMENTED;
+      TI_NOT_IMPLEMENTED;
     }
     emit("}}");
     */
@@ -699,7 +699,7 @@ class GPUIRCodeGen : public IRVisitor {
   }
 
   void visit(GlobalLoadStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     if (stmt->ptr->is<GlobalPtrStmt>()) {
       auto ptr = stmt->ptr->as<GlobalPtrStmt>();
       auto snode = ptr->snodes[0];
@@ -737,11 +737,11 @@ class GPUIRCodeGen : public IRVisitor {
   }
 
   void visit(AtomicOpStmt *stmt) override {
-    TC_ASSERT(stmt->val->ret_type.data_type == DataType::f32 ||
+    TI_ASSERT(stmt->val->ret_type.data_type == DataType::f32 ||
               stmt->val->ret_type.data_type == DataType::i32 ||
               stmt->val->ret_type.data_type == DataType::f64 ||
               stmt->val->ret_type.data_type == DataType::i64);
-    TC_ASSERT(stmt->op_type == AtomicOpType::add);
+    TI_ASSERT(stmt->op_type == AtomicOpType::add);
     auto ptr = stmt->dest->as<GlobalPtrStmt>();
     auto snode = ptr->snodes[0];
     if (current_scratch_pads && current_scratch_pads->has(snode)) {
@@ -759,7 +759,7 @@ class GPUIRCodeGen : public IRVisitor {
   void visit(ElementShuffleStmt *stmt) override {
     auto init = stmt->elements.serialize(
         [&](const VectorElement &elem) {
-          TC_ASSERT(elem.index == 0);
+          TI_ASSERT(elem.index == 0);
           if (stmt->pointer) {
             return fmt::format("{}[0]", elem.stmt->raw_name(), elem.index);
           } else {
@@ -780,9 +780,9 @@ class GPUIRCodeGen : public IRVisitor {
     // this does not necessarily hold since any index within the leaf block can
     // be the base
     /*
-    emit("TC_ASSERT({} + {} <= {});", stmt->base->raw_name(), stmt->low,
+    emit("TI_ASSERT({} + {} <= {});", stmt->base->raw_name(), stmt->low,
          stmt->input->raw_name());
-    emit("TC_ASSERT({} < {} + {});", stmt->input->raw_name(),
+    emit("TI_ASSERT({} < {} + {});", stmt->input->raw_name(),
          stmt->base->raw_name(), stmt->high);
          */
     emit("const auto {} = {};", stmt->raw_name(), stmt->input->raw_name());
@@ -790,7 +790,7 @@ class GPUIRCodeGen : public IRVisitor {
 
   void visit(AssertStmt *stmt) override {
     emit("#if defined(TL_DEBUG)");
-    emit(R"(TC_ASSERT_INFO({}, "{}");)", stmt->val->raw_name(), stmt->text);
+    emit(R"(TI_ASSERT_INFO({}, "{}");)", stmt->val->raw_name(), stmt->text);
     emit("#endif");
   }
 
@@ -978,7 +978,7 @@ class GPUIRCodeGen : public IRVisitor {
 };
 
 void GPUCodeGen::lower_cuda() {
-  TC_NOT_IMPLEMENTED
+  TI_NOT_IMPLEMENTED
 }
 
 void GPUCodeGen::lower_llvm() {
@@ -995,7 +995,7 @@ void GPUCodeGen::lower_llvm() {
   if (kernel->grad) {
     irpass::reverse_segments(ir);
     if (print_ir) {
-      TC_TRACE("Segment reversed (for autodiff):");
+      TI_TRACE("Segment reversed (for autodiff):");
       irpass::re_id(ir);
       irpass::print(ir);
     }
@@ -1014,7 +1014,7 @@ void GPUCodeGen::lower_llvm() {
     irpass::demote_dense_struct_fors(ir);
     irpass::typecheck(ir);
     if (print_ir) {
-      TC_TRACE("Dense Struct-for demoted:");
+      TI_TRACE("Dense Struct-for demoted:");
       irpass::print(ir);
     }
   }
@@ -1023,85 +1023,85 @@ void GPUCodeGen::lower_llvm() {
     irpass::simplify(ir);
     irpass::re_id(ir);
     if (print_ir) {
-      TC_TRACE("Simplified I:");
+      TI_TRACE("Simplified I:");
       irpass::print(ir);
     }
   }
   if (kernel->grad) {
     // irpass::re_id(ir);
-    // TC_TRACE("Primal:");
+    // TI_TRACE("Primal:");
     // irpass::print(ir);
     irpass::demote_atomics(ir);
     irpass::full_simplify(ir);
     irpass::typecheck(ir);
     if (print_ir) {
-      TC_TRACE("Before make_adjoint:");
+      TI_TRACE("Before make_adjoint:");
       irpass::print(ir);
     }
     irpass::make_adjoint(ir);
     if (print_ir) {
-      TC_TRACE("After make_adjoint:");
+      TI_TRACE("After make_adjoint:");
       irpass::print(ir);
     }
     irpass::typecheck(ir);
     // irpass::re_id(ir);
-    // TC_TRACE("Adjoint:");
+    // TI_TRACE("Adjoint:");
     // irpass::print(ir);
   }
   irpass::lower_access(ir, prog->config.use_llvm);
   if (print_ir) {
-    TC_TRACE("Access Lowered:");
+    TI_TRACE("Access Lowered:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   if (prog->config.simplify_after_lower_access) {
     irpass::die(ir);
     if (print_ir) {
-      TC_TRACE("DIEd:");
+      TI_TRACE("DIEd:");
       irpass::re_id(ir);
       irpass::print(ir);
     }
     irpass::simplify(ir);
     if (print_ir) {
-      TC_TRACE("Simplified II:");
+      TI_TRACE("Simplified II:");
       irpass::re_id(ir);
       irpass::print(ir);
     }
   }
   irpass::die(ir);
   if (print_ir) {
-    TC_TRACE("DIEd:");
+    TI_TRACE("DIEd:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   irpass::flag_access(ir);
   if (print_ir) {
-    TC_TRACE("Access Flagged:");
+    TI_TRACE("Access Flagged:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   irpass::offload(ir);
   if (print_ir) {
-    TC_TRACE("Offloaded:");
+    TI_TRACE("Offloaded:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   irpass::demote_atomics(ir);
   if (print_ir) {
-    TC_TRACE("Atomics Demoted:");
+    TI_TRACE("Atomics Demoted:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   irpass::full_simplify(ir);
   if (print_ir) {
-    TC_TRACE("Simplified III:");
+    TI_TRACE("Simplified III:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
 }
 
 void GPUCodeGen::lower() {
-  TC_PROFILER(__FUNCTION__)
+  TI_PROFILER(__FUNCTION__)
   if (prog->config.use_llvm) {
     lower_llvm();
   } else {
@@ -1110,7 +1110,7 @@ void GPUCodeGen::lower() {
 }
 
 void GPUCodeGen::codegen() {
-  emit("#define TC_GPU");
+  emit("#define TI_GPU");
   generate_header();
 
   // Body
diff --git a/taichi/backends/codegen_llvm.h b/taichi/backends/codegen_llvm.h
index 8dfc1da42af2f..358394f2c617d 100644
--- a/taichi/backends/codegen_llvm.h
+++ b/taichi/backends/codegen_llvm.h
@@ -77,15 +77,15 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
     }
 
     void compile() {
-      TC_ASSERT(!func);
+      TI_ASSERT(!func);
       auto kernel_symbol = codegen->jit->lookup(name);
-      TC_ASSERT_INFO(kernel_symbol, "Function not found");
+      TI_ASSERT_INFO(kernel_symbol, "Function not found");
 
       func = (task_fp_type)(void *)(llvm::cantFail(kernel_symbol.getAddress()));
     }
 
     void operator()(Context *context) {
-      TC_ASSERT(func);
+      TI_ASSERT(func);
       func(context);
     }
   };
@@ -199,8 +199,8 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
       meta->call("set_bitmasked", tlctx->get_constant(snode->_bitmasked));
       meta->call("set_morton_dim", tlctx->get_constant((int)snode->_morton));
     } else if (snode->type == SNodeType::pointer) {
-      meta = std::make_unique<RuntimeObject>("PointerMeta", this, builder);
-      emit_struct_meta_base("Pointer", meta->ptr, snode);
+      meta = std::make_unique<RuntimeObject>("pointerMeta", this, builder);
+      emit_struct_meta_base("pointer", meta->ptr, snode);
     } else if (snode->type == SNodeType::root) {
       meta = std::make_unique<RuntimeObject>("RootMeta", this, builder);
       emit_struct_meta_base("Root", meta->ptr, snode);
@@ -209,8 +209,8 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
       emit_struct_meta_base("Dynamic", meta->ptr, snode);
       meta->call("set_chunk_size", tlctx->get_constant(snode->chunk_size));
     } else {
-      TC_P(snode_type_name(snode->type));
-      TC_NOT_IMPLEMENTED;
+      TI_P(snode_type_name(snode->type));
+      TI_NOT_IMPLEMENTED;
     }
     if (false) {
       // auto ptr_type = llvm::Type::getInt8PtrTy(*llvm_context, 0);
@@ -225,7 +225,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
 
   llvm::Value *emit_struct_meta(SNode *snode) {
     auto obj = emit_struct_meta_object(snode);
-    TC_ASSERT(obj != nullptr);
+    TI_ASSERT(obj != nullptr);
     return obj->ptr;
   }
 
@@ -254,7 +254,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
 
   template <typename... Args>
   void emit(std::string f, Args &&... args) {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
     codegen->emit(f, std::forward<Args>(args)...);
   }
 
@@ -265,7 +265,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
   }
 
   void visit(AllocaStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     stmt->value = create_entry_block_alloca(stmt->ret_type.data_type);
     // initialize as zero
     builder->CreateStore(tlctx->get_constant(stmt->ret_type.data_type, 0),
@@ -295,7 +295,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
       stmt->value =                                                    \
           builder->CreateCall(get_runtime_function(#x "_i32"), input); \
     } else {                                                           \
-      TC_NOT_IMPLEMENTED                                               \
+      TI_NOT_IMPLEMENTED                                               \
     }                                                                  \
   }
     if (false) {
@@ -316,8 +316,8 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
                                              {input_type}, {input});
     }
     else {
-      TC_P(unary_op_type_name(op));
-      TC_NOT_IMPLEMENTED
+      TI_P(unary_op_type_name(op));
+      TI_NOT_IMPLEMENTED
     }
 #undef UNARY_STD
   }
@@ -359,16 +359,16 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
         llvm::CastInst::CastOps cast_op;
         auto from = stmt->operand->ret_type.data_type;
         auto to = stmt->cast_type;
-        TC_ASSERT(from != to);
+        TI_ASSERT(from != to);
         if (is_real(from) != is_real(to)) {
           if (is_real(from) && is_integral(to)) {
             cast_op = llvm::Instruction::CastOps::FPToSI;
           } else if (is_integral(from) && is_real(to)) {
             cast_op = llvm::Instruction::CastOps::SIToFP;
           } else {
-            TC_P(data_type_name(from));
-            TC_P(data_type_name(to));
-            TC_NOT_IMPLEMENTED;
+            TI_P(data_type_name(from));
+            TI_P(data_type_name(to));
+            TI_NOT_IMPLEMENTED;
           }
           stmt->value =
               builder->CreateCast(cast_op, stmt->operand->value,
@@ -391,7 +391,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
           }
         }
       } else {
-        TC_ASSERT(data_type_size(stmt->ret_type.data_type) ==
+        TI_ASSERT(data_type_size(stmt->ret_type.data_type) ==
                   data_type_size(stmt->cast_type));
         stmt->value = builder->CreateBitCast(
             stmt->operand->value, tlctx->get_data_type(stmt->cast_type));
@@ -409,7 +409,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
     } else if (dt == DataType::f64) {
       return llvm::Type::getDoubleTy(*llvm_context);
     } else {
-      TC_NOT_IMPLEMENTED;
+      TI_NOT_IMPLEMENTED;
     }
     return nullptr;
   }
@@ -466,8 +466,8 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
         stmt->value =
             create_call("max_i32", {stmt->lhs->value, stmt->rhs->value});
       } else {
-        TC_P(data_type_name(ret_type));
-        TC_NOT_IMPLEMENTED
+        TI_P(data_type_name(ret_type));
+        TI_NOT_IMPLEMENTED
       }
     } else if (op == BinaryOpType::atan2) {
       if (current_arch() == Arch::x86_64) {
@@ -478,8 +478,8 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
           stmt->value =
               create_call("atan2_f64", {stmt->lhs->value, stmt->rhs->value});
         } else {
-          TC_P(data_type_name(ret_type));
-          TC_NOT_IMPLEMENTED
+          TI_P(data_type_name(ret_type));
+          TI_NOT_IMPLEMENTED
         }
       } else if (current_arch() == Arch::cuda) {
         if (ret_type == DataType::f32) {
@@ -489,11 +489,11 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
           stmt->value =
               create_call("__nv_atan2", {stmt->lhs->value, stmt->rhs->value});
         } else {
-          TC_P(data_type_name(ret_type));
-          TC_NOT_IMPLEMENTED
+          TI_P(data_type_name(ret_type));
+          TI_NOT_IMPLEMENTED
         }
       } else {
-        TC_NOT_IMPLEMENTED
+        TI_NOT_IMPLEMENTED
       }
     } else if (op == BinaryOpType::pow) {
       if (current_arch() == Arch::x86_64) {
@@ -510,8 +510,8 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
           stmt->value =
               create_call("pow_i64", {stmt->lhs->value, stmt->rhs->value});
         } else {
-          TC_P(data_type_name(ret_type));
-          TC_NOT_IMPLEMENTED
+          TI_P(data_type_name(ret_type));
+          TI_NOT_IMPLEMENTED
         }
       } else if (current_arch() == Arch::cuda) {
         if (ret_type == DataType::f32) {
@@ -527,11 +527,11 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
           stmt->value =
               create_call("pow_i64", {stmt->lhs->value, stmt->rhs->value});
         } else {
-          TC_P(data_type_name(ret_type));
-          TC_NOT_IMPLEMENTED
+          TI_P(data_type_name(ret_type));
+          TI_NOT_IMPLEMENTED
         }
       } else {
-        TC_NOT_IMPLEMENTED
+        TI_NOT_IMPLEMENTED
       }
     } else if (op == BinaryOpType::min) {
       if (is_real(ret_type)) {
@@ -540,8 +540,8 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
         stmt->value =
             create_call("min_i32", {stmt->lhs->value, stmt->rhs->value});
       } else {
-        TC_P(data_type_name(ret_type));
-        TC_NOT_IMPLEMENTED
+        TI_P(data_type_name(ret_type));
+        TI_NOT_IMPLEMENTED
       }
     } else if (is_comparison(op)) {
       llvm::Value *cmp = nullptr;
@@ -599,17 +599,17 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
           cmp = builder->CreateICmpNE(stmt->lhs->value, stmt->rhs->value);
         }
       } else {
-        TC_NOT_IMPLEMENTED
+        TI_NOT_IMPLEMENTED
       }
       stmt->value = builder->CreateSExt(cmp, llvm_type(DataType::i32));
     } else {
-      TC_P(binary_op_type_name(op));
-      TC_NOT_IMPLEMENTED
+      TI_P(binary_op_type_name(op));
+      TI_NOT_IMPLEMENTED
     }
   }
 
   void visit(TernaryOpStmt *stmt) override {
-    TC_ASSERT(stmt->op_type == TernaryOpType::select);
+    TI_ASSERT(stmt->op_type == TernaryOpType::select);
     stmt->value = builder->CreateSelect(
         builder->CreateTrunc(stmt->op1->value, llvm_type(DataType::i1)),
         stmt->op2->value, stmt->op3->value);
@@ -644,7 +644,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
     if (dt == DataType::i32) {
       format = "%d";
     } else if (dt == DataType::i64) {
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
       format = "%lld";
 #else
       format = "%I64d";
@@ -655,7 +655,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
     } else if (dt == DataType::f64) {
       format = "%.12f";
     } else {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
     }
     args.push_back(builder->CreateGlobalStringPtr(
         ("[llvm codegen debug] " + tag + " = " + format + "\n").c_str(),
@@ -666,7 +666,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
   }
 
   void visit(PrintStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     std::vector<Value *> args;
     std::string format;
     auto value = stmt->stmt->value;
@@ -674,7 +674,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
     if (dt == DataType::i32) {
       format = "%d";
     } else if (dt == DataType::i64) {
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
       format = "%lld";
 #else
       format = "%I64d";
@@ -685,7 +685,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
     } else if (dt == DataType::f64) {
       format = "%.12f";
     } else {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
     }
     args.push_back(builder->CreateGlobalStringPtr(
         ("[debug] " + stmt->str + " = " + format + "\n").c_str(),
@@ -697,7 +697,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
   }
 
   void visit(ConstStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     auto val = stmt->val[0];
     if (val.dt == DataType::f32) {
       stmt->value = llvm::ConstantFP::get(*llvm_context,
@@ -712,14 +712,14 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
       stmt->value = llvm::ConstantInt::get(
           *llvm_context, llvm::APInt(64, val.val_int64(), true));
     } else {
-      TC_NOT_IMPLEMENTED;
+      TI_NOT_IMPLEMENTED;
     }
   }
 
   void visit(WhileControlStmt *stmt) override {
     BasicBlock *after_break =
         BasicBlock::Create(*llvm_context, "after_break", func);
-    TC_ASSERT(while_after_loop);
+    TI_ASSERT(while_after_loop);
     auto cond =
         builder->CreateICmpEQ(stmt->cond->value, tlctx->get_constant(0));
     builder->CreateCondBr(cond, while_after_loop, after_break);
@@ -865,7 +865,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
 
   void visit(ArgStoreStmt *stmt) override {
     if (stmt->is_ptr) {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
     } else {
       auto intermediate_bits =
           tlctx->get_data_type(stmt->val->ret_type.data_type)
@@ -882,14 +882,14 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
   }
 
   void visit(LocalLoadStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     stmt->value = builder->CreateLoad(stmt->ptr[0].var->value);
   }
 
   void visit(LocalStoreStmt *stmt) override {
     auto mask = stmt->parent->mask();
     if (mask && stmt->width() != 1) {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
     } else {
       builder->CreateStore(stmt->data->value, stmt->ptr->value);
     }
@@ -903,30 +903,33 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
   void visit(SNodeOpStmt *stmt) override {
     auto snode = stmt->snode;
     if (stmt->op_type == SNodeOpType::append) {
-      TC_ASSERT(snode->type == SNodeType::dynamic);
-      TC_ASSERT(stmt->ret_type.data_type == DataType::i32);
+      TI_ASSERT(snode->type == SNodeType::dynamic);
+      TI_ASSERT(stmt->ret_type.data_type == DataType::i32);
       stmt->value = call(snode, stmt->ptr->value, "append", {stmt->val->value});
     } else if (stmt->op_type == SNodeOpType::length) {
-      TC_ASSERT(snode->type == SNodeType::dynamic);
+      TI_ASSERT(snode->type == SNodeType::dynamic);
       stmt->value = call(snode, stmt->ptr->value, "get_num_elements", {});
     } else if (stmt->op_type == SNodeOpType::is_active) {
       stmt->value =
           call(snode, stmt->ptr->value, "is_active", {stmt->val->value});
     } else if (stmt->op_type == SNodeOpType::deactivate) {
-      TC_ASSERT(snode->type == SNodeType::pointer ||
-                snode->type == SNodeType::dynamic);
-      stmt->value = call(snode, stmt->ptr->value, "deactivate", {});
+      if (snode->type == SNodeType::pointer || snode->type == SNodeType::hash) {
+        stmt->value =
+            call(snode, stmt->ptr->value, "deactivate", {stmt->val->value});
+      } else if (snode->type == SNodeType::dynamic) {
+        stmt->value = call(snode, stmt->ptr->value, "deactivate", {});
+      }
     } else {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
     }
   }
 
   void visit(AtomicOpStmt *stmt) override {
     // auto mask = stmt->parent->mask();
     // TODO: deal with mask when vectorized
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     for (int l = 0; l < stmt->width(); l++) {
-      TC_ASSERT(stmt->op_type == AtomicOpType::add);
+      TI_ASSERT(stmt->op_type == AtomicOpType::add);
       llvm::Value *old_value;
       if (stmt->val->ret_type.data_type == DataType::i32)
         old_value = builder->CreateAtomicRMW(
@@ -939,26 +942,26 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
         old_value = builder->CreateCall(get_runtime_function("atomic_add_f64"),
                                         {stmt->dest->value, stmt->val->value});
       } else {
-        TC_NOT_IMPLEMENTED
+        TI_NOT_IMPLEMENTED
       }
       stmt->value = old_value;
     }
   }
 
   void visit(GlobalPtrStmt *stmt) override {
-    TC_ERROR("Global Ptrs should have been lowered.");
+    TI_ERROR("Global Ptrs should have been lowered.");
   }
 
   void visit(GlobalStoreStmt *stmt) override {
-    TC_ASSERT(!stmt->parent->mask() || stmt->width() == 1);
-    TC_ASSERT(stmt->data->value);
-    TC_ASSERT(stmt->ptr->value);
+    TI_ASSERT(!stmt->parent->mask() || stmt->width() == 1);
+    TI_ASSERT(stmt->data->value);
+    TI_ASSERT(stmt->ptr->value);
     builder->CreateStore(stmt->data->value, stmt->ptr->value);
   }
 
   void visit(GlobalLoadStmt *stmt) override {
     int width = stmt->width();
-    TC_ASSERT(width == 1);
+    TI_ASSERT(width == 1);
     stmt->value = builder->CreateLoad(
         tlctx->get_data_type(stmt->ret_type.data_type), stmt->ptr->value);
   }
@@ -986,12 +989,12 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
     } else if (snode->type == SNodeType::dynamic) {
       return "Dynamic";
     } else if (snode->type == SNodeType::pointer) {
-      return "Pointer";
+      return "pointer";
     } else if (snode->type == SNodeType::hash) {
       return "Hash";
     } else {
-      TC_P(snode_type_name(snode->type));
-      TC_NOT_IMPLEMENTED
+      TI_P(snode_type_name(snode->type));
+      TI_NOT_IMPLEMENTED
     }
   }
 
@@ -1008,6 +1011,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
                                       llvm::Type::getInt8PtrTy(*llvm_context));
 
     std::vector<llvm::Value *> func_arguments{s_ptr, node_ptr};
+
     func_arguments.insert(func_arguments.end(), arguments.begin(),
                           arguments.end());
 
@@ -1040,7 +1044,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
   }
 
   void visit(IntegerOffsetStmt *stmt) override {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
     if (stmt->input->is<GetChStmt>() &&
         stmt->input->as<GetChStmt>()->output_snode->type == SNodeType::place) {
       auto input = stmt->input->as<GetChStmt>();
@@ -1056,7 +1060,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
   void visit(SNodeLookupStmt *stmt) override {
     llvm::Value *parent = nullptr;
     parent = stmt->input_snode->value;
-    TC_ASSERT(parent);
+    TI_ASSERT(parent);
     auto snode = stmt->snode;
     if (snode->type == SNodeType::root) {
       stmt->value = builder->CreateGEP(parent, stmt->input_index->value);
@@ -1070,8 +1074,8 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
       stmt->value = call(snode, stmt->input_snode->value, "lookup_element",
                          {stmt->input_index->value});
     } else {
-      TC_INFO(snode_type_name(snode->type));
-      TC_NOT_IMPLEMENTED
+      TI_INFO(snode_type_name(snode->type));
+      TI_NOT_IMPLEMENTED
     }
   }
 
@@ -1085,7 +1089,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
   }
 
   void visit(ExternalPtrStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
 
     auto argload = stmt->base_ptrs[0]->as<ArgLoadStmt>();
     auto arg_id = argload->arg_id;
@@ -1162,11 +1166,11 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
     builder->CreateBr(func_body_bb);
 
     if (prog->config.print_kernel_llvm_ir) {
-      TC_INFO("Kernel Module IR");
+      TI_INFO("Kernel Module IR");
       module->print(errs(), nullptr);
     }
-    TC_ASSERT(!llvm::verifyFunction(*func, &errs()));
-    // TC_INFO("Kernel function verified.");
+    TI_ASSERT(!llvm::verifyFunction(*func, &errs()));
+    // TI_INFO("Kernel function verified.");
   }
 
   class FunctionCreationGuard {
@@ -1275,7 +1279,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
 
   void create_offload_struct_for(OffloadedStmt *stmt, bool spmd = false) {
     llvm::Function *body;
-    auto leaf_block = stmt->snode->parent;
+    auto leaf_block = stmt->snode;
     {
       // Create the loop body function
       auto guard = get_function_creation_gurad({
@@ -1408,7 +1412,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
     auto buffer = call("Runtime_get_temporary_pointer", runtime,
                        tlctx->get_constant((int64)stmt->offset));
 
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     auto ptr_type = llvm::PointerType::get(
         tlctx->get_data_type(stmt->ret_type.data_type), 0);
     stmt->value = builder->CreatePointerCast(buffer, ptr_type);
@@ -1442,7 +1446,7 @@ class CodeGenLLVM : public IRVisitor, public ModuleBuilder {
     } else if (stmt->task_type == Type::gc) {
       emit_gc(stmt);
     } else {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
     }
     if (prog->config.enable_profiler) {
       call(builder, "Runtime_profiler_stop", {get_runtime()});
diff --git a/taichi/backends/codegen_llvm_ptx.cpp b/taichi/backends/codegen_llvm_ptx.cpp
index edb4037301d41..e5921fc198fe1 100644
--- a/taichi/backends/codegen_llvm_ptx.cpp
+++ b/taichi/backends/codegen_llvm_ptx.cpp
@@ -72,17 +72,17 @@ class CodeGenLLVMGPU : public CodeGenLLVM {
     auto offloaded_local = offloaded_tasks;
     for (auto &task : offloaded_local) {
       llvm::Function *func = module->getFunction(task.name);
-      TC_ASSERT(func);
+      TI_ASSERT(func);
       mark_function_as_cuda_kernel(func);
     }
 
     if (prog->config.print_kernel_llvm_ir) {
-      TC_INFO("IR before global optimization");
+      TI_INFO("IR before global optimization");
       module->print(errs(), nullptr);
     }
     auto ptx = compile_module_to_ptx(module);
     if (prog->config.print_kernel_llvm_ir_optimized) {
-      TC_P(ptx);
+      TI_P(ptx);
     }
     auto cuda_module = cuda_context->compile(ptx);
 
@@ -93,7 +93,7 @@ class CodeGenLLVMGPU : public CodeGenLLVM {
     auto prog = this->prog;
     return [offloaded_local, prog](Context context) {
       for (auto task : offloaded_local) {
-        TC_DEBUG("Launching kernel {}<<<{}, {}>>>", task.name, task.grid_dim,
+        TI_DEBUG("Launching kernel {}<<<{}, {}>>>", task.name, task.grid_dim,
                  task.block_dim);
 
         ProfilerBase *profiler = nullptr;
@@ -105,13 +105,13 @@ class CodeGenLLVMGPU : public CodeGenLLVM {
       }
     };
 #else
-    TC_NOT_IMPLEMENTED;
+    TI_NOT_IMPLEMENTED;
     return nullptr;
 #endif
   }
 
   void visit(PrintStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
 
     auto value_type = tlctx->get_data_type(stmt->stmt->ret_type.data_type);
 
@@ -130,7 +130,7 @@ class CodeGenLLVMGPU : public CodeGenLLVM {
     } else if (stmt->stmt->ret_type.data_type == DataType::f64) {
       format = "%.12f";
     } else {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
     }
 
     std::vector<llvm::Type *> types{value_type};
@@ -166,7 +166,7 @@ class CodeGenLLVMGPU : public CodeGenLLVM {
     } else if (input_taichi_type == DataType::i32) {                        \
       stmt->value = builder->CreateCall(get_runtime_function(#x), input);   \
     } else {                                                                \
-      TC_NOT_IMPLEMENTED                                                    \
+      TI_NOT_IMPLEMENTED                                                    \
     }                                                                       \
   }
     if (op == UnaryOpType::abs) {
@@ -180,7 +180,7 @@ class CodeGenLLVMGPU : public CodeGenLLVM {
         stmt->value =
             builder->CreateCall(get_runtime_function("__nv_abs"), input);
       } else {
-        TC_NOT_IMPLEMENTED
+        TI_NOT_IMPLEMENTED
       }
     } else if (op == UnaryOpType::sqrt) {
       if (input_taichi_type == DataType::f32) {
@@ -190,14 +190,14 @@ class CodeGenLLVMGPU : public CodeGenLLVM {
         stmt->value =
             builder->CreateCall(get_runtime_function("__nv_sqrt"), input);
       } else {
-        TC_NOT_IMPLEMENTED
+        TI_NOT_IMPLEMENTED
       }
     } else if (op == UnaryOpType::logic_not) {
       if (input_taichi_type == DataType::i32) {
         stmt->value =
             builder->CreateCall(get_runtime_function("logic_not_i32"), input);
       } else {
-        TC_NOT_IMPLEMENTED
+        TI_NOT_IMPLEMENTED
       }
     }
     UNARY_STD(exp)
@@ -210,21 +210,21 @@ class CodeGenLLVMGPU : public CodeGenLLVM {
     UNARY_STD(cos)
     UNARY_STD(sin)
     else {
-      TC_P(unary_op_type_name(op));
-      TC_NOT_IMPLEMENTED
+      TI_P(unary_op_type_name(op));
+      TI_NOT_IMPLEMENTED
     }
 #undef UNARY_STD
   }
 
   void visit(AtomicOpStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     // https://llvm.org/docs/NVPTXUsage.html#address-spaces
     bool is_local = stmt->dest->is<AllocaStmt>();
     if (is_local) {
-      TC_ERROR("Local atomics should have been demoted.");
+      TI_ERROR("Local atomics should have been demoted.");
     } else {
       for (int l = 0; l < stmt->width(); l++) {
-        TC_ASSERT(stmt->op_type == AtomicOpType::add);
+        TI_ASSERT(stmt->op_type == AtomicOpType::add);
         llvm::Value *old_value;
         if (is_integral(stmt->val->ret_type.data_type)) {
           old_value = builder->CreateAtomicRMW(
@@ -243,7 +243,7 @@ class CodeGenLLVMGPU : public CodeGenLLVM {
                                        {llvm::PointerType::get(dt, 0)},
                                        {stmt->dest->value, stmt->val->value});
         } else {
-          TC_NOT_IMPLEMENTED
+          TI_NOT_IMPLEMENTED
         }
         stmt->value = old_value;
       }
@@ -344,7 +344,7 @@ class CodeGenLLVMGPU : public CodeGenLLVM {
         if (kernel_block_dim == 0)
           kernel_block_dim = prog->config.default_gpu_block_dim;
         kernel_block_dim =
-            std::min(stmt->snode->parent->max_num_elements(), kernel_block_dim);
+            std::min(stmt->snode->max_num_elements(), kernel_block_dim);
         stmt->block_dim = kernel_block_dim;
         create_offload_struct_for(stmt, true);
       } else if (stmt->task_type == Type::clear_list) {
@@ -355,7 +355,7 @@ class CodeGenLLVMGPU : public CodeGenLLVM {
         kernel_block_dim = std::min(branching, 64);
         emit_list_gen(stmt);
       } else {
-        TC_NOT_IMPLEMENTED
+        TI_NOT_IMPLEMENTED
       }
       finalize_offloaded_task_function();
       current_task->grid_dim = kernel_grid_dim;
@@ -364,13 +364,13 @@ class CodeGenLLVMGPU : public CodeGenLLVM {
       current_task = nullptr;
     }
 #else
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
 #endif
   }
 };
 
 FunctionType GPUCodeGen::codegen_llvm() {
-  TC_PROFILER("cuda codegen");
+  TI_PROFILER("cuda codegen");
   return CodeGenLLVMGPU(this, kernel).gen();
 }
 
diff --git a/taichi/backends/codegen_llvm_x86.cpp b/taichi/backends/codegen_llvm_x86.cpp
index 84ccca6053e06..a70831bc49884 100644
--- a/taichi/backends/codegen_llvm_x86.cpp
+++ b/taichi/backends/codegen_llvm_x86.cpp
@@ -24,7 +24,7 @@ class CodeGenLLVMCPU : public CodeGenLLVM {
 };
 
 FunctionType CPUCodeGen::codegen_llvm() {
-  TC_PROFILER("cpu codegen");
+  TI_PROFILER("cpu codegen");
   return CodeGenLLVMCPU(this, kernel).gen();
 }
 
@@ -32,7 +32,7 @@ void global_optimize_module_x86_64(std::unique_ptr<llvm::Module> &module) {
   TI_AUTO_PROF
   auto JTMB = JITTargetMachineBuilder::detectHost();
   if (!JTMB) {
-    TC_ERROR("Target machine creation failed.");
+    TI_ERROR("Target machine creation failed.");
   }
   module->setTargetTriple(JTMB->getTargetTriple().str());
   llvm::Triple triple(module->getTargetTriple());
@@ -40,7 +40,7 @@ void global_optimize_module_x86_64(std::unique_ptr<llvm::Module> &module) {
   std::string err_str;
   const llvm::Target *target =
       TargetRegistry::lookupTarget(triple.str(), err_str);
-  TC_ERROR_UNLESS(target, err_str);
+  TI_ERROR_UNLESS(target, err_str);
 
   TargetOptions options;
   options.PrintMachineCode = false;
@@ -69,7 +69,7 @@ void global_optimize_module_x86_64(std::unique_ptr<llvm::Module> &module) {
       triple.str(), mcpu.str(), "", options, llvm::Reloc::PIC_,
       llvm::CodeModel::Small, CodeGenOpt::Aggressive));
 
-  TC_ERROR_UNLESS(target_machine.get(), "Could not allocate target machine!");
+  TI_ERROR_UNLESS(target_machine.get(), "Could not allocate target machine!");
 
   module->setDataLayout(target_machine->createDataLayout());
 
@@ -98,9 +98,9 @@ void global_optimize_module_x86_64(std::unique_ptr<llvm::Module> &module) {
   auto t = Time::get_time();
   module_pass_manager.run(*module);
   t = Time::get_time() - t;
-  // TC_INFO("Global optimization time: {} ms", t * 1000);
+  // TI_INFO("Global optimization time: {} ms", t * 1000);
   if (get_current_program().config.print_kernel_llvm_ir_optimized) {
-    TC_INFO("Global optimized IR:");
+    TI_INFO("Global optimized IR:");
     module->print(llvm::errs(), nullptr);
   }
 }
diff --git a/taichi/backends/codegen_metal.cpp b/taichi/backends/codegen_metal.cpp
index ebe7af5bca16b..a747fff03f298 100644
--- a/taichi/backends/codegen_metal.cpp
+++ b/taichi/backends/codegen_metal.cpp
@@ -50,7 +50,7 @@ class MetalKernelCodegen : public IRVisitor {
   }
 
   void visit(ConstStmt *const_stmt) override {
-    TC_ASSERT(const_stmt->width() == 1);
+    TI_ASSERT(const_stmt->width() == 1);
     emit("const {} {} = {};", metal_data_type_name(const_stmt->element_type()),
          const_stmt->raw_name(), const_stmt->val[0].stringify());
   }
@@ -69,7 +69,7 @@ class MetalKernelCodegen : public IRVisitor {
       emit("const {} {}({});", metal_data_type_name(stmt->element_type()),
            stmt->raw_name(), ptr->raw_name());
     } else {
-      TC_NOT_IMPLEMENTED;
+      TI_NOT_IMPLEMENTED;
     }
   }
 
@@ -114,7 +114,7 @@ class MetalKernelCodegen : public IRVisitor {
     if (stmt->input_snode) {
       parent = stmt->input_snode->raw_name();
     } else {
-      TC_ASSERT(root_stmt_ != nullptr);
+      TI_ASSERT(root_stmt_ != nullptr);
       parent = root_stmt_->raw_name();
     }
 
@@ -123,12 +123,12 @@ class MetalKernelCodegen : public IRVisitor {
   }
 
   void visit(GlobalStoreStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     emit(R"(*{} = {};)", stmt->ptr->raw_name(), stmt->data->raw_name());
   }
 
   void visit(GlobalLoadStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     emit(R"({} {} = *{};)", metal_data_type_name(stmt->element_type()),
          stmt->raw_name(), stmt->ptr->raw_name());
   }
@@ -146,7 +146,7 @@ class MetalKernelCodegen : public IRVisitor {
 
   void visit(ArgStoreStmt *stmt) override {
     const auto dt = metal_data_type_name(stmt->element_type());
-    TC_ASSERT(!stmt->is_ptr);
+    TI_ASSERT(!stmt->is_ptr);
     emit("*{}.arg{}() = {};", kArgsContextName, stmt->arg_id,
          stmt->val->raw_name());
   }
@@ -154,7 +154,7 @@ class MetalKernelCodegen : public IRVisitor {
   void visit(ExternalPtrStmt *stmt) override {
     // Used mostly for transferring data between host (e.g. numpy array) and
     // Metal.
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     const auto linear_index_name =
         fmt::format("{}_linear_index_", stmt->raw_name());
     emit("int {} = 0;", linear_index_name);
@@ -184,16 +184,16 @@ class MetalKernelCodegen : public IRVisitor {
   }
 
   void visit(GlobalTemporaryStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     const auto dt = metal_data_type_name(stmt->element_type());
     emit("device {}* {} = reinterpret_cast<device {}*>({} + {});", dt,
          stmt->raw_name(), dt, kGlobalTmpsBufferName, stmt->offset);
   }
 
   void visit(LoopIndexStmt *stmt) override {
-    TC_ASSERT(current_kernel_attribs_->task_type ==
+    TI_ASSERT(current_kernel_attribs_->task_type ==
               OffloadedStmt::TaskType::range_for);
-    TC_ASSERT(!stmt->is_struct_for && stmt->index == 0);
+    TI_ASSERT(!stmt->is_struct_for && stmt->index == 0);
     if (current_kernel_attribs_->range_for_attribs.const_begin) {
       emit("const int {} = (static_cast<int>({}) + {});", stmt->raw_name(),
            kKernelThreadIdName,
@@ -221,7 +221,7 @@ class MetalKernelCodegen : public IRVisitor {
         // reinterpret the bit pattern
         const auto to_type = to_metal_type(stmt->cast_type);
         const auto to_type_name = metal_data_type_name(to_type);
-        TC_ASSERT(metal_data_type_bytes(
+        TI_ASSERT(metal_data_type_bytes(
                       to_metal_type(stmt->operand->element_type())) ==
                   metal_data_type_bytes(to_type));
         emit("const {} {} = union_cast<{}>({});", to_type_name,
@@ -257,15 +257,15 @@ class MetalKernelCodegen : public IRVisitor {
   }
 
   void visit(TernaryOpStmt *tri) override {
-    TC_ASSERT(tri->op_type == TernaryOpType::select);
+    TI_ASSERT(tri->op_type == TernaryOpType::select);
     emit("const {} {} = ({}) ? ({}) : ({});",
          metal_data_type_name(tri->element_type()), tri->raw_name(),
          tri->op1->raw_name(), tri->op2->raw_name(), tri->op3->raw_name());
   }
 
   void visit(AtomicOpStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
-    TC_ASSERT(stmt->op_type == AtomicOpType::add);
+    TI_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->op_type == AtomicOpType::add);
     const auto dt = stmt->val->element_type();
     if (dt == DataType::i32) {
       emit(
@@ -277,7 +277,7 @@ class MetalKernelCodegen : public IRVisitor {
       emit("const float {} = fatomic_fetch_add({}, {});", stmt->raw_name(),
            stmt->dest->raw_name(), stmt->val->raw_name());
     } else {
-      TC_NOT_IMPLEMENTED;
+      TI_NOT_IMPLEMENTED;
     }
   }
 
@@ -294,7 +294,7 @@ class MetalKernelCodegen : public IRVisitor {
   }
 
   void visit(RangeForStmt *for_stmt) override {
-    TC_ASSERT(for_stmt->width() == 1);
+    TI_ASSERT(for_stmt->width() == 1);
     auto *loop_var = for_stmt->loop_var;
     if (loop_var->ret_type.data_type == DataType::i32) {
       if (!for_stmt->reversed) {
@@ -312,7 +312,7 @@ class MetalKernelCodegen : public IRVisitor {
         emit("  int {} = {}_;", loop_var->raw_name(), loop_var->raw_name());
       }
     } else {
-      TC_ASSERT(!for_stmt->reversed);
+      TI_ASSERT(!for_stmt->reversed);
       const auto type_name = metal_data_type_name(loop_var->element_type());
       emit("for ({} {} = {}; {} < {}; {} = {} + ({})1) {{", type_name,
            loop_var->raw_name(), for_stmt->begin->raw_name(),
@@ -324,11 +324,11 @@ class MetalKernelCodegen : public IRVisitor {
   }
 
   void visit(StructForStmt *) override {
-    TC_ERROR("Struct for cannot be nested.");
+    TI_ERROR("Struct for cannot be nested.");
   }
 
   void visit(OffloadedStmt *stmt) override {
-    TC_ASSERT(is_top_level_);
+    TI_ASSERT(is_top_level_);
     is_top_level_ = false;
     using Type = OffloadedStmt::TaskType;
     if (stmt->task_type == Type::serial) {
@@ -338,7 +338,7 @@ class MetalKernelCodegen : public IRVisitor {
     } else {
       // struct_for is automatically lowered to ranged_for for dense snodes
       // (#378). So we only need to support serial and range_for tasks.
-      TC_ERROR("Unsupported offload type={} on Metal arch", stmt->task_name());
+      TI_ERROR("Unsupported offload type={} on Metal arch", stmt->task_name());
     }
     is_top_level_ = true;
   }
@@ -354,12 +354,12 @@ class MetalKernelCodegen : public IRVisitor {
   }
 
   void visit(RandStmt *stmt) override {
-    TC_ERROR("Metal arch doesn't support ti.random() yet");
+    TI_ERROR("Metal arch doesn't support ti.random() yet");
   }
 
   void visit(PrintStmt *stmt) override {
     // TODO: Add a flag to control whether ignoring print() stmt is allowed.
-    TC_WARN("Cannot print inside Metal kernel, ignored");
+    TI_WARN("Cannot print inside Metal kernel, ignored");
   }
 
  private:
@@ -376,10 +376,10 @@ class MetalKernelCodegen : public IRVisitor {
   }
 
   void generate_common_functions() {
-#define TC_INSIDE_METAL_CODEGEN
+#define TI_INSIDE_METAL_CODEGEN
 #include <taichi/platform/metal/helpers.metal.h>
     kernel_src_code_ += kMetalHelpersSourceCode;
-#undef TC_INSIDE_METAL_CODEGEN
+#undef TI_INSIDE_METAL_CODEGEN
     emit("\n");
   }
 
@@ -425,7 +425,7 @@ class MetalKernelCodegen : public IRVisitor {
   }
 
   void generate_serial_kernel(OffloadedStmt *stmt) {
-    TC_ASSERT(stmt->task_type == OffloadedStmt::TaskType::serial);
+    TI_ASSERT(stmt->task_type == OffloadedStmt::TaskType::serial);
     const std::string mtl_kernel_name = make_kernel_name();
     emit_mtl_kernel_func_sig(mtl_kernel_name);
     emit("  // serial");
@@ -445,7 +445,7 @@ class MetalKernelCodegen : public IRVisitor {
   }
 
   void generate_range_for_kernel(OffloadedStmt *stmt) {
-    TC_ASSERT(stmt->task_type == OffloadedStmt::TaskType::range_for);
+    TI_ASSERT(stmt->task_type == OffloadedStmt::TaskType::range_for);
     const std::string mtl_kernel_name = make_kernel_name();
     emit_mtl_kernel_func_sig(mtl_kernel_name);
 
@@ -570,7 +570,7 @@ void MetalCodeGen::lower() {
   auto ir = kernel_->ir;
   const bool print_ir = prog_->config.print_ir;
   if (print_ir) {
-    TC_TRACE("Initial IR:");
+    TI_TRACE("Initial IR:");
     irpass::print(ir);
   }
 
@@ -578,7 +578,7 @@ void MetalCodeGen::lower() {
     irpass::reverse_segments(ir);
     irpass::re_id(ir);
     if (print_ir) {
-      TC_TRACE("Segment reversed (for autodiff):");
+      TI_TRACE("Segment reversed (for autodiff):");
       irpass::print(ir);
     }
   }
@@ -586,21 +586,21 @@ void MetalCodeGen::lower() {
   irpass::lower(ir);
   irpass::re_id(ir);
   if (print_ir) {
-    TC_TRACE("Lowered:");
+    TI_TRACE("Lowered:");
     irpass::print(ir);
   }
 
   irpass::typecheck(ir);
   irpass::re_id(ir);
   if (print_ir) {
-    TC_TRACE("Typechecked:");
+    TI_TRACE("Typechecked:");
     irpass::print(ir);
   }
 
   irpass::demote_dense_struct_fors(ir);
   irpass::typecheck(ir);
   if (print_ir) {
-    TC_TRACE("Dense Struct-for demoted:");
+    TI_TRACE("Dense Struct-for demoted:");
     irpass::print(ir);
   }
 
@@ -609,7 +609,7 @@ void MetalCodeGen::lower() {
     irpass::simplify(ir);
     irpass::re_id(ir);
     if (print_ir) {
-      TC_TRACE("Simplified I:");
+      TI_TRACE("Simplified I:");
       irpass::print(ir);
     }
   }
@@ -619,12 +619,12 @@ void MetalCodeGen::lower() {
     irpass::full_simplify(ir);
     irpass::typecheck(ir);
     if (print_ir) {
-      TC_TRACE("Before make_adjoint:");
+      TI_TRACE("Before make_adjoint:");
       irpass::print(ir);
     }
     irpass::make_adjoint(ir);
     if (print_ir) {
-      TC_TRACE("After make_adjoint:");
+      TI_TRACE("After make_adjoint:");
       irpass::print(ir);
     }
     irpass::typecheck(ir);
@@ -633,27 +633,27 @@ void MetalCodeGen::lower() {
   irpass::lower_access(ir, prog_->config.use_llvm);
   irpass::re_id(ir);
   if (print_ir) {
-    TC_TRACE("Access Lowered:");
+    TI_TRACE("Access Lowered:");
     irpass::print(ir);
   }
 
   irpass::die(ir);
   irpass::re_id(ir);
   if (print_ir) {
-    TC_TRACE("DIEd:");
+    TI_TRACE("DIEd:");
     irpass::print(ir);
   }
 
   irpass::flag_access(ir);
   irpass::re_id(ir);
   if (print_ir) {
-    TC_TRACE("Access Flagged:");
+    TI_TRACE("Access Flagged:");
     irpass::print(ir);
   }
 
   irpass::constant_fold(ir);
   if (print_ir) {
-    TC_TRACE("Constant folded:");
+    TI_TRACE("Constant folded:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
@@ -661,21 +661,21 @@ void MetalCodeGen::lower() {
   global_tmps_buffer_size_ =
       std::max(irpass::offload(ir).total_size, (size_t)(1));
   if (print_ir) {
-    TC_TRACE("Offloaded:");
+    TI_TRACE("Offloaded:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
 
   irpass::full_simplify(ir);
   if (print_ir) {
-    TC_TRACE("Simplified II:");
+    TI_TRACE("Simplified II:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
 
   irpass::demote_atomics(ir);
   if (print_ir) {
-    TC_TRACE("Atomics demoted:");
+    TI_TRACE("Atomics demoted:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
diff --git a/taichi/backends/codegen_x86.cpp b/taichi/backends/codegen_x86.cpp
index d0fd469ca4b08..22704dc7db275 100644
--- a/taichi/backends/codegen_x86.cpp
+++ b/taichi/backends/codegen_x86.cpp
@@ -46,7 +46,7 @@ class CPUIRCodeGen : public IRVisitor {
   }
 
   void visit(RandStmt *stmt) {
-    TC_ASSERT(stmt->ret_type.data_type == DataType::f32);
+    TI_ASSERT(stmt->ret_type.data_type == DataType::f32);
     emit("const auto {} = {}::rand();", stmt->raw_name(),
          stmt->ret_data_type_name());
   }
@@ -122,7 +122,7 @@ class CPUIRCodeGen : public IRVisitor {
   }
 
   void visit(StructForStmt *for_stmt) {
-    TC_ASSERT_INFO(current_struct_for == nullptr,
+    TI_ASSERT_INFO(current_struct_for == nullptr,
                    "Struct for cannot be nested.");
     current_struct_for = for_stmt;
     emit("{{");
@@ -209,7 +209,7 @@ class CPUIRCodeGen : public IRVisitor {
         emit("{} = {}_;", loop_var->raw_name(), loop_var->raw_name());
       } else {
         // reversed loop
-        TC_ASSERT(for_stmt->vectorize == 1);
+        TI_ASSERT(for_stmt->vectorize == 1);
         emit("for (int {}_ = {} - 1; {}_ >= {}; {}_ = {}_ - {}) {{",
              loop_var->raw_name(), for_stmt->end->raw_name(),
              loop_var->raw_name(), for_stmt->begin->raw_name(),
@@ -279,7 +279,7 @@ class CPUIRCodeGen : public IRVisitor {
   }
 
   void visit(SNodeOpStmt *stmt) {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
     /*
     stmt->ret_type.data_type = DataType::i32;
     if (stmt->op_type == SNodeOpType::length) {
@@ -298,7 +298,7 @@ class CPUIRCodeGen : public IRVisitor {
              make_list(indices, ""));
       }
       if (stmt->op_type == SNodeOpType::append) {
-        TC_ASSERT(stmt->val->width() == 1);
+        TI_ASSERT(stmt->val->width() == 1);
         emit("{}_tmp->append({}({}[{}]));", snode->node_type_name,
              snode->ch[0]->node_type_name, stmt->val->raw_name(), l);
       } else if (stmt->op_type == SNodeOpType::clear) {
@@ -319,7 +319,7 @@ class CPUIRCodeGen : public IRVisitor {
         emit("activate_{}(root, {});", snode->node_type_name,
              make_list(indices, ""));
       } else {
-        TC_NOT_IMPLEMENTED
+        TI_NOT_IMPLEMENTED
       }
       emit("}}");
     }
@@ -332,9 +332,9 @@ class CPUIRCodeGen : public IRVisitor {
       if (mask) {
         emit("if ({}[{}]) ", mask->raw_name(), l);
       } else {
-        TC_ASSERT(stmt->val->ret_type.data_type == DataType::f32 ||
+        TI_ASSERT(stmt->val->ret_type.data_type == DataType::f32 ||
                   stmt->val->ret_type.data_type == DataType::i32);
-        TC_ASSERT(stmt->op_type == AtomicOpType::add);
+        TI_ASSERT(stmt->op_type == AtomicOpType::add);
         emit("atomic_add({}[{}], {}[{}]);", stmt->dest->raw_name(), l,
              stmt->val->raw_name(), l);
       }
@@ -352,7 +352,7 @@ class CPUIRCodeGen : public IRVisitor {
       std::vector<std::string> indices(max_num_indices, "0");  // = "(root, ";
       for (int i = 0; i < (int)stmt->indices.size(); i++) {
         if (snode->physical_index_position[i] != -1) {
-          // TC_ASSERT(snode->physical_index_position[i] != -1);
+          // TI_ASSERT(snode->physical_index_position[i] != -1);
           indices[snode->physical_index_position[i]] =
               stmt->indices[i]->raw_name() + fmt::format("[{}]", l);
         }
@@ -378,7 +378,7 @@ class CPUIRCodeGen : public IRVisitor {
             all_offsets_zero = false;
         }
         if (identical_indices) {
-          // TC_WARN("Weakened addressing");
+          // TI_WARN("Weakened addressing");
           weakened = true;
 
           std::string cond;
@@ -427,7 +427,7 @@ class CPUIRCodeGen : public IRVisitor {
     if (!current_program->config.force_vectorized_global_store) {
       for (int i = 0; i < stmt->data->ret_type.width; i++) {
         if (stmt->parent->mask()) {
-          TC_ASSERT(stmt->width() == 1);
+          TI_ASSERT(stmt->width() == 1);
           emit("if ({}[{}])", stmt->parent->mask()->raw_name(), i);
         }
         emit("*({} *){}[{}] = {}[{}];",
@@ -443,7 +443,7 @@ class CPUIRCodeGen : public IRVisitor {
     const int width = stmt->width();
     if (get_current_program().config.attempt_vectorized_load_cpu &&
         width >= 4 && stmt->ptr->is<ElementShuffleStmt>()) {
-      TC_ASSERT(stmt->ret_type.data_type == DataType::i32 ||
+      TI_ASSERT(stmt->ret_type.data_type == DataType::i32 ||
                 stmt->ret_type.data_type == DataType::f32);
 
       auto shuffle = stmt->ptr->as<ElementShuffleStmt>();
@@ -505,8 +505,8 @@ class CPUIRCodeGen : public IRVisitor {
   }
 
   void visit(ExternalPtrStmt *stmt) {
-    TC_ASSERT(stmt->width() == 1);
-    TC_ASSERT(stmt->indices.size() == 1);
+    TI_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->indices.size() == 1);
     auto dt = stmt->ret_type.data_type;
     emit("const {} *{}[1] = {{&{}[{}]}};", data_type_name(dt), stmt->raw_name(),
          stmt->base_ptrs[0]->raw_name(), stmt->indices[0]->raw_name());
@@ -529,7 +529,7 @@ class CPUIRCodeGen : public IRVisitor {
 
   void visit(AssertStmt *stmt) {
     emit("#if defined(TL_DEBUG)");
-    emit(R"(TC_ASSERT_INFO({}, "{}");)", stmt->val->raw_name(), stmt->text);
+    emit(R"(TI_ASSERT_INFO({}, "{}");)", stmt->val->raw_name(), stmt->text);
     emit("#endif");
   }
 
@@ -605,7 +605,7 @@ class CPUIRCodeGen : public IRVisitor {
 };
 
 void CPUCodeGen::lower_cpp() {
-  TC_NOT_IMPLEMENTED
+  TI_NOT_IMPLEMENTED
 }
 
 void CPUCodeGen::lower_llvm() {
@@ -617,27 +617,27 @@ void CPUCodeGen::lower_llvm() {
     print_ir = prog->config.print_ir;
   }
   if (print_ir) {
-    TC_TRACE("Initial IR:");
+    TI_TRACE("Initial IR:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   if (kernel->grad) {
     irpass::reverse_segments(ir);
     if (print_ir) {
-      TC_TRACE("Segment reversed (for autodiff):");
+      TI_TRACE("Segment reversed (for autodiff):");
       irpass::re_id(ir);
       irpass::print(ir);
     }
   }
   irpass::lower(ir);
   if (print_ir) {
-    TC_TRACE("Lowered:");
+    TI_TRACE("Lowered:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   irpass::typecheck(ir);
   if (print_ir) {
-    TC_TRACE("Typechecked:");
+    TI_TRACE("Typechecked:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
@@ -645,112 +645,112 @@ void CPUCodeGen::lower_llvm() {
     irpass::demote_dense_struct_fors(ir);
     irpass::typecheck(ir);
     if (print_ir) {
-      TC_TRACE("Dense Struct-for demoted:");
+      TI_TRACE("Dense Struct-for demoted:");
       irpass::print(ir);
     }
   }
   irpass::slp_vectorize(ir);
   if (print_ir) {
-    TC_TRACE("SLPed:");
+    TI_TRACE("SLPed:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   irpass::loop_vectorize(ir);
   if (print_ir) {
-    TC_TRACE("LoopVeced:");
+    TI_TRACE("LoopVeced:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   irpass::vector_split(ir, prog->config.max_vector_width,
                        prog->config.serial_schedule);
   if (print_ir) {
-    TC_TRACE("LoopSplitted:");
+    TI_TRACE("LoopSplitted:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   irpass::simplify(ir);
   if (print_ir) {
-    TC_TRACE("Simplified I:");
+    TI_TRACE("Simplified I:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   if (kernel->grad) {
     // irpass::re_id(ir);
-    // TC_TRACE("Primal:");
+    // TI_TRACE("Primal:");
     // irpass::print(ir);
     irpass::demote_atomics(ir);
     irpass::simplify(ir);
     irpass::make_adjoint(ir);
     irpass::typecheck(ir);
     if (print_ir) {
-      TC_TRACE("Adjoint:");
+      TI_TRACE("Adjoint:");
       irpass::re_id(ir);
       irpass::print(ir);
     }
   }
   irpass::lower_access(ir, true);
   if (print_ir) {
-    TC_TRACE("Access Lowered:");
+    TI_TRACE("Access Lowered:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   irpass::die(ir);
   if (print_ir) {
-    TC_TRACE("DIEd:");
+    TI_TRACE("DIEd:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   irpass::simplify(ir);
   if (print_ir) {
-    TC_TRACE("Simplified II:");
+    TI_TRACE("Simplified II:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
   irpass::die(ir);
   if (print_ir) {
-    TC_TRACE("DIEd:");
+    TI_TRACE("DIEd:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
 
   irpass::flag_access(ir);
   if (print_ir) {
-    TC_TRACE("Access Flagged:");
+    TI_TRACE("Access Flagged:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
 
   irpass::constant_fold(ir);
   if (print_ir) {
-    TC_TRACE("Constant folded:");
+    TI_TRACE("Constant folded:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
 
   irpass::offload(ir);
   if (print_ir) {
-    TC_TRACE("Offloaded:");
+    TI_TRACE("Offloaded:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
 
   irpass::full_simplify(ir);
   if (print_ir) {
-    TC_TRACE("Simplified III:");
+    TI_TRACE("Simplified III:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
 
   irpass::demote_atomics(ir);
   if (print_ir) {
-    TC_TRACE("Atomics demoted:");
+    TI_TRACE("Atomics demoted:");
     irpass::re_id(ir);
     irpass::print(ir);
   }
 }
 
 void CPUCodeGen::lower() {
-  TC_PROFILER(__FUNCTION__)
+  TI_PROFILER(__FUNCTION__)
   if (prog->config.use_llvm) {
     lower_llvm();
   } else {
diff --git a/taichi/backends/kernel.cpp b/taichi/backends/kernel.cpp
index 2b8572d32e071..4b27ec84535f6 100644
--- a/taichi/backends/kernel.cpp
+++ b/taichi/backends/kernel.cpp
@@ -12,12 +12,12 @@ FunctionType KernelCodeGen::compile(taichi::Tlang::Program &prog,
   this->kernel = &kernel;
   lower();
   if (prog.config.use_llvm) {
-    TC_PROFILER("codegen llvm")
+    TI_PROFILER("codegen llvm")
     return codegen_llvm();
   } else {
     codegen();
     generate_binary("");
-    // TC_P(Time::get_time() - t);
+    // TI_P(Time::get_time() - t);
     return load_function();
   }
 }
diff --git a/taichi/backends/kernel.h b/taichi/backends/kernel.h
index 09d49e18b93df..8417b60553b3b 100644
--- a/taichi/backends/kernel.h
+++ b/taichi/backends/kernel.h
@@ -33,7 +33,7 @@ class KernelCodeGen : public CodeGenBase {
   virtual void codegen() = 0;
 
   virtual FunctionType codegen_llvm() {
-    TC_NOT_IMPLEMENTED;
+    TI_NOT_IMPLEMENTED;
     return nullptr;
   }
 
diff --git a/taichi/backends/llvm_codegen_utils.cpp b/taichi/backends/llvm_codegen_utils.cpp
index 3b8a1d9365860..0dfbd707baf86 100644
--- a/taichi/backends/llvm_codegen_utils.cpp
+++ b/taichi/backends/llvm_codegen_utils.cpp
@@ -13,26 +13,26 @@ void check_func_call_signature(llvm::Value *func,
                                std::vector<Value *> arglist) {
   auto func_type = func->getType()->getPointerElementType();
   int num_params = func_type->getFunctionNumParams();
-  TC_ASSERT(num_params == arglist.size());
+  TI_ASSERT(num_params == arglist.size());
 
   for (int i = 0; i < (int)arglist.size(); i++) {
     auto required = func_type->getFunctionParamType(i);
     auto provided = arglist[i]->getType();
-    // TC_INFO("    required from context {}", (void *)&required->getContext());
-    // TC_INFO("    provided from context {}", (void *)&provided->getContext());
+    // TI_INFO("    required from context {}", (void *)&required->getContext());
+    // TI_INFO("    provided from context {}", (void *)&provided->getContext());
     if (required != provided) {
-      // TC_INFO("Function : {}", std::string(func->getName()));
-      // TC_INFO("    Type : {}", type_name(func->getType()));
+      // TI_INFO("Function : {}", std::string(func->getName()));
+      // TI_INFO("    Type : {}", type_name(func->getType()));
       if (&required->getContext() != &provided->getContext()) {
-        TC_INFO("  parameter {} types are from different contexts", i);
-        TC_INFO("    required from context {}",
+        TI_INFO("  parameter {} types are from different contexts", i);
+        TI_INFO("    required from context {}",
                 (void *)&required->getContext());
-        TC_INFO("    provided from context {}",
+        TI_INFO("    provided from context {}",
                 (void *)&provided->getContext());
       }
-      TC_INFO("  parameter {} mismatch: required={}, provided={}", i,
+      TI_INFO("  parameter {} mismatch: required={}, provided={}", i,
               type_name(required), type_name(provided));
-      TC_ERROR("Bad function signature.");
+      TI_ERROR("Bad function signature.");
     }
   }
 }
diff --git a/taichi/backends/llvm_codegen_utils.h b/taichi/backends/llvm_codegen_utils.h
index 853c3caeac5ee..e72cfb8b8ed6e 100644
--- a/taichi/backends/llvm_codegen_utils.h
+++ b/taichi/backends/llvm_codegen_utils.h
@@ -71,7 +71,7 @@ class ModuleBuilder {
   llvm::Type *get_runtime_type(const std::string &name) {
     auto ty = module->getTypeByName("struct." + name);
     if (!ty) {
-      TC_ERROR("Runtime type {} not found.", name);
+      TI_ERROR("Runtime type {} not found.", name);
     }
     return ty;
   }
@@ -79,7 +79,7 @@ class ModuleBuilder {
   llvm::Function *get_runtime_function(const std::string &name) {
     auto f = module->getFunction(name);
     if (!f) {
-      TC_ERROR("Runtime function {} not found.", name);
+      TI_ERROR("Runtime function {} not found.", name);
     }
     f->removeAttribute(AttributeList::FunctionIndex,
                        llvm::Attribute::OptimizeNone);
diff --git a/taichi/backends/llvm_jit.h b/taichi/backends/llvm_jit.h
index 2d3d138f08086..f166231ffca06 100644
--- a/taichi/backends/llvm_jit.h
+++ b/taichi/backends/llvm_jit.h
@@ -108,7 +108,7 @@ class TaichiLLVMJIT {
         return JTMB.takeError();
       jtmb = std::make_unique<JITTargetMachineBuilder>(std::move(*JTMB));
     } else {
-      TC_ASSERT(arch == Arch::cuda);
+      TI_ASSERT(arch == Arch::cuda);
       Triple triple("nvptx64", "nvidia", "cuda");
       jtmb = std::make_unique<JITTargetMachineBuilder>(triple);
     }
@@ -186,8 +186,8 @@ class TaichiLLVMJIT {
 inline void *jit_lookup_name(TaichiLLVMJIT *jit, const std::string &name) {
   auto ExprSymbol = jit->lookup(name);
   if (!ExprSymbol)
-    TC_ERROR("Function \"{}\" not found", name);
+    TI_ERROR("Function \"{}\" not found", name);
   return (void *)(llvm::cantFail(ExprSymbol.getAddress()));
 }
 
-TLANG_NAMESPACE_END
\ No newline at end of file
+TLANG_NAMESPACE_END
diff --git a/taichi/backends/llvm_jit_ptx.cpp b/taichi/backends/llvm_jit_ptx.cpp
index 4986c7e9317b8..f0405b39ec1aa 100644
--- a/taichi/backends/llvm_jit_ptx.cpp
+++ b/taichi/backends/llvm_jit_ptx.cpp
@@ -34,7 +34,7 @@ std::string compile_module_to_ptx(std::unique_ptr<llvm::Module> &module) {
   std::string err_str;
   const llvm::Target *target =
       TargetRegistry::lookupTarget(triple.str(), err_str);
-  TC_ERROR_UNLESS(target, err_str);
+  TI_ERROR_UNLESS(target, err_str);
 
   bool fast_math = get_current_program().config.fast_math;
 
@@ -63,7 +63,7 @@ std::string compile_module_to_ptx(std::unique_ptr<llvm::Module> &module) {
       triple.str(), cuda_context->get_mcpu(), cuda_mattrs(), options,
       llvm::Reloc::PIC_, llvm::CodeModel::Small, CodeGenOpt::Aggressive));
 
-  TC_ERROR_UNLESS(target_machine.get(), "Could not allocate target machine!");
+  TI_ERROR_UNLESS(target_machine.get(), "Could not allocate target machine!");
 
   module->setDataLayout(target_machine->createDataLayout());
 
@@ -127,7 +127,7 @@ std::string compile_module_to_ptx(std::unique_ptr<llvm::Module> &module) {
       module_pass_manager, ostream, nullptr, TargetMachine::CGFT_AssemblyFile,
       true);
 
-  TC_ERROR_IF(fail, "Failed to set up passes to emit PTX source\n");
+  TI_ERROR_IF(fail, "Failed to set up passes to emit PTX source\n");
 
   // Run optimization passes
   function_pass_manager.doInitialization();
@@ -153,7 +153,7 @@ CUDAContext::CUDAContext() {
 
   char name[128];
   check_cuda_error(cuDeviceGetName(name, 128, device));
-  TC_TRACE("Using CUDA Device [id=0]: {}", name);
+  TI_TRACE("Using CUDA Device [id=0]: {}", name);
 
   int cc_major, cc_minor;
   check_cuda_error(cuDeviceGetAttribute(
@@ -161,7 +161,7 @@ CUDAContext::CUDAContext() {
   check_cuda_error(cuDeviceGetAttribute(
       &cc_minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device));
 
-  TC_TRACE("CUDA Device Compute Capability: {}.{}", cc_major, cc_minor);
+  TI_TRACE("CUDA Device Compute Capability: {}.{}", cc_major, cc_minor);
   check_cuda_error(cuCtxCreate(&context, 0, device));
   check_cuda_error(cudaMalloc(&context_buffer, sizeof(Context)));
 
@@ -173,13 +173,13 @@ CUmodule CUDAContext::compile(const std::string &ptx) {
   make_current();
   // Create module for object
   CUmodule cudaModule;
-  TC_TRACE("PTX size: {:.2f}KB", ptx.size() / 1024.0);
+  TI_TRACE("PTX size: {:.2f}KB", ptx.size() / 1024.0);
   auto t = Time::get_time();
-  TC_TRACE("Loading module...");
+  TI_TRACE("Loading module...");
   auto _ = std::lock_guard<std::mutex>(cuda_context->lock);
   check_cuda_error(
       cuModuleLoadDataEx(&cudaModule, ptx.c_str(), 0, nullptr, nullptr));
-  TC_TRACE("CUDA module load time : {}ms", (Time::get_time() - t) * 1000);
+  TI_TRACE("CUDA module load time : {}ms", (Time::get_time() - t) * 1000);
   cudaModules.push_back(cudaModule);
   return cudaModule;
 }
@@ -192,7 +192,7 @@ CUfunction CUDAContext::get_function(CUmodule module,
   auto t = Time::get_time();
   check_cuda_error(cuModuleGetFunction(&func, module, func_name.c_str()));
   t = Time::get_time() - t;
-  TC_TRACE("Kernel {} compilation time: {}ms", func_name, t * 1000);
+  TI_TRACE("Kernel {} compilation time: {}ms", func_name, t * 1000);
   return func;
 }
 
@@ -230,7 +230,7 @@ void CUDAContext::launch(CUfunction func,
     check_cuda_error(cudaDeviceSynchronize());
     auto err = cudaGetLastError();
     if (err) {
-      TC_ERROR("CUDA Kernel Launch Error: {}", cudaGetErrorString(err));
+      TI_ERROR("CUDA Kernel Launch Error: {}", cudaGetErrorString(err));
     }
   }
 }
@@ -246,13 +246,13 @@ CUDAContext::~CUDAContext() {
 
 #else
 std::string compile_module_to_ptx(std::unique_ptr<llvm::Module> &module) {
-  TC_NOT_IMPLEMENTED
+  TI_NOT_IMPLEMENTED
 }
 
 int compile_ptx_and_launch(const std::string &ptx,
                            const std::string &kernel_name,
                            void *) {
-  TC_NOT_IMPLEMENTED
+  TI_NOT_IMPLEMENTED
 }
 #endif
 
diff --git a/taichi/backends/loopgen.cpp b/taichi/backends/loopgen.cpp
index d99f1ae94e6ea..655f5e2803a64 100644
--- a/taichi/backends/loopgen.cpp
+++ b/taichi/backends/loopgen.cpp
@@ -8,14 +8,14 @@
 #if defined(__host__)
 #undef __host__
 #endif
-#if defined(CUDA_FOUND)
+#if defined(TI_WITH_CUDA)
 #include <cuda_runtime.h>
 #endif
 
 TLANG_NAMESPACE_BEGIN
 
 LoopGenerator::LoopGenerator(taichi::Tlang::CodeGenBase *gen) : gen(gen) {
-#if defined(CUDA_FOUND)
+#if defined(TI_WITH_CUDA)
   int num_SMs;
   cudaDeviceGetAttribute(&num_SMs, cudaDevAttrMultiProcessorCount, 0);
   grid_dim = num_SMs * 32;  // each SM can have 16-32 resident blocks
@@ -41,8 +41,8 @@ void LoopGenerator::emit_listgen_func(SNode *snode,
   } else {
     child_block_size /= child_block_division;
   }
-  // TC_P(child_block_size);
-  // TC_P(child_block_division);
+  // TI_P(child_block_size);
+  // TI_P(child_block_division);
 
   auto parent = snode->parent;
 
@@ -106,7 +106,7 @@ void LoopGenerator::emit_listgen_func(SNode *snode,
     emit("auto list_element = ({}::child_type *)leaves[leaf_loop].ptr;",
          parent->node_type_name);
     auto chid = parent->child_id(snode);
-    TC_ASSERT(chid != -1);
+    TI_ASSERT(chid != -1);
     emit("auto {}_cache = list_element->get{}();", snode->node_type_name, chid);
     for (int i = 0; i < max_num_indices; i++) {
       emit("auto {} = leaves[leaf_loop].indices[{}];",
@@ -128,7 +128,7 @@ void LoopGenerator::emit_listgen_func(SNode *snode,
     emit("if (cid >= input_meta.end_loop) break;");
     emit("if (!{}_cache->is_active(cid)) continue;", parent->node_type_name);
     if (deactivate) {
-      TC_ASSERT(child_block_division == 1);
+      TI_ASSERT(child_block_division == 1);
       emit("{}_cache->deactivate(cid);", parent->node_type_name);
     }
   } else {
diff --git a/taichi/backends/loopgen.h b/taichi/backends/loopgen.h
index cd93c1cd31be0..1f200b1103143 100644
--- a/taichi/backends/loopgen.h
+++ b/taichi/backends/loopgen.h
@@ -34,7 +34,7 @@ class LoopGenerator {
   }
 
   void generate_loop_header(SNode *snode, StructForStmt *stmt) {
-    TC_ASSERT(snode->type != SNodeType::place)
+    TI_ASSERT(snode->type != SNodeType::place)
     if (snode->parent != nullptr) {
       generate_loop_header(snode->parent, stmt);
     }
@@ -72,7 +72,7 @@ class LoopGenerator {
                                  snode->extractors[i].num_bits,
                                  snode->extractors[i].start);
         } else {
-          TC_ASSERT(snode->num_active_indices <= 3);
+          TI_ASSERT(snode->num_active_indices <= 3);
           uint32 mask = mask_base[snode->num_active_indices - 1]
                         << (snode->num_active_indices - morton_id - 1);
           morton_id++;
@@ -111,7 +111,7 @@ class LoopGenerator {
            snode->node_type_name);
     }
     if (snode->_multi_threaded) {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
     }
 
     if (snode->type == SNodeType::hash) {
diff --git a/taichi/backends/struct.cpp b/taichi/backends/struct.cpp
index 59fdc7257f710..e89d7e70b521b 100644
--- a/taichi/backends/struct.cpp
+++ b/taichi/backends/struct.cpp
@@ -9,14 +9,14 @@ TLANG_NAMESPACE_BEGIN
 StructCompiler::StructCompiler(Program *prog)
     : CodeGenBase(), loopgen(this), prog(prog) {
   creator = [] {
-    TC_ERROR("Not Specified");
+    TI_ERROR("Not Specified");
     return nullptr;
   };
   profiler_clear = [] {
-    TC_WARN("Profiler not yet implemented in this backend.");
+    TI_WARN("Profiler not yet implemented in this backend.");
   };
   profiler_print = [] {
-    TC_WARN("Profiler not yet implemented in this backend.");
+    TI_WARN("Profiler not yet implemented in this backend.");
   };
 
   if (get_current_program().config.arch == Arch::x86_64)
@@ -42,7 +42,7 @@ void StructCompiler::collect_snodes(SNode &snode) {
 }
 
 void StructCompiler::infer_snode_properties(SNode &snode) {
-  // TC_P(snode.type_name());
+  // TI_P(snode.type_name());
   for (int ch_id = 0; ch_id < (int)snode.ch.size(); ch_id++) {
     auto &ch = snode.ch[ch_id];
     ch->parent = &snode;
@@ -71,7 +71,7 @@ void StructCompiler::infer_snode_properties(SNode &snode) {
     if (ch_id == 0) {
       snode.total_bit_start = total_bits_start_inferred;
     } else if (snode.parent != nullptr) {  // root is ok
-      // TC_ASSERT(snode.total_bit_start == total_bits_start_inferred);
+      // TI_ASSERT(snode.total_bit_start == total_bits_start_inferred);
     }
     // infer extractors
     int acc_offsets = 0;
@@ -82,9 +82,9 @@ void StructCompiler::infer_snode_properties(SNode &snode) {
         snode.extractors[i].acc_offset = acc_offsets;
       } else if (snode.parent != nullptr) {  // root is OK
         /*
-        TC_ASSERT_INFO(snode.extractors[i].start == inferred,
+        TI_ASSERT_INFO(snode.extractors[i].start == inferred,
                        "Inconsistent bit configuration");
-        TC_ASSERT_INFO(snode.extractors[i].dest_offset ==
+        TI_ASSERT_INFO(snode.extractors[i].dest_offset ==
                            snode.total_bit_start + acc_offsets,
                        "Inconsistent bit configuration");
                        */
@@ -98,14 +98,14 @@ void StructCompiler::infer_snode_properties(SNode &snode) {
           active_extractor_counder += 1;
           SNode *p = snode.parent;
           while (p) {
-            TC_ASSERT_INFO(
+            TI_ASSERT_INFO(
                 p->extractors[i].num_bits == 0,
                 "Dynamic SNode must have a standalone dimensionality.");
             p = p->parent;
           }
         }
       }
-      TC_ASSERT_INFO(active_extractor_counder == 1,
+      TI_ASSERT_INFO(active_extractor_counder == 1,
                      "Dynamic SNode can have only one index extractor.");
     }
   }
@@ -126,7 +126,7 @@ void StructCompiler::infer_snode_properties(SNode &snode) {
 
   if (snode.ch.empty()) {
     if (snode.type != SNodeType::place && snode.type != SNodeType::root) {
-      TC_ERROR("{} node must have at least one child.",
+      TI_ERROR("{} node must have at least one child.",
                snode_type_name(snode.type));
     }
   }
@@ -136,7 +136,7 @@ void StructCompiler::generate_types(SNode &snode) {
   auto type = snode.type;
 
   if (snode.type != SNodeType::place && snode.ch.empty()) {
-    TC_ERROR("Non-place node should have at least one child.");
+    TI_ERROR("Non-place node should have at least one child.");
   }
 
   // create children type that supports forking...
@@ -145,12 +145,12 @@ void StructCompiler::generate_types(SNode &snode) {
     emit("{} member{};", snode.ch[i]->node_type_name, i);
   }
   if (snode.ch.size() == 1 && snode.ch[0]->type == SNodeType::place) {
-    emit("TC_DEVICE {}_ch({} v) {{*get0()=v;}}", snode.node_type_name,
+    emit("TI_DEVICE {}_ch({} v) {{*get0()=v;}}", snode.node_type_name,
          snode.ch[0]->node_type_name);
-    emit("TC_DEVICE {}_ch() = default;", snode.node_type_name);
+    emit("TI_DEVICE {}_ch() = default;", snode.node_type_name);
   }
   for (int i = 0; i < (int)snode.ch.size(); i++) {
-    emit("TC_DEVICE {} *get{}() {{return &member{};}} ",
+    emit("TI_DEVICE {} *get{}() {{return &member{};}} ",
          snode.ch[i]->node_type_name, i, i);
   }
   emit("}};");
@@ -172,15 +172,15 @@ void StructCompiler::generate_types(SNode &snode) {
     emit("using {} = hash<{}_ch>;", snode.node_type_name, snode.node_type_name);
   } else if (type == SNodeType::place) {
     emit(
-        "struct {} {{ using val_type = {}; val_type val; TC_DEVICE operator "
+        "struct {} {{ using val_type = {}; val_type val; TI_DEVICE operator "
         "{}() {{return val;}} "
-        "TC_DEVICE {}(){{}} TC_DEVICE {}({} val) "
+        "TI_DEVICE {}(){{}} TI_DEVICE {}({} val) "
         ": val(val){{ }} }};",
         snode.node_type_name, snode.data_type_name(), snode.data_type_name(),
         snode.node_type_name, snode.node_type_name, snode.data_type_name());
   } else {
-    TC_P(snode.type_name());
-    TC_NOT_IMPLEMENTED;
+    TI_P(snode.type_name());
+    TI_NOT_IMPLEMENTED;
   }
 
   if (snode.has_null()) {
@@ -203,14 +203,14 @@ void StructCompiler::generate_leaf_accessors(SNode &snode) {
 
   if (!is_leaf) {
     // Chain accessors for non-leaf nodes
-    TC_ASSERT(snode.ch.size() > 0);
+    TI_ASSERT(snode.ch.size() > 0);
     for (int i = 0; i < (int)snode.ch.size(); i++) {
       auto ch = snode.ch[i];
       emit(
           "TLANG_ACCESSOR {} *access_{}({} *parent, int i "
           ") {{",
           ch->node_type_name, ch->node_type_name, snode.node_type_name);
-      // emit("#if defined(TC_STRUCT)");
+      // emit("#if defined(TI_STRUCT)");
       // emit("parent->activate(i, index);");
       // emit("#endif");
       emit("auto lookup = parent->look_up(i); ");
@@ -225,7 +225,7 @@ void StructCompiler::generate_leaf_accessors(SNode &snode) {
   }
   // SNode::place & indirect
   // emit end2end accessors for leaf (place) nodes, using chain accessors
-  TC_ASSERT(max_num_indices == 4);
+  TI_ASSERT(max_num_indices == 4);
   constexpr int mode_weak_access = 0;
   constexpr int mode_strong_access = 1;
   constexpr int mode_activate = 2;
@@ -245,7 +245,7 @@ void StructCompiler::generate_leaf_accessors(SNode &snode) {
     auto ret_type =
         mode == mode_query ? "bool" : fmt::format("{} *", snode.node_type_name);
     emit(
-        "TLANG_ACCESSOR TC_EXPORT {} {}_{}(void *root, int i0=0, int i1=0, "
+        "TLANG_ACCESSOR TI_EXPORT {} {}_{}(void *root, int i0=0, int i1=0, "
         "int "
         "i2=0, "
         "int i3=0) {{",
@@ -268,7 +268,7 @@ void StructCompiler::generate_leaf_accessors(SNode &snode) {
             emit("tmp = (tmp << {}) + ((i{} >> {}) & ((1 << {}) - 1));",
                  e.num_bits, j, e.start, e.num_bits);
           } else {
-            TC_WARN("Emitting shortcut indexing");
+            TI_WARN("Emitting shortcut indexing");
             emit("tmp = i{};", j);
           }
         }
@@ -278,7 +278,7 @@ void StructCompiler::generate_leaf_accessors(SNode &snode) {
         if (force_activate)
           emit("#if 1");
         else
-          emit("#if defined(TC_STRUCT)");
+          emit("#if defined(TI_STRUCT)");
       }
       if (stack[i]->type != SNodeType::place) {
         if (mode == mode_query) {
@@ -372,13 +372,13 @@ void StructCompiler::run(SNode &root, bool host) {
   for (int i = 0; i < (int)snodes.size(); i++) {
     if (snodes[i]->type != SNodeType::place)
       emit(
-          "TC_EXPORT AllocatorStat stat_{}() {{return "
+          "TI_EXPORT AllocatorStat stat_{}() {{return "
           "Managers::get_allocator<{}>()->get_stat();}} ",
           snodes[i]->node_type_name, snodes[i]->node_type_name);
     if (snodes[i]->type == SNodeType::pointer ||
         snodes[i]->type == SNodeType::hash) {
       emit(
-          "TC_EXPORT void clear_{}(int flags) {{"
+          "TI_EXPORT void clear_{}(int flags) {{"
           "Managers::get_allocator<{}>()->clear(flags);}} ",
           snodes[i]->node_type_name, snodes[i]->node_type_name);
     }
@@ -386,12 +386,12 @@ void StructCompiler::run(SNode &root, bool host) {
 
   root_type = root.node_type_name;
   generate_leaf_accessors(root);
-  emit("#if defined(TC_STRUCT)");
-  emit("TC_EXPORT void *create_data_structure() {{");
+  emit("#if defined(TI_STRUCT)");
+  emit("TI_EXPORT void *create_data_structure() {{");
 
   emit("Managers::initialize();");
 
-  TC_ASSERT((int)snodes.size() <= max_num_snodes);
+  TI_ASSERT((int)snodes.size() <= max_num_snodes);
   for (int i = 0; i < (int)snodes.size(); i++) {
     // if (snodes[i]->type == SNodeType::pointer ||
     // snodes[i]->type == SNodeType::hashed) {
@@ -424,10 +424,10 @@ void StructCompiler::run(SNode &root, bool host) {
   // emit("CPUProfiler profiler;");
   emit("#endif");
 
-  emit("TC_EXPORT void release_data_structure(void *ds) {{delete ({} *)ds;}}",
+  emit("TI_EXPORT void release_data_structure(void *ds) {{delete ({} *)ds;}}",
        root_type);
 
-  emit("TC_EXPORT void profiler_print()");
+  emit("TI_EXPORT void profiler_print()");
   emit("{{");
   emit("#if defined(TLANG_GPU)");
   emit("CUDAProfiler::get_instance().print();");
@@ -436,7 +436,7 @@ void StructCompiler::run(SNode &root, bool host) {
   emit("#endif");
   emit("}}");
 
-  emit("TC_EXPORT void profiler_clear()");
+  emit("TI_EXPORT void profiler_clear()");
   emit("{{");
   emit("#if defined(TLANG_GPU)");
   emit("CUDAProfiler::get_instance().clear();");
@@ -452,7 +452,7 @@ void StructCompiler::run(SNode &root, bool host) {
   emit("}} }}");
   write_source();
 
-  generate_binary("-DTC_STRUCT");
+  generate_binary("-DTI_STRUCT");
   load_dll();
   creator = load_function<void *(*)()>("create_data_structure");
   profiler_print = load_function<void (*)()>("profiler_print");
diff --git a/taichi/backends/struct_llvm.cpp b/taichi/backends/struct_llvm.cpp
index 366afaaa61daf..eea70fba98438 100644
--- a/taichi/backends/struct_llvm.cpp
+++ b/taichi/backends/struct_llvm.cpp
@@ -11,7 +11,7 @@
 TLANG_NAMESPACE_BEGIN
 
 void assert_failed_host(const char *msg) {
-  TC_ERROR("Assertion failure: {}", msg);
+  TI_ERROR("Assertion failure: {}", msg);
 }
 
 StructCompilerLLVM::StructCompilerLLVM(Program *prog, Arch arch)
@@ -19,7 +19,7 @@ StructCompilerLLVM::StructCompilerLLVM(Program *prog, Arch arch)
       ModuleBuilder(prog->get_llvm_context(arch)->get_init_module()),
       arch(arch) {
   creator = [] {
-    TC_WARN("Data structure creation not implemented");
+    TI_WARN("Data structure creation not implemented");
     return nullptr;
   };
   tlctx = prog->get_llvm_context(arch);
@@ -54,7 +54,7 @@ void StructCompilerLLVM::generate_types(SNode &snode) {
 
   llvm::Type *body_type = nullptr, *aux_type = nullptr;
   if (type == SNodeType::dense) {
-    TC_ASSERT(snode._morton == false);
+    TI_ASSERT(snode._morton == false);
     body_type = llvm::ArrayType::get(ch_type, snode.max_num_elements());
     if (snode._bitmasked) {
       aux_type = llvm::ArrayType::get(Type::getInt32Ty(*llvm_ctx),
@@ -72,12 +72,14 @@ void StructCompilerLLVM::generate_types(SNode &snode) {
     } else if (snode.dt == DataType::f64){
       body_type = llvm::Type::getDoubleTy(*ctx);
     } else {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
     }
   } else if (type == SNodeType::pointer) {
     // mutex
-    aux_type = llvm::PointerType::getInt64Ty(*ctx);
-    body_type = llvm::PointerType::getInt8PtrTy(*ctx);
+    aux_type = llvm::ArrayType::get(llvm::PointerType::getInt64Ty(*ctx),
+                                    snode.max_num_elements());
+    body_type = llvm::ArrayType::get(llvm::PointerType::getInt8PtrTy(*ctx),
+                                     snode.max_num_elements());
   } else if (type == SNodeType::dynamic) {
     // mutex and n (number of elements)
     aux_type =
@@ -85,8 +87,8 @@ void StructCompilerLLVM::generate_types(SNode &snode) {
                                      llvm::PointerType::getInt32Ty(*ctx)});
     body_type = llvm::PointerType::getInt8PtrTy(*ctx);
   } else {
-    TC_P(snode.type_name());
-    TC_NOT_IMPLEMENTED;
+    TI_P(snode.type_name());
+    TI_NOT_IMPLEMENTED;
   }
   if (aux_type != nullptr) {
     llvm_type = llvm::StructType::create(*ctx, {aux_type, body_type}, "");
@@ -96,7 +98,7 @@ void StructCompilerLLVM::generate_types(SNode &snode) {
     snode.has_aux_structure = false;
   }
 
-  TC_ASSERT(llvm_type != nullptr);
+  TI_ASSERT(llvm_type != nullptr);
   snode_attr[snode].llvm_type = llvm_type;
   snode_attr[snode].llvm_aux_type = aux_type;
   snode_attr[snode].llvm_body_type = body_type;
@@ -230,11 +232,11 @@ void StructCompilerLLVM::run(SNode &root, bool host) {
   generate_leaf_accessors(root);
 
   if (prog->config.print_struct_llvm_ir) {
-    TC_INFO("Struct Module IR");
+    TI_INFO("Struct Module IR");
     module->print(errs(), nullptr);
   }
 
-  TC_ASSERT((int)snodes.size() <= max_num_snodes);
+  TI_ASSERT((int)snodes.size() <= max_num_snodes);
 
   auto root_size =
       tlctx->jit->getDataLayout().getTypeAllocSize(snode_attr[root].llvm_type);
@@ -284,7 +286,7 @@ void StructCompilerLLVM::run(SNode &root, bool host) {
     auto root_id = root.id;
     auto prog = this->prog;
     creator = [=]() {
-      TC_TRACE("Allocating data structure of size {} B", root_size);
+      TI_TRACE("Allocating data structure of size {} B", root_size);
       auto root = initialize_runtime(
           &prog->llvm_runtime, prog, (int)snodes.size(), root_size,
           (void *)&taichi_allocate_aligned, logger.get_level() <= 1);
@@ -309,11 +311,11 @@ void StructCompilerLLVM::run(SNode &root, bool host) {
                 tlctx->get_type_size(snode_attr[snodes[i]].llvm_element_type) *
                     snodes[i]->chunk_size;
           }
-          TC_TRACE("Initializing allocator for snode {} (node size {})",
+          TI_TRACE("Initializing allocator for snode {} (node size {})",
                   snodes[i]->id, node_size);
           auto rt = prog->llvm_runtime;
           initialize_allocator(rt, i, node_size);
-          TC_TRACE("Allocating ambient element for snode {} (node size {})",
+          TI_TRACE("Allocating ambient element for snode {} (node size {})",
                   snodes[i]->id, node_size);
           allocate_ambient(rt, i);
         }
diff --git a/taichi/backends/struct_metal.cpp b/taichi/backends/struct_metal.cpp
index 92dd8e035d7b0..4fc12240bef9b 100644
--- a/taichi/backends/struct_metal.cpp
+++ b/taichi/backends/struct_metal.cpp
@@ -4,7 +4,7 @@ TLANG_NAMESPACE_BEGIN
 namespace metal {
 
 MetalStructCompiler::CompiledResult MetalStructCompiler::run(SNode &node) {
-  TC_ASSERT(node.type == SNodeType::root);
+  TI_ASSERT(node.type == SNodeType::root);
   collect_snodes(node);
   // The host side has run this!
   // infer_snode_properties(node);
@@ -85,9 +85,9 @@ void MetalStructCompiler::generate_types(const SNode &snode) {
     emit("  device byte* addr_;");
     emit("}};");
   } else {
-    TC_ERROR("SNodeType={} not supported on Metal",
+    TI_ERROR("SNodeType={} not supported on Metal",
              snode_type_name(snode.type));
-    TC_NOT_IMPLEMENTED;
+    TI_NOT_IMPLEMENTED;
   }
   emit("");
 }
diff --git a/taichi/common.h b/taichi/common.h
index 5c3816ca49d1c..f2bc3be219cdc 100644
--- a/taichi/common.h
+++ b/taichi/common.h
@@ -16,12 +16,12 @@
 #include <unordered_map>
 #include <iostream>
 
-#if !defined(TC_INCLUDED)
+#if !defined(TI_INCLUDED)
 
 #ifdef _WIN64
-#define TC_FORCE_INLINE __forceinline
+#define TI_FORCE_INLINE __forceinline
 #else
-#define TC_FORCE_INLINE inline __attribute__((always_inline))
+#define TI_FORCE_INLINE inline __attribute__((always_inline))
 #endif
 #include <cstdio>
 #include <string>
@@ -44,7 +44,7 @@ using int32 = std::int32_t;
 using int64 = std::int64_t;
 
 namespace taichi {
-TC_FORCE_INLINE uint32 rand_int() noexcept {
+TI_FORCE_INLINE uint32 rand_int() noexcept {
   static unsigned int x = 123456789, y = 362436069, z = 521288629, w = 88675123;
   unsigned int t = x ^ (x << 11);
   x = y;
@@ -53,30 +53,30 @@ TC_FORCE_INLINE uint32 rand_int() noexcept {
   return (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)));
 }
 
-TC_FORCE_INLINE uint64 rand_int64() noexcept {
+TI_FORCE_INLINE uint64 rand_int64() noexcept {
   return ((uint64)rand_int() << 32) + rand_int();
 }
 
 template <typename T>
-TC_FORCE_INLINE T rand() noexcept;
+TI_FORCE_INLINE T rand() noexcept;
 
 template <>
-TC_FORCE_INLINE float rand<float>() noexcept {
+TI_FORCE_INLINE float rand<float>() noexcept {
   return rand_int() * (1.0f / 4294967296.0f);
 }
 
 template <>
-TC_FORCE_INLINE double rand<double>() noexcept {
+TI_FORCE_INLINE double rand<double>() noexcept {
   return rand_int() * (1.0 / 4294967296.0);
 }
 
 template <>
-TC_FORCE_INLINE int rand<int>() noexcept {
+TI_FORCE_INLINE int rand<int>() noexcept {
   return rand_int();
 }
 
 template <typename T>
-TC_FORCE_INLINE T rand() noexcept;
+TI_FORCE_INLINE T rand() noexcept;
 }  // namespace taichi
 
 #endif
diff --git a/taichi/common/asset_manager.cpp b/taichi/common/asset_manager.cpp
index 04f9cf9f3d35f..c322d66fef59d 100644
--- a/taichi/common/asset_manager.cpp
+++ b/taichi/common/asset_manager.cpp
@@ -1,11 +1,11 @@
 #include "util.h"
 #include "asset_manager.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 AssetManager &AssetManager::get_instance() {
   static AssetManager manager;
   return manager;
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/common/asset_manager.h b/taichi/common/asset_manager.h
index a089289770299..b68234da72d0e 100644
--- a/taichi/common/asset_manager.h
+++ b/taichi/common/asset_manager.h
@@ -12,7 +12,7 @@
 #include <memory>
 #include <iostream>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class AssetManager {
  private:
@@ -27,10 +27,10 @@ class AssetManager {
   // Note: this is not thread safe!
   template <typename T>
   std::shared_ptr<T> get_asset_(int id) {
-    TC_ASSERT_INFO(id_to_asset.find(id) != id_to_asset.end(),
+    TI_ASSERT_INFO(id_to_asset.find(id) != id_to_asset.end(),
                    "Asset not found");
     auto ptr = id_to_asset[id];
-    TC_ASSERT_INFO(!ptr.expired(), "Asset has been expired");
+    TI_ASSERT_INFO(!ptr.expired(), "Asset has been expired");
     return std::static_pointer_cast<T>(ptr.lock());
   }
 
@@ -38,7 +38,7 @@ class AssetManager {
   int insert_asset_(const std::shared_ptr<T> &ptr) {
     if (asset_to_id.find(ptr.get()) != asset_to_id.end()) {
       int existing_id = asset_to_id.find(ptr.get())->second;
-      TC_ASSERT_INFO(id_to_asset[existing_id].expired(),
+      TI_ASSERT_INFO(id_to_asset[existing_id].expired(),
                      "Asset already exists");
       asset_to_id.erase(ptr.get());
       id_to_asset.erase(existing_id);
@@ -62,4 +62,4 @@ class AssetManager {
   static AssetManager &get_instance();
 };
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/common/bit.h b/taichi/common/bit.h
index fe2d4bfb0466c..de84f45d097ca 100644
--- a/taichi/common/bit.h
+++ b/taichi/common/bit.h
@@ -5,23 +5,23 @@
 #pragma once
 #include "util.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 namespace bit {
 
-TC_FORCE_INLINE constexpr bool is_power_of_two(int32 x) {
+TI_FORCE_INLINE constexpr bool is_power_of_two(int32 x) {
   return x != 0 && (x & (x - 1)) == 0;
 }
 
-TC_FORCE_INLINE constexpr bool is_power_of_two(uint32 x) {
+TI_FORCE_INLINE constexpr bool is_power_of_two(uint32 x) {
   return x != 0 && (x & (x - 1)) == 0;
 }
 
-TC_FORCE_INLINE constexpr bool is_power_of_two(int64 x) {
+TI_FORCE_INLINE constexpr bool is_power_of_two(int64 x) {
   return x != 0 && (x & (x - 1)) == 0;
 }
 
-TC_FORCE_INLINE constexpr bool is_power_of_two(uint64 x) {
+TI_FORCE_INLINE constexpr bool is_power_of_two(uint64 x) {
   return x != 0 && (x & (x - 1)) == 0;
 }
 
@@ -47,25 +47,25 @@ struct Bits {
   }
 
   template <int start, int bits = 1>
-  TC_FORCE_INLINE T get() const {
+  TI_FORCE_INLINE T get() const {
     return (data >> start) & (((T)1 << bits) - 1);
   }
 
   template <int start, int bits = 1>
-  TC_FORCE_INLINE void set(T val) {
+  TI_FORCE_INLINE void set(T val) {
     data =
         (data & ~mask<start, bits>()) | ((val << start) & mask<start, bits>());
   }
 
-  TC_FORCE_INLINE T operator()(T) const {
+  TI_FORCE_INLINE T operator()(T) const {
     return data;
   }
 
-  TC_FORCE_INLINE T get() const {
+  TI_FORCE_INLINE T get() const {
     return data;
   }
 
-  TC_FORCE_INLINE void set(const T &data) {
+  TI_FORCE_INLINE void set(const T &data) {
     this->data = data;
   }
 };
@@ -78,7 +78,7 @@ constexpr int bit_length() {
   return std::is_same<T, bool>() ? 1 : sizeof(T) * 8;
 }
 
-#define TC_BIT_FIELD(T, name, start)                    \
+#define TI_BIT_FIELD(T, name, start)                    \
   T get_##name() const {                                \
     return (T)Base::get<start, bit::bit_length<T>()>(); \
   }                                                     \
@@ -87,7 +87,7 @@ constexpr int bit_length() {
   }
 
 template <typename T, int N>
-TC_FORCE_INLINE constexpr T product(const std::array<T, N> arr) {
+TI_FORCE_INLINE constexpr T product(const std::array<T, N> arr) {
   T ret(1);
   for (int i = 0; i < N; i++) {
     ret *= arr[i];
@@ -103,11 +103,11 @@ constexpr std::size_t least_pot_bound(std::size_t v) {
   return ret;
 }
 
-TC_FORCE_INLINE constexpr uint32 pot_mask(int x) {
+TI_FORCE_INLINE constexpr uint32 pot_mask(int x) {
   return (1u << x) - 1;
 }
 
-TC_FORCE_INLINE constexpr uint32 log2int(uint64 value) {
+TI_FORCE_INLINE constexpr uint32 log2int(uint64 value) {
   int ret = 0;
   value >>= 1;
   while (value) {
@@ -118,18 +118,18 @@ TC_FORCE_INLINE constexpr uint32 log2int(uint64 value) {
 }
 
 template <typename G, typename T>
-constexpr TC_FORCE_INLINE copy_refcv_t<T, G> &&reinterpret_bits(T &&t) {
-  TC_STATIC_ASSERT(sizeof(G) == sizeof(T));
+constexpr TI_FORCE_INLINE copy_refcv_t<T, G> &&reinterpret_bits(T &&t) {
+  TI_STATIC_ASSERT(sizeof(G) == sizeof(T));
   return std::forward<copy_refcv_t<T, G>>(*reinterpret_cast<G *>(&t));
 };
 
-TC_FORCE_INLINE constexpr float64 compress(float32 h, float32 l) {
+TI_FORCE_INLINE constexpr float64 compress(float32 h, float32 l) {
   uint64 data =
       ((uint64)reinterpret_bits<uint32>(h) << 32) + reinterpret_bits<uint32>(l);
   return reinterpret_bits<float64>(data);
 }
 
-TC_FORCE_INLINE constexpr std::tuple<float32, float32> extract(float64 x) {
+TI_FORCE_INLINE constexpr std::tuple<float32, float32> extract(float64 x) {
   auto data = reinterpret_bits<uint64>(x);
   return std::make_tuple(reinterpret_bits<float32>((uint32)(data >> 32)),
                          reinterpret_bits<float32>((uint32)(data & (-1))));
@@ -137,4 +137,4 @@ TC_FORCE_INLINE constexpr std::tuple<float32, float32> extract(float64 x) {
 
 }  // namespace bit
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/common/dict.h b/taichi/common/dict.h
index 046e2b23bfb32..a5cde797563f3 100644
--- a/taichi/common/dict.h
+++ b/taichi/common/dict.h
@@ -18,11 +18,11 @@
 #include "util.h"
 #include "asset_manager.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 // Declare and then load
 // Load to `this`
-#define TC_LOAD_CONFIG(name, default_val) \
+#define TI_LOAD_CONFIG(name, default_val) \
   this->name = config.get(#name, default_val)
 
 class Dict {
@@ -30,7 +30,7 @@ class Dict {
   std::map<std::string, std::string> data;
 
  public:
-  TC_IO_DEF(data);
+  TI_IO_DEF(data);
 
   Dict() = default;
 
@@ -74,7 +74,7 @@ class Dict {
 
   void check_string_integral(const std::string &str) const {
     if (!is_string_integral(str)) {
-      TC_ERROR(
+      TI_ERROR(
           "Getting integral value out of non-integral string '{}' is not "
           "allowed.",
           str);
@@ -281,7 +281,7 @@ class Dict {
 
   std::string get_string(std::string key) const {
     if (data.find(key) == data.end()) {
-      TC_ERROR("No key named '{}' found.", key);
+      TI_ERROR("No key named '{}' found.", key);
     }
     return data.find(key)->second;
   }
@@ -360,4 +360,4 @@ inline bool Dict::get<bool>(std::string key) const {
 
 using Config = Dict;
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/common/interface.h b/taichi/common/interface.h
index 8f9415574cbb8..a9cc21f17be61 100644
--- a/taichi/common/interface.h
+++ b/taichi/common/interface.h
@@ -14,43 +14,43 @@
 #include <memory>
 #include <iostream>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 template <typename T>
-TC_EXPORT std::shared_ptr<T> create_instance(const std::string &alias);
+TI_EXPORT std::shared_ptr<T> create_instance(const std::string &alias);
 
 template <typename T>
-TC_EXPORT std::shared_ptr<T> create_instance(const std::string &alias,
+TI_EXPORT std::shared_ptr<T> create_instance(const std::string &alias,
                                              const Config &config);
 
 template <typename T>
-TC_EXPORT std::unique_ptr<T> create_instance_unique(const std::string &alias);
+TI_EXPORT std::unique_ptr<T> create_instance_unique(const std::string &alias);
 
 template <typename T>
-TC_EXPORT std::unique_ptr<T> create_instance_unique(const std::string &alias,
+TI_EXPORT std::unique_ptr<T> create_instance_unique(const std::string &alias,
                                                     const Config &config);
 template <typename T>
-TC_EXPORT std::unique_ptr<T> create_instance_unique_ctor(
+TI_EXPORT std::unique_ptr<T> create_instance_unique_ctor(
     const std::string &alias,
     const Config &config);
 
 template <typename T>
-TC_EXPORT T *create_instance_raw(const std::string &alias);
+TI_EXPORT T *create_instance_raw(const std::string &alias);
 
 template <typename T>
-TC_EXPORT T *create_instance_raw(const std::string &alias,
+TI_EXPORT T *create_instance_raw(const std::string &alias,
                                  const Config &config);
 
 template <typename T>
-TC_EXPORT T *create_instance_placement(const std::string &alias, void *place);
+TI_EXPORT T *create_instance_placement(const std::string &alias, void *place);
 
 template <typename T>
-TC_EXPORT T *create_instance_placement(const std::string &alias,
+TI_EXPORT T *create_instance_placement(const std::string &alias,
                                        void *place,
                                        const Config &config);
 
 template <typename T>
-TC_EXPORT std::vector<std::string> get_implementation_names();
+TI_EXPORT std::vector<std::string> get_implementation_names();
 
 class Unit {
  public:
@@ -65,12 +65,12 @@ class Unit {
   }
 
   virtual std::string get_name() const {
-    TC_NOT_IMPLEMENTED;
+    TI_NOT_IMPLEMENTED;
     return "";
   }
 
   virtual std::string general_action(const Config &config) {
-    TC_NOT_IMPLEMENTED;
+    TI_NOT_IMPLEMENTED;
     return "";
   }
 
@@ -78,8 +78,8 @@ class Unit {
   }
 };
 
-#define TC_IMPLEMENTATION_HOLDER_NAME(T) ImplementationHolder_##T
-#define TC_IMPLEMENTATION_HOLDER_PTR(T) instance_ImplementationHolder_##T
+#define TI_IMPLEMENTATION_HOLDER_NAME(T) ImplementationHolder_##T
+#define TI_IMPLEMENTATION_HOLDER_PTR(T) instance_ImplementationHolder_##T
 
 class ImplementationHolderBase {
  public:
@@ -114,12 +114,12 @@ class InterfaceHolder {
   }
 };
 
-#define TC_INTERFACE(T)                                                       \
+#define TI_INTERFACE(T)                                                       \
   extern void *get_implementation_holder_instance_##T();                      \
-  class TC_IMPLEMENTATION_HOLDER_NAME(T) final                                \
+  class TI_IMPLEMENTATION_HOLDER_NAME(T) final                                \
       : public ImplementationHolderBase {                                     \
    public:                                                                    \
-    TC_IMPLEMENTATION_HOLDER_NAME(T)(const std::string &name) {               \
+    TI_IMPLEMENTATION_HOLDER_NAME(T)(const std::string &name) {               \
       this->name = name;                                                      \
     }                                                                         \
     using FactoryMethod = std::function<std::shared_ptr<T>()>;                \
@@ -225,66 +225,66 @@ class InterfaceHolder {
                   "Implementation [" + name + "::" + alias + "] not found!"); \
       return (factory->second)(place);                                        \
     }                                                                         \
-    static TC_IMPLEMENTATION_HOLDER_NAME(T) * get_instance() {                \
-      return static_cast<TC_IMPLEMENTATION_HOLDER_NAME(T) *>(                 \
+    static TI_IMPLEMENTATION_HOLDER_NAME(T) * get_instance() {                \
+      return static_cast<TI_IMPLEMENTATION_HOLDER_NAME(T) *>(                 \
           get_implementation_holder_instance_##T());                          \
     }                                                                         \
   };                                                                          \
-  extern TC_IMPLEMENTATION_HOLDER_NAME(T) * TC_IMPLEMENTATION_HOLDER_PTR(T);
+  extern TI_IMPLEMENTATION_HOLDER_NAME(T) * TI_IMPLEMENTATION_HOLDER_PTR(T);
 
-#define TC_INTERFACE_DEF(class_name, base_alias)                              \
+#define TI_INTERFACE_DEF(class_name, base_alias)                              \
   template <>                                                                 \
-  TC_EXPORT std::shared_ptr<class_name> create_instance(                      \
+  TI_EXPORT std::shared_ptr<class_name> create_instance(                      \
       const std::string &alias) {                                             \
-    return TC_IMPLEMENTATION_HOLDER_NAME(class_name)::get_instance()->create( \
+    return TI_IMPLEMENTATION_HOLDER_NAME(class_name)::get_instance()->create( \
         alias);                                                               \
   }                                                                           \
   template <>                                                                 \
-  TC_EXPORT std::shared_ptr<class_name> create_instance(                      \
+  TI_EXPORT std::shared_ptr<class_name> create_instance(                      \
       const std::string &alias, const Config &config) {                       \
     auto instance = create_instance<class_name>(alias);                       \
     instance->initialize(config);                                             \
     return instance;                                                          \
   }                                                                           \
   template <>                                                                 \
-  TC_EXPORT std::unique_ptr<class_name> create_instance_unique(               \
+  TI_EXPORT std::unique_ptr<class_name> create_instance_unique(               \
       const std::string &alias) {                                             \
-    return TC_IMPLEMENTATION_HOLDER_NAME(class_name)::get_instance()          \
+    return TI_IMPLEMENTATION_HOLDER_NAME(class_name)::get_instance()          \
         ->create_unique(alias);                                               \
   }                                                                           \
   template <>                                                                 \
-  TC_EXPORT std::unique_ptr<class_name> create_instance_unique(               \
+  TI_EXPORT std::unique_ptr<class_name> create_instance_unique(               \
       const std::string &alias, const Config &config) {                       \
     auto instance = create_instance_unique<class_name>(alias);                \
     instance->initialize(config);                                             \
     return instance;                                                          \
   }                                                                           \
   template <>                                                                 \
-  TC_EXPORT std::unique_ptr<class_name> create_instance_unique_ctor(          \
+  TI_EXPORT std::unique_ptr<class_name> create_instance_unique_ctor(          \
       const std::string &alias, const Dict &config) {                         \
-    return TC_IMPLEMENTATION_HOLDER_NAME(class_name)::get_instance()          \
+    return TI_IMPLEMENTATION_HOLDER_NAME(class_name)::get_instance()          \
         ->create_unique_ctor(alias, config);                                  \
   }                                                                           \
   template <>                                                                 \
-  TC_EXPORT class_name *create_instance_raw(const std::string &alias) {       \
-    return TC_IMPLEMENTATION_HOLDER_NAME(class_name)::get_instance()          \
+  TI_EXPORT class_name *create_instance_raw(const std::string &alias) {       \
+    return TI_IMPLEMENTATION_HOLDER_NAME(class_name)::get_instance()          \
         ->create_raw(alias);                                                  \
   }                                                                           \
   template <>                                                                 \
-  TC_EXPORT class_name *create_instance_placement(const std::string &alias,   \
+  TI_EXPORT class_name *create_instance_placement(const std::string &alias,   \
                                                   void *place) {              \
-    return TC_IMPLEMENTATION_HOLDER_NAME(class_name)::get_instance()          \
+    return TI_IMPLEMENTATION_HOLDER_NAME(class_name)::get_instance()          \
         ->create_placement(alias, place);                                     \
   }                                                                           \
   template <>                                                                 \
-  TC_EXPORT class_name *create_instance_placement(                            \
+  TI_EXPORT class_name *create_instance_placement(                            \
       const std::string &alias, void *place, const Config &config) {          \
     auto instance = create_instance_placement<class_name>(alias, place);      \
     instance->initialize(config);                                             \
     return instance;                                                          \
   }                                                                           \
   template <>                                                                 \
-  TC_EXPORT class_name *create_instance_raw(const std::string &alias,         \
+  TI_EXPORT class_name *create_instance_raw(const std::string &alias,         \
                                             const Config &config) {           \
     auto instance = create_instance_raw<class_name>(alias);                   \
     instance->initialize(config);                                             \
@@ -292,17 +292,17 @@ class InterfaceHolder {
   }                                                                           \
   template <>                                                                 \
   std::vector<std::string> get_implementation_names<class_name>() {           \
-    return TC_IMPLEMENTATION_HOLDER_NAME(class_name)::get_instance()          \
+    return TI_IMPLEMENTATION_HOLDER_NAME(class_name)::get_instance()          \
         ->get_implementation_names();                                         \
   }                                                                           \
-  TC_IMPLEMENTATION_HOLDER_NAME(class_name) *                                 \
-      TC_IMPLEMENTATION_HOLDER_PTR(class_name) = nullptr;                     \
+  TI_IMPLEMENTATION_HOLDER_NAME(class_name) *                                 \
+      TI_IMPLEMENTATION_HOLDER_PTR(class_name) = nullptr;                     \
   void *get_implementation_holder_instance_##class_name() {                   \
-    if (!TC_IMPLEMENTATION_HOLDER_PTR(class_name)) {                          \
-      TC_IMPLEMENTATION_HOLDER_PTR(class_name) =                              \
-          new TC_IMPLEMENTATION_HOLDER_NAME(class_name)(base_alias);          \
+    if (!TI_IMPLEMENTATION_HOLDER_PTR(class_name)) {                          \
+      TI_IMPLEMENTATION_HOLDER_PTR(class_name) =                              \
+          new TI_IMPLEMENTATION_HOLDER_NAME(class_name)(base_alias);          \
     }                                                                         \
-    return TC_IMPLEMENTATION_HOLDER_PTR(class_name);                          \
+    return TI_IMPLEMENTATION_HOLDER_PTR(class_name);                          \
   }                                                                           \
   class InterfaceInjector_##class_name {                                      \
    public:                                                                    \
@@ -329,25 +329,25 @@ class InterfaceHolder {
     }                                                                         \
   } ImplementationInjector_##base_class_name##class_name##instance(base_alias);
 
-#define TC_IMPLEMENTATION(base_class_name, class_name, alias)        \
+#define TI_IMPLEMENTATION(base_class_name, class_name, alias)        \
   class ImplementationInjector_##base_class_name##class_name {       \
    public:                                                           \
     ImplementationInjector_##base_class_name##class_name() {         \
-      TC_IMPLEMENTATION_HOLDER_NAME(base_class_name)::get_instance() \
+      TI_IMPLEMENTATION_HOLDER_NAME(base_class_name)::get_instance() \
           ->insert<class_name>(alias);                               \
     }                                                                \
   } ImplementationInjector_##base_class_name##class_name##instance;
 
-#define TC_IMPLEMENTATION_NEW(base_class_name, class_name)           \
+#define TI_IMPLEMENTATION_NEW(base_class_name, class_name)           \
   class ImplementationInjector_##base_class_name##class_name {       \
    public:                                                           \
     ImplementationInjector_##base_class_name##class_name() {         \
-      TC_IMPLEMENTATION_HOLDER_NAME(base_class_name)::get_instance() \
+      TI_IMPLEMENTATION_HOLDER_NAME(base_class_name)::get_instance() \
           ->insert_new<class_name>(class_name::get_name_static());   \
     }                                                                \
   } ImplementationInjector_##base_class_name##class_name##instance;
 
-#define TC_NAME(alias)                            \
+#define TI_NAME(alias)                            \
   virtual std::string get_name() const override { \
     return get_name_static();                     \
   }                                               \
@@ -355,4 +355,4 @@ class InterfaceHolder {
     return alias;                                 \
   }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/common/loader.h b/taichi/common/loader.h
index 769c01c3fcb12..9f17a3721ea65 100644
--- a/taichi/common/loader.h
+++ b/taichi/common/loader.h
@@ -7,28 +7,28 @@
 
 #include "interface.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
-#define TC_IMPLEMENTATION_LOADER(base_class_name, class_name, alias)          \
+#define TI_IMPLEMENTATION_LOADER(base_class_name, class_name, alias)          \
   class ImplementationLoader_##base_class_name##class_name {                  \
    public:                                                                    \
     ImplementationLoader_##base_class_name##class_name() {                    \
-      TC_IMPLEMENTATION_HOLDER_NAME(base_class_name)::get_instance()          \
+      TI_IMPLEMENTATION_HOLDER_NAME(base_class_name)::get_instance()          \
           ->insert<class_name>(alias);                                        \
     }                                                                         \
     ~ImplementationLoader_##base_class_name##class_name() {                   \
-      TC_IMPLEMENTATION_HOLDER_NAME(base_class_name)::get_instance()->remove( \
+      TI_IMPLEMENTATION_HOLDER_NAME(base_class_name)::get_instance()->remove( \
           alias);                                                             \
     }                                                                         \
   } ImplementationLoader_##base_class_name##class_name##instance;
 
-#define TC_IMPLEMENTATION_UPDATER(base_class_name, class_name, alias) \
+#define TI_IMPLEMENTATION_UPDATER(base_class_name, class_name, alias) \
   class ImplementationUpdater_##base_class_name##class_name {         \
    public:                                                            \
     ImplementationUpdater_##base_class_name##class_name() {           \
-      TC_IMPLEMENTATION_HOLDER_NAME(base_class_name)::get_instance()  \
+      TI_IMPLEMENTATION_HOLDER_NAME(base_class_name)::get_instance()  \
           ->update<class_name>(alias);                                \
     }                                                                 \
   } ImplementationUpdater_##base_class_name##class_name##instance;
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/common/meta.h b/taichi/common/meta.h
index 13932bd61a4ec..07d76fef10a1f 100644
--- a/taichi/common/meta.h
+++ b/taichi/common/meta.h
@@ -17,7 +17,7 @@ namespace taichi {
 namespace meta {
 template <template <int> class F, int bgn, int end, typename... Args>
 struct RepeatFunctionHelper {
-  TC_FORCE_INLINE static void run(Args &&... args) {
+  TI_FORCE_INLINE static void run(Args &&... args) {
     F<bgn>::run(args...);
     RepeatFunctionHelper<F, bgn + 1, end, Args...>::run(
         std::forward<Args>(args)...);
@@ -26,13 +26,13 @@ struct RepeatFunctionHelper {
 
 template <template <int> class F, int bgn, typename... Args>
 struct RepeatFunctionHelper<F, bgn, bgn, Args...> {
-  TC_FORCE_INLINE static void run(Args &&... args) {
+  TI_FORCE_INLINE static void run(Args &&... args) {
     return;
   }
 };
 
 template <template <int> class F, int bgn, int end, typename... Args>
-TC_FORCE_INLINE void repeat_function(Args &&... args) {
+TI_FORCE_INLINE void repeat_function(Args &&... args) {
   RepeatFunctionHelper<F, bgn, end, Args...>::run(std::forward<Args>(args)...);
 }
 }  // namespace meta
@@ -104,27 +104,27 @@ using STATIC_IF::static_if;
 
 // Note the the behaviour of 'return' is still different in the following two
 // implementations.
-#if defined(TC_CPP17)
+#if defined(TI_CPP17)
 
-#define TC_STATIC_IF(x) if constexpr (x) {
-#define TC_STATIC_ELSE \
+#define TI_STATIC_IF(x) if constexpr (x) {
+#define TI_STATIC_ELSE \
   }                    \
   else {
-#define TC_STATIC_END_IF }
+#define TI_STATIC_END_IF }
 
 #else
 
-#define TC_STATIC_IF(x) taichi::static_if<(x)>([&](const auto& id) -> void {
-#define TC_STATIC_ELSE \
+#define TI_STATIC_IF(x) taichi::static_if<(x)>([&](const auto& id) -> void {
+#define TI_STATIC_ELSE \
   }).else_([&](const auto &id) -> void {
-#define TC_STATIC_END_IF \
+#define TI_STATIC_END_IF \
   });
 
 #endif
 
 template <typename T, typename G>
 struct copy_refcv {
-  TC_STATIC_ASSERT(
+  TI_STATIC_ASSERT(
       (std::is_same<G, std::remove_cv_t<std::remove_reference_t<G>>>::value));
   static constexpr bool has_lvalue_ref = std::is_lvalue_reference<T>::value;
   static constexpr bool has_rvalue_ref = std::is_rvalue_reference<T>::value;
@@ -141,28 +141,28 @@ struct copy_refcv {
 template <typename T, typename G>
 using copy_refcv_t = typename copy_refcv<T, G>::type;
 
-TC_STATIC_ASSERT((std::is_same<const volatile int, volatile const int>::value));
-TC_STATIC_ASSERT(
+TI_STATIC_ASSERT((std::is_same<const volatile int, volatile const int>::value));
+TI_STATIC_ASSERT(
     (std::is_same<int,
                   std::remove_volatile_t<
                       std::remove_const_t<const volatile int>>>::value));
-TC_STATIC_ASSERT(
+TI_STATIC_ASSERT(
     (std::is_same<int,
                   std::remove_const_t<
                       std::remove_volatile_t<const volatile int>>>::value));
-TC_STATIC_ASSERT((std::is_same<int &, std::add_const_t<int &>>::value));
-TC_STATIC_ASSERT((std::is_same<copy_refcv_t<int, real>, real>::value));
-TC_STATIC_ASSERT((std::is_same<copy_refcv_t<int &, real>, real &>::value));
-TC_STATIC_ASSERT((copy_refcv<const int &, real>::has_lvalue_ref));
-TC_STATIC_ASSERT(
+TI_STATIC_ASSERT((std::is_same<int &, std::add_const_t<int &>>::value));
+TI_STATIC_ASSERT((std::is_same<copy_refcv_t<int, real>, real>::value));
+TI_STATIC_ASSERT((std::is_same<copy_refcv_t<int &, real>, real &>::value));
+TI_STATIC_ASSERT((copy_refcv<const int &, real>::has_lvalue_ref));
+TI_STATIC_ASSERT(
     (std::is_same<copy_refcv<const int &, real>::G2, const real>::value));
-TC_STATIC_ASSERT(
+TI_STATIC_ASSERT(
     (std::is_same<copy_refcv_t<const int &, real>, const real &>::value));
-TC_STATIC_ASSERT((std::is_same<copy_refcv_t<const volatile int &, real>,
+TI_STATIC_ASSERT((std::is_same<copy_refcv_t<const volatile int &, real>,
                                const volatile real &>::value));
 
 // clang-format off
-#define TC_REPEAT27(F) \
+#define TI_REPEAT27(F) \
   F(0);                \
   F(1);                \
   F(2);                \
@@ -191,7 +191,7 @@ TC_STATIC_ASSERT((std::is_same<copy_refcv_t<const volatile int &, real>,
   F(25);               \
   F(26);
 
-#define TC_LIST27(F)   \
+#define TI_LIST27(F)   \
   F(0),             \
   F(1),             \
   F(2),             \
diff --git a/taichi/common/serialization.h b/taichi/common/serialization.h
index b121a64d4b08d..c4522b3d945d7 100644
--- a/taichi/common/serialization.h
+++ b/taichi/common/serialization.h
@@ -16,19 +16,19 @@
 #include <iostream>
 #include <type_traits>
 
-#ifdef TC_INCLUDED
-TC_NAMESPACE_BEGIN
+#ifdef TI_INCLUDED
+TI_NAMESPACE_BEGIN
 #else
-#define TC_NAMESPACE_BEGIN
-#define TC_NAMESPACE_END
-#define TC_EXPORT
-#define TC_TRACE
-#define TC_CRITICAL
-#define TC_ASSERT assert
+#define TI_NAMESPACE_BEGIN
+#define TI_NAMESPACE_END
+#define TI_EXPORT
+#define TI_TRACE
+#define TI_CRITICAL
+#define TI_ASSERT assert
 #endif
 
 template <typename T>
-TC_EXPORT std::unique_ptr<T> create_instance_unique(const std::string &alias);
+TI_EXPORT std::unique_ptr<T> create_instance_unique(const std::string &alias);
 
 ////////////////////////////////////////////////////////////////////////////////
 //                   A Minimalist Serializer for Taichi                       //
@@ -53,7 +53,7 @@ template <typename T>
 using is_unit_t = typename is_unit<T>::type;
 }  // namespace type
 
-#define TC_IO_DECL_INST                               \
+#define TI_IO_DECL_INST                               \
   void binary_io(BinaryOutputSerializer &ser) const { \
     ser(*this);                                       \
   }                                                   \
@@ -61,7 +61,7 @@ using is_unit_t = typename is_unit<T>::type;
     ser(*this);                                       \
   }
 
-#define TC_IO_DECL_INST_VIRT                                  \
+#define TI_IO_DECL_INST_VIRT                                  \
   virtual void binary_io(BinaryOutputSerializer &ser) const { \
     ser(*this);                                               \
   }                                                           \
@@ -69,7 +69,7 @@ using is_unit_t = typename is_unit<T>::type;
     ser(*this);                                               \
   }
 
-#define TC_IO_DECL_INST_VIRT_OVERRIDE                                  \
+#define TI_IO_DECL_INST_VIRT_OVERRIDE                                  \
   virtual void binary_io(BinaryOutputSerializer &ser) const override { \
     ser(*this);                                                        \
   }                                                                    \
@@ -77,47 +77,47 @@ using is_unit_t = typename is_unit<T>::type;
     ser(*this);                                                        \
   }
 
-#define TC_IO_DECL      \
-  TC_IO_DECL_INST       \
+#define TI_IO_DECL      \
+  TI_IO_DECL_INST       \
   template <typename S> \
   void io(S &serializer) const
 
-#define TC_IO_DECL_VIRT \
-  TC_IO_DECL_INST_VIRT  \
+#define TI_IO_DECL_VIRT \
+  TI_IO_DECL_INST_VIRT  \
   template <typename S> \
   void io(S &serializer) const
 
-#define TC_IO_DECL_VIRT_OVERRIDE \
-  TC_IO_DECL_INST_VIRT_OVERRIDE  \
+#define TI_IO_DECL_VIRT_OVERRIDE \
+  TI_IO_DECL_INST_VIRT_OVERRIDE  \
   template <typename S>          \
   void io(S &serializer) const
 
-#define TC_IO_DEF(...)           \
-  TC_IO_DECL_INST                \
+#define TI_IO_DEF(...)           \
+  TI_IO_DECL_INST                \
   template <typename S>          \
   void io(S &serializer) const { \
-    TC_IO(__VA_ARGS__)           \
+    TI_IO(__VA_ARGS__)           \
   }
 
-#define TC_IO_DEF_VIRT(...)      \
-  TC_IO_DECL_INST_VIRT           \
+#define TI_IO_DEF_VIRT(...)      \
+  TI_IO_DECL_INST_VIRT           \
   template <typename S>          \
   void io(S &serializer) const { \
-    TC_IO(__VA_ARGS__)           \
+    TI_IO(__VA_ARGS__)           \
   }
 
-#define TC_IO_DEF_WITH_BASE(...) \
-  TC_IO_DECL_INST_VIRT_OVERRIDE  \
+#define TI_IO_DEF_WITH_BASE(...) \
+  TI_IO_DECL_INST_VIRT_OVERRIDE  \
   template <typename S>          \
   void io(S &serializer) const { \
     Base::io(serializer);        \
-    TC_IO(__VA_ARGS__)           \
+    TI_IO(__VA_ARGS__)           \
   }
 
-#define TC_IO(...) \
+#define TI_IO(...) \
   { serializer(#__VA_ARGS__, __VA_ARGS__); }
 
-#define TC_SERIALIZER_IS(T)                                                 \
+#define TI_SERIALIZER_IS(T)                                                 \
   (std::is_same<typename std::remove_reference<decltype(serializer)>::type, \
                 T>())
 
@@ -180,7 +180,7 @@ inline std::vector<uint8> read_data_from_file(const std::string &fn) {
   std::vector<uint8_t> data;
   std::FILE *f = fopen(fn.c_str(), "rb");
   if (f == nullptr) {
-    TC_ERROR("Cannot open file: {}", fn);
+    TI_ERROR("Cannot open file: {}", fn);
     return std::vector<uint8_t>();
   }
   if (ends_with(fn, ".zip")) {
@@ -212,7 +212,7 @@ inline void write_data_to_file(const std::string &fn,
                                std::size_t size) {
   std::FILE *f = fopen(fn.c_str(), "wb");
   if (f == nullptr) {
-    TC_ERROR("Cannot open file [{}] for writing. (Does the directory exist?)",
+    TI_ERROR("Cannot open file [{}] for writing. (Does the directory exist?)",
              fn);
     assert(f != nullptr);
   }
@@ -223,7 +223,7 @@ inline void write_data_to_file(const std::string &fn,
     fwrite(data, sizeof(uint8_t), size, f);
     std::fclose(f);
   } else {
-    TC_ERROR("File must end with .tcb or .tcb.zip. [Filename = {}]", fn);
+    TI_ERROR("File must end with .tcb or .tcb.zip. [Filename = {}]", fn);
   }
 }
 
@@ -263,7 +263,7 @@ class BinarySerializer : public Serializer {
     std::size_t n = 0;
     head = 0;
     if (preserved_ != 0) {
-      TC_TRACE("perserved = {}", preserved_);
+      TI_TRACE("preserved = {}", preserved_);
       // Preserved mode
       this->preserved = preserved_;
       assert(c_data != nullptr);
@@ -352,7 +352,7 @@ class BinarySerializer : public Serializer {
       std::size_t new_size = head + sizeof(T);
       if (c_data) {
         if (new_size > preserved) {
-          TC_CRITICAL("Preserved Buffer (size {}) Overflow.", preserved);
+          TI_CRITICAL("Preserved Buffer (size {}) Overflow.", preserved);
         }
         //*reinterpret_cast<typename type::remove_cvref_t<T> *>(&c_data[head]) =
         //    val;
@@ -444,7 +444,7 @@ class BinarySerializer : public Serializer {
     if (writing) {
       this->operator()("", ptr_to_int(val));
       if (val != nullptr) {
-        TC_ASSERT_INFO(assets.find(ptr_to_int(val)) != assets.end(),
+        TI_ASSERT_INFO(assets.find(ptr_to_int(val)) != assets.end(),
                        "Cannot find the address with a smart pointer pointing "
                        "to. Make sure the smart pointer is serialized before "
                        "the raw pointer.");
@@ -453,7 +453,7 @@ class BinarySerializer : public Serializer {
       std::size_t val_ptr;
       this->operator()("", val_ptr);
       if (val_ptr != 0) {
-        TC_ASSERT(assets.find(val_ptr) != assets.end());
+        TI_ASSERT(assets.find(val_ptr) != assets.end());
         val = reinterpret_cast<typename std::remove_pointer<T>::type *>(
             assets[val_ptr]);
       }
@@ -759,4 +759,4 @@ static_assert(
         std::vector<std::unique_ptr<int>> &>(),
     "");
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/common/task.h b/taichi/common/task.h
index 338a04bcc1672..b178ff307af1a 100644
--- a/taichi/common/task.h
+++ b/taichi/common/task.h
@@ -9,7 +9,7 @@
 #include <string>
 #include "interface.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class Task : public Unit {
  public:
@@ -26,7 +26,7 @@ class Task : public Unit {
   }
 };
 
-TC_INTERFACE(Task)
+TI_INTERFACE(Task)
 
 template <typename T>
 inline std::enable_if_t<
@@ -66,12 +66,12 @@ task_invoke(const T &func, const std::vector<std::string> &params) {
   return func();
 }
 
-#define TC_REGISTER_TASK(task)                                             \
+#define TI_REGISTER_TASK(task)                                             \
   class Task_##task : public taichi::Task {                                \
     std::string run(const std::vector<std::string> &parameters) override { \
       return taichi::task_invoke<decltype(task)>(task, parameters);        \
     }                                                                      \
   };                                                                       \
-  TC_IMPLEMENTATION(Task, Task_##task, #task)
+  TI_IMPLEMENTATION(Task, Task_##task, #task)
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/common/testing.h b/taichi/common/testing.h
index 698dd74ba1a6d..b6c70764bbb81 100644
--- a/taichi/common/testing.h
+++ b/taichi/common/testing.h
@@ -10,9 +10,9 @@
 #include <catch.hpp>
 #undef BENCHMARK
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
-#define TC_CHECK_EQUAL(A, B, tolerance)              \
+#define TI_CHECK_EQUAL(A, B, tolerance)              \
   {                                                  \
     if (!taichi::math::equal(A, B, tolerance)) {     \
       std::cout << A << std::endl << B << std::endl; \
@@ -20,17 +20,17 @@ TC_NAMESPACE_BEGIN
     CHECK(taichi::math::equal(A, B, tolerance));     \
   }
 
-#define TC_ASSERT_EQUAL(A, B, tolerance)             \
+#define TI_ASSERT_EQUAL(A, B, tolerance)             \
   {                                                  \
     if (!taichi::math::equal(A, B, tolerance)) {     \
       std::cout << A << std::endl << B << std::endl; \
-      TC_ERROR(#A " != " #B);                        \
+      TI_ERROR(#A " != " #B);                        \
     }                                                \
   }
 
-#define TC_TEST(x) TEST_CASE(x, ("[" x "]"))
-#define TC_CHECK(x) CHECK(x)
+#define TI_TEST(x) TEST_CASE(x, ("[" x "]"))
+#define TI_CHECK(x) CHECK(x)
 
 int run_tests(std::vector<std::string> argv);
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/common/util.h b/taichi/common/util.h
index 34cf359233e61..e1bdfeb4f7570 100644
--- a/taichi/common/util.h
+++ b/taichi/common/util.h
@@ -34,7 +34,7 @@
 
 // Windows
 #if defined(_WIN64)
-#define TC_PLATFORM_WINDOWS
+#define TI_PLATFORM_WINDOWS
 #endif
 
 #if defined(_WIN32) && !defined(_WIN64)
@@ -43,20 +43,20 @@ static_assert(false, "32-bit Windows systems are not supported")
 
 // Linux
 #if defined(__linux__)
-#define TC_PLATFORM_LINUX
+#define TI_PLATFORM_LINUX
 #endif
 
 // OSX
 #if defined(__APPLE__)
-#define TC_PLATFORM_OSX
+#define TI_PLATFORM_OSX
 #endif
 
-#if (defined(TC_PLATFORM_LINUX) || defined(TC_PLATFORM_OSX))
-#define TC_PLATFORM_UNIX
+#if (defined(TI_PLATFORM_LINUX) || defined(TI_PLATFORM_OSX))
+#define TI_PLATFORM_UNIX
 #endif
 
 // Avoid dependency on glibc 2.27
-#if defined(TC_PLATFORM_LINUX) && defined(TI_ARCH_x86_64)
+#if defined(TI_PLATFORM_LINUX) && defined(TI_ARCH_x86_64)
 // objdump -T libtaichi_core.so| grep  GLIBC_2.27
 __asm__(".symver logf,logf@GLIBC_2.2.5");
 __asm__(".symver powf,powf@GLIBC_2.2.5");
@@ -67,38 +67,38 @@ __asm__(".symver expf,expf@GLIBC_2.2.5");
 
 // MSVC
 #if defined(_MSC_VER)
-#define TC_COMPILER_MSVC
+#define TI_COMPILER_MSVC
 #endif
 
 // MINGW
 #if defined(__MINGW64__)
-#define TC_COMPILER_MINGW
+#define TI_COMPILER_MINGW
 #endif
 
 // gcc
 #if defined(__GNUC__)
-#define TC_COMPILER__GCC
+#define TI_COMPILER__GCC
 #endif
 
 // clang
 #if defined(__clang__)
-#define TC_COMPILER_CLANG
+#define TI_COMPILER_CLANG
 #endif
 
-#if defined(TC_COMPILER_MSVC)
-#define TC_ALIGNED(x) __declspec(align(x))
+#if defined(TI_COMPILER_MSVC)
+#define TI_ALIGNED(x) __declspec(align(x))
 #else
-#define TC_ALIGNED(x) __attribute__((aligned(x)))
+#define TI_ALIGNED(x) __attribute__((aligned(x)))
 #endif
 
 #if __cplusplus >= 201703L
-#define TC_CPP17
+#define TI_CPP17
 #else
-#if defined(TC_COMPILER_CLANG)
+#if defined(TI_COMPILER_CLANG)
 static_assert(false, "For clang compilers, use -std=c++17");
 #endif
 static_assert(__cplusplus >= 201402L, "C++14 required.");
-#define TC_CPP14
+#define TI_CPP14
 #endif
 
 // Do not disable assert...
@@ -114,14 +114,14 @@ static_assert(__cplusplus >= 201402L, "C++14 required.");
 #include <windows.h>
 #pragma warning(pop)
 #include <intrin.h>
-#define TC_EXPORT __declspec(dllexport)
+#define TI_EXPORT __declspec(dllexport)
 #else
 #define __FILENAME__ \
   (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
-#define TC_EXPORT
+#define TI_EXPORT
 #endif
-#define TC_P(x) \
-  { TC_DEBUG("{}", taichi::TextSerializer::serialize(#x, (x))); }
+#define TI_P(x) \
+  { TI_DEBUG("{}", taichi::TextSerializer::serialize(#x, (x))); }
 
 #ifndef _WIN64
 #define sscanf_s sscanf
@@ -130,7 +130,7 @@ static_assert(__cplusplus >= 201402L, "C++14 required.");
 
 #undef assert
 #ifdef _WIN64
-#ifndef TC_PASS_EXCEPTION_TO_PYTHON
+#ifndef TI_PASS_EXCEPTION_TO_PYTHON
 // For Visual Studio debugging...
 #define DEBUG_TRIGGER __debugbreak()
 #else
@@ -144,21 +144,21 @@ static_assert(__cplusplus >= 201402L, "C++14 required.");
   {                                        \
     bool ___ret___ = static_cast<bool>(x); \
     if (!___ret___) {                      \
-      TC_ERROR(info);                      \
+      TI_ERROR(info);                      \
     }                                      \
   }
 
-#define TC_STATIC_ASSERT(x) static_assert((x), #x);
-#define TC_ASSERT(x) TC_ASSERT_INFO((x), #x)
-#define TC_ASSERT_INFO assert_info
-#define TC_NOT_IMPLEMENTED TC_ERROR("Not supported.");
+#define TI_STATIC_ASSERT(x) static_assert((x), #x);
+#define TI_ASSERT(x) TI_ASSERT_INFO((x), #x)
+#define TI_ASSERT_INFO assert_info
+#define TI_NOT_IMPLEMENTED TI_ERROR("Not supported.");
 
-#define TC_NAMESPACE_BEGIN namespace taichi {
-#define TC_NAMESPACE_END }
+#define TI_NAMESPACE_BEGIN namespace taichi {
+#define TI_NAMESPACE_END }
 
-    TC_EXPORT void taichi_raise_assertion_failure_in_python(const char *msg);
+    TI_EXPORT void taichi_raise_assertion_failure_in_python(const char *msg);
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 //******************************************************************************
 //                                 System State
@@ -200,15 +200,15 @@ using int64 = int64_t;
 using uint64 = uint64_t;
 
 #ifdef _WIN64
-#define TC_FORCE_INLINE __forceinline
+#define TI_FORCE_INLINE __forceinline
 #else
-#define TC_FORCE_INLINE inline __attribute__((always_inline))
+#define TI_FORCE_INLINE inline __attribute__((always_inline))
 #endif
 
 using float32 = float;
 using float64 = double;
 
-#ifdef TC_USE_DOUBLE
+#ifdef TI_USE_DOUBLE
 using real = float64;
 #else
 using real = float32;
@@ -251,9 +251,9 @@ float64 constexpr operator"" _fd(unsigned long long v) {
   return float64(v);
 }
 
-TC_EXPORT void print_traceback();
+TI_EXPORT void print_traceback();
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
 //******************************************************************************
 //                           Meta-programming
 //******************************************************************************
@@ -268,89 +268,89 @@ namespace spdlog {
 class logger;
 }
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 #define SPD_AUGMENTED_LOG(X, ...)                                        \
   taichi::logger.X(                                                      \
       fmt::format("[{}:{}@{}] ", __FILENAME__, __FUNCTION__, __LINE__) + \
       fmt::format(__VA_ARGS__))
 
-#if defined(TC_PLATFORM_WINDOWS)
+#if defined(TI_PLATFORM_WINDOWS)
 #define TI_UNREACHABLE __assume(0);
 #else
 #define TI_UNREACHABLE __builtin_unreachable();
 #endif
 
-#define TC_TRACE(...) SPD_AUGMENTED_LOG(trace, __VA_ARGS__)
-#define TC_DEBUG(...) SPD_AUGMENTED_LOG(debug, __VA_ARGS__)
-#define TC_INFO(...) SPD_AUGMENTED_LOG(info, __VA_ARGS__)
-#define TC_WARN(...) SPD_AUGMENTED_LOG(warn, __VA_ARGS__)
-#define TC_ERROR(...)                      \
+#define TI_TRACE(...) SPD_AUGMENTED_LOG(trace, __VA_ARGS__)
+#define TI_DEBUG(...) SPD_AUGMENTED_LOG(debug, __VA_ARGS__)
+#define TI_INFO(...) SPD_AUGMENTED_LOG(info, __VA_ARGS__)
+#define TI_WARN(...) SPD_AUGMENTED_LOG(warn, __VA_ARGS__)
+#define TI_ERROR(...)                      \
   {                                        \
     SPD_AUGMENTED_LOG(error, __VA_ARGS__); \
     TI_UNREACHABLE;                        \
   }
-#define TC_CRITICAL(...)                      \
+#define TI_CRITICAL(...)                      \
   {                                           \
     SPD_AUGMENTED_LOG(critical, __VA_ARGS__); \
     TI_UNREACHABLE;                           \
   }
 
-#define TC_TRACE_IF(condition, ...) \
+#define TI_TRACE_IF(condition, ...) \
   if (condition) {                  \
-    TC_TRACE(__VA_ARGS__);          \
+    TI_TRACE(__VA_ARGS__);          \
   }
-#define TC_TRACE_UNLESS(condition, ...) \
+#define TI_TRACE_UNLESS(condition, ...) \
   if (!(condition)) {                   \
-    TC_TRACE(__VA_ARGS__);              \
+    TI_TRACE(__VA_ARGS__);              \
   }
-#define TC_DEBUG_IF(condition, ...) \
+#define TI_DEBUG_IF(condition, ...) \
   if (condition) {                  \
-    TC_DEBUG(__VA_ARGS__);          \
+    TI_DEBUG(__VA_ARGS__);          \
   }
-#define TC_DEBUG_UNLESS(condition, ...) \
+#define TI_DEBUG_UNLESS(condition, ...) \
   if (!(condition)) {                   \
-    TC_DEBUG(__VA_ARGS__);              \
+    TI_DEBUG(__VA_ARGS__);              \
   }
-#define TC_INFO_IF(condition, ...) \
+#define TI_INFO_IF(condition, ...) \
   if (condition) {                 \
-    TC_INFO(__VA_ARGS__);          \
+    TI_INFO(__VA_ARGS__);          \
   }
-#define TC_INFO_UNLESS(condition, ...) \
+#define TI_INFO_UNLESS(condition, ...) \
   if (!(condition)) {                  \
-    TC_INFO(__VA_ARGS__);              \
+    TI_INFO(__VA_ARGS__);              \
   }
-#define TC_WARN_IF(condition, ...) \
+#define TI_WARN_IF(condition, ...) \
   if (condition) {                 \
-    TC_WARN(__VA_ARGS__);          \
+    TI_WARN(__VA_ARGS__);          \
   }
-#define TC_WARN_UNLESS(condition, ...) \
+#define TI_WARN_UNLESS(condition, ...) \
   if (!(condition)) {                  \
-    TC_WARN(__VA_ARGS__);              \
+    TI_WARN(__VA_ARGS__);              \
   }
-#define TC_ERROR_IF(condition, ...) \
+#define TI_ERROR_IF(condition, ...) \
   if (condition) {                  \
-    TC_ERROR(__VA_ARGS__);          \
+    TI_ERROR(__VA_ARGS__);          \
   }
-#define TC_ERROR_UNLESS(condition, ...) \
+#define TI_ERROR_UNLESS(condition, ...) \
   if (!(condition)) {                   \
-    TC_ERROR(__VA_ARGS__);              \
+    TI_ERROR(__VA_ARGS__);              \
   }
-#define TC_CRITICAL_IF(condition, ...) \
+#define TI_CRITICAL_IF(condition, ...) \
   if (condition) {                     \
-    TC_CRITICAL(__VA_ARGS__);          \
+    TI_CRITICAL(__VA_ARGS__);          \
   }
-#define TC_CRITICAL_UNLESS(condition, ...) \
+#define TI_CRITICAL_UNLESS(condition, ...) \
   if (!(condition)) {                      \
-    TC_CRITICAL(__VA_ARGS__);              \
+    TI_CRITICAL(__VA_ARGS__);              \
   }
 
-#define TC_STOP TC_ERROR("Stopping here")
-#define TC_TAG TC_TRACE("Tagging here")
+#define TI_STOP TI_ERROR("Stopping here")
+#define TI_TAG TI_TRACE("Tagging here")
 
-#define TC_LOG_SET_PATTERN(x) spdlog::set_pattern(x);
+#define TI_LOG_SET_PATTERN(x) spdlog::set_pattern(x);
 
-#define TC_FLUSH_LOGGER \
+#define TI_FLUSH_LOGGER \
   { taichi::logger.flush(); };
 
 
@@ -431,7 +431,7 @@ inline bool starts_with(std::string const &str, std::string const &ending) {
     return std::equal(ending.begin(), ending.end(), str.begin());
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
 
 //******************************************************************************
 //                               Serialization
@@ -443,7 +443,7 @@ TC_NAMESPACE_END
 //                                   Misc.
 //******************************************************************************
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 extern int __trash__;
 template <typename T>
@@ -465,10 +465,10 @@ class DeferedExecution {
   }
 };
 
-#define TC_DEFER(x) taichi::DeferedExecution _defered([&]() { x; });
+#define TI_DEFER(x) taichi::DeferedExecution _defered([&]() { x; });
 
 inline bool running_on_windows() {
-#if defined(TC_PLATFORM_WINDOWS)
+#if defined(TI_PLATFORM_WINDOWS)
   return true;
 #else
   return false;
@@ -495,7 +495,7 @@ inline std::string absolute_path(std::string path) {
   //    C. Those who start with "$" are relative to assets_dir()
   //    D. Others are relative to $ENV{TAICHI_REPO_DIR}
 
-  TC_ASSERT(!path.empty());
+  TI_ASSERT(!path.empty());
   if (path[0] == '$') {
     path = assets_dir() + path.substr(1, (int)path.size() - 1);
   } else if (path[0] != '.' && path[0] != '/' &&
@@ -521,6 +521,6 @@ std::string get_cuda_version_string();
 
 std::string get_cuda_root_dir();
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
 
 #include "asset_manager.h"
diff --git a/taichi/core/bit.cpp b/taichi/core/bit.cpp
index e8842f06a7925..ed975c2a01ee3 100644
--- a/taichi/core/bit.cpp
+++ b/taichi/core/bit.cpp
@@ -6,18 +6,18 @@
 #include <taichi/testing.h>
 #include <taichi/common/bit.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 using namespace bit;
 
 struct Flags : public Bits<32> {
   using Base = Bits<32>;
-  TC_BIT_FIELD(bool, apple, 0);
-  TC_BIT_FIELD(bool, banana, 1);
-  TC_BIT_FIELD(uint8, cherry, 2);
+  TI_BIT_FIELD(bool, apple, 0);
+  TI_BIT_FIELD(bool, banana, 1);
+  TI_BIT_FIELD(uint8, cherry, 2);
 };
 
-TC_TEST("bit") {
+TI_TEST("bit") {
   Bits<32> b;
   b.set<5>(1);
   CHECK(b.get() == 32);
@@ -66,8 +66,8 @@ TC_TEST("bit") {
 
   // float64 t = 123.456789;
   // auto e = extract(t);
-  // TC_P(std::get<0>(e));
-  // TC_P(std::get<1>(e));
+  // TI_P(std::get<0>(e));
+  // TI_P(std::get<1>(e));
   // CHECK(t == compress(std::get<0>(e), std::get<1>(e)));
 }
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/core/core.cpp b/taichi/core/core.cpp
index f130522a32592..bc89b1fa873d0 100644
--- a/taichi/core/core.cpp
+++ b/taichi/core/core.cpp
@@ -6,7 +6,7 @@
 #include <taichi/common/util.h>
 #include <taichi/common/version.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 bool is_release() {
   auto dir = std::getenv("TAICHI_REPO_DIR");
@@ -31,11 +31,11 @@ std::string get_repo_dir() {
   auto dir = std::getenv("TAICHI_REPO_DIR");
   if (is_release()) {
     // release mode. Use ~/.taichi as root
-#if defined(TC_PLATFORM_WINDOWS)
+#if defined(TI_PLATFORM_WINDOWS)
     return "C:/taichi_cache/";
 #else
     auto home = std::getenv("HOME");
-    TC_ASSERT(home != nullptr);
+    TI_ASSERT(home != nullptr);
     return std::string(home) + "/.taichi/";
 #endif
   } else {
@@ -79,4 +79,4 @@ std::string get_cuda_root_dir() {
   return TI_CUDAROOT_DIR;
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/core/interfaces.cpp b/taichi/core/interfaces.cpp
index 527d7d6ac30d9..5999f270ae3b5 100644
--- a/taichi/core/interfaces.cpp
+++ b/taichi/core/interfaces.cpp
@@ -8,9 +8,9 @@
 #include <taichi/common/task.h>
 #include <taichi/system/benchmark.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
-TC_INTERFACE_DEF(Benchmark, "benchmark")
-TC_INTERFACE_DEF(Task, "task")
+TI_INTERFACE_DEF(Benchmark, "benchmark")
+TI_INTERFACE_DEF(Task, "task")
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/core/logging.cpp b/taichi/core/logging.cpp
index 798fd2ac2e2d9..59738460d7ebb 100644
--- a/taichi/core/logging.cpp
+++ b/taichi/core/logging.cpp
@@ -9,7 +9,7 @@
 #include <spdlog/spdlog.h>
 #include <taichi/geometry/factory.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 Function11 python_at_exit;
 
@@ -17,7 +17,7 @@ const auto default_logging_level = "info";
 
 void signal_handler(int signo);
 
-#define TC_REGISTER_SIGNAL_HANDLER(name, handler)                   \
+#define TI_REGISTER_SIGNAL_HANDLER(name, handler)                   \
   {                                                                 \
     if (std::signal(name, handler) == SIG_ERR)                      \
       std::printf("Cannot register signal handler for" #name "\n"); \
@@ -53,7 +53,7 @@ int Logger::level_enum_from_string(const std::string &level_name) {
   } else if (level_name == "off") {
     return spdlog::level::off;
   } else {
-    TC_ERROR(
+    TI_ERROR(
         "Unknown logging level [{}]. Levels = trace, debug, info, warn, error, "
         "critical, off",
         level_name);
@@ -63,16 +63,16 @@ int Logger::level_enum_from_string(const std::string &level_name) {
 Logger::Logger() {
   console = spdlog::stdout_color_mt("console");
   console->flush_on(spdlog::level::trace);
-  TC_LOG_SET_PATTERN("[%L %D %X.%e] %v")
+  TI_LOG_SET_PATTERN("[%L %D %X.%e] %v")
 
-  TC_REGISTER_SIGNAL_HANDLER(SIGSEGV, signal_handler);
-  TC_REGISTER_SIGNAL_HANDLER(SIGABRT, signal_handler);
+  TI_REGISTER_SIGNAL_HANDLER(SIGSEGV, signal_handler);
+  TI_REGISTER_SIGNAL_HANDLER(SIGABRT, signal_handler);
 #if !defined(_WIN64)
-  TC_REGISTER_SIGNAL_HANDLER(SIGBUS, signal_handler);
+  TI_REGISTER_SIGNAL_HANDLER(SIGBUS, signal_handler);
 #endif
-  TC_REGISTER_SIGNAL_HANDLER(SIGFPE, signal_handler);
+  TI_REGISTER_SIGNAL_HANDLER(SIGFPE, signal_handler);
   set_level_default();
-  TC_TRACE("Taichi core started. Thread ID = {}", PID::get_pid());
+  TI_TRACE("Taichi core started. Thread ID = {}", PID::get_pid());
 }
 
 void Logger::set_level_default() {
@@ -137,28 +137,28 @@ bool python_at_exit_called = false;
 void signal_handler(int signo) {
   logger.error(
       fmt::format("Received signal {} ({})", signo, signal_name(signo)), false);
-  TC_FLUSH_LOGGER;
+  TI_FLUSH_LOGGER;
   taichi::print_traceback();
   fmt::print("\n\n\n");
   if (taichi::CoreState::get_instance().trigger_gdb_when_crash) {
-#if defined(TC_PLATFORM_LINUX)
+#if defined(TI_PLATFORM_LINUX)
     trash(system(fmt::format("sudo gdb -p {}", PID::get_pid()).c_str()));
 #endif
   }
   if (python_at_exit && !python_at_exit_called) {
     python_at_exit_called = true;
-    TC_INFO("Invoking registered Python at_exit...");
+    TI_INFO("Invoking registered Python at_exit...");
     python_at_exit(0);
-    TC_INFO("Python-side at_exit returned.");
+    TI_INFO("Python-side at_exit returned.");
   }
   if (taichi::CoreState::get_instance().python_imported) {
     std::string msg = fmt::format("Taichi Core Exception: {} ({})", signo,
                                   signal_name(signo));
-#if !defined(TC_AMALGAMATED)
+#if !defined(TI_AMALGAMATED)
     taichi_raise_assertion_failure_in_python(msg.c_str());
 #endif
   }
   std::exit(-1);
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/core/testing.cpp b/taichi/core/testing.cpp
index d832278aa797e..9c3da5276eb34 100644
--- a/taichi/core/testing.cpp
+++ b/taichi/core/testing.cpp
@@ -6,7 +6,7 @@
 #define CATCH_CONFIG_RUNNER
 #include <taichi/common/testing.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 int run_tests(std::vector<std::string> argv) {
   char arg[] = "test";
@@ -22,4 +22,4 @@ int run_tests(std::vector<std::string> argv) {
   return session.run();
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/cuda_utils.h b/taichi/cuda_utils.h
index 9c9166047bd96..e0faa98cabd35 100644
--- a/taichi/cuda_utils.h
+++ b/taichi/cuda_utils.h
@@ -6,21 +6,21 @@
 #include <cuda_runtime_api.h>
 #include <driver_types.h>
 
-#define check_cuda_error(err)                                  \
-  {                                                             \
+#define check_cuda_error(err)                                   \
+  do {                                                          \
     auto __err = (err);                                         \
     if (int(__err))                                             \
-      TC_ERROR("Cuda Error {}: {}", get_cuda_error_name(__err), \
+      TI_ERROR("Cuda Error {}: {}", get_cuda_error_name(__err), \
                get_cuda_error_string(__err));                   \
-  }
+  } while (0)
 
 #define check_cuda_error_as_warning(err)                                 \
-  {                                                                      \
+  do {                                                                   \
     auto __err = (err);                                                  \
     if (int(__err))                                                      \
-      TC_WARN("Cuda Error {} (treated as warning): {}",                  \
+      TI_WARN("Cuda Error {} (treated as warning): {}",                  \
               get_cuda_error_name(__err), get_cuda_error_string(__err)); \
-  }
+  } while (0)
 
 TLANG_NAMESPACE_BEGIN
 
diff --git a/taichi/exception_handling_tests.cpp b/taichi/exception_handling_tests.cpp
index f5d79e07f8ec7..43e6a837e985b 100644
--- a/taichi/exception_handling_tests.cpp
+++ b/taichi/exception_handling_tests.cpp
@@ -22,7 +22,7 @@ class NodeA : public NodeBase {
   }
 
   void visit() override {
-    TC_INFO("Visiting node A");
+    TI_INFO("Visiting node A");
     if (ch)
       ch->visit();
   }
@@ -34,7 +34,7 @@ class NodeB : public NodeBase {
   }
 
   void visit() override {
-    TC_INFO("Visiting node B, throwing std::exception");
+    TI_INFO("Visiting node B, throwing std::exception");
     throw std::exception();
   }
 };
@@ -45,15 +45,15 @@ class NodeC : public NodeBase {
   }
 
   void visit() override {
-    TC_INFO("Visiting node C, throwing IRModified");
+    TI_INFO("Visiting node C, throwing IRModified");
     throw IRModified();
   }
 };
 
 int test_throw(const std::string &seq) {
-  TC_ASSERT(seq.size() >= 0);
+  TI_ASSERT(!seq.empty());  // root stays null for empty input
   std::unique_ptr<NodeBase> root;
-  TC_P(seq);
+  TI_P(seq);
   for (int i = (int)seq.size() - 1; i >= 0; i--) {
     auto ch = seq[i];
     if (ch == 'A') {
@@ -63,16 +63,16 @@ int test_throw(const std::string &seq) {
     } else if (ch == 'C') {
       root = std::make_unique<NodeC>(std::move(root));
     } else {
-      TC_NOT_IMPLEMENTED;
+      TI_NOT_IMPLEMENTED;
     }
   }
   try {
     root->visit();
   } catch (const IRModified &) {
-    TC_INFO("Caught IRModified (Node C)");
+    TI_INFO("Caught IRModified (Node C)");
     return 2;
   } catch (const std::exception &) {
-    TC_INFO("Caught std::exception (Node B)");
+    TI_INFO("Caught std::exception (Node B)");
     return 1;
   }
   return 0;
@@ -83,17 +83,17 @@ auto test_exception_handling = [](const std::vector<std::string> &params) {
 };
 
 auto test_exception_handling_auto = []() {
-  TC_ASSERT(test_throw("A") == 0);
-  TC_ASSERT(test_throw("AAA") == 0);
-  TC_ASSERT(test_throw("AAB") == 1);
-  TC_ASSERT(test_throw("AAC") == 2);
-  TC_ASSERT(test_throw("AACB") == 2);
-  TC_ASSERT(test_throw("AABC") == 1);
-
-  TC_INFO("Test was successful");
+  TI_ASSERT(test_throw("A") == 0);
+  TI_ASSERT(test_throw("AAA") == 0);
+  TI_ASSERT(test_throw("AAB") == 1);
+  TI_ASSERT(test_throw("AAC") == 2);
+  TI_ASSERT(test_throw("AACB") == 2);
+  TI_ASSERT(test_throw("AABC") == 1);
+
+  TI_INFO("Test was successful");
 };
 
-TC_REGISTER_TASK(test_exception_handling);
-TC_REGISTER_TASK(test_exception_handling_auto);
+TI_REGISTER_TASK(test_exception_handling);
+TI_REGISTER_TASK(test_exception_handling_auto);
 
 TLANG_NAMESPACE_END
diff --git a/taichi/expr.cpp b/taichi/expr.cpp
index 0d77360e1d1f1..cbc39ce6b4b5d 100644
--- a/taichi/expr.cpp
+++ b/taichi/expr.cpp
@@ -4,7 +4,7 @@
 TLANG_NAMESPACE_BEGIN
 
 std::string Expr::serialize() const {
-  TC_ASSERT(expr);
+  TI_ASSERT(expr);
   return expr->serialize();
 }
 
diff --git a/taichi/expr.h b/taichi/expr.h
index 1b93b5070d44c..c9b2d36abe277 100644
--- a/taichi/expr.h
+++ b/taichi/expr.h
@@ -58,7 +58,7 @@ class Expr {
 
   template <typename T>
   Handle<T> cast() const {
-    TC_ASSERT(expr != nullptr);
+    TI_ASSERT(expr != nullptr);
     return std::dynamic_pointer_cast<T>(expr);
   }
 
diff --git a/taichi/expression.h b/taichi/expression.h
index 002f9b0be3e84..1171491e1f847 100644
--- a/taichi/expression.h
+++ b/taichi/expression.h
@@ -1,6 +1,6 @@
-// Arithmatic operations
+// Arithmetic operations
 
-#if defined(TC_EXPRESSION_IMPLEMENTATION)
+#if defined(TI_EXPRESSION_IMPLEMENTATION)
 
 #undef DEFINE_EXPRESSION_OP_BINARY
 #undef DEFINE_EXPRESSION_OP_UNARY
diff --git a/taichi/geometry/factory.cpp b/taichi/geometry/factory.cpp
index ae8a84ffdedcd..8b5b370b1b893 100644
--- a/taichi/geometry/factory.cpp
+++ b/taichi/geometry/factory.cpp
@@ -5,7 +5,7 @@
 
 #include <taichi/geometry/factory.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 std::vector<Triangle> Mesh3D::generate(const Vector2i res,
                                        const Function23 *surf,
@@ -85,4 +85,4 @@ std::vector<Triangle> Mesh3D::generate(const Vector2i res,
   return triangles;
 }
 
-TC_NAMESPACE_END
\ No newline at end of file
+TI_NAMESPACE_END
\ No newline at end of file
diff --git a/taichi/geometry/factory.h b/taichi/geometry/factory.h
index dfc3d4bdae7e3..5cd0a82a2fed6 100644
--- a/taichi/geometry/factory.h
+++ b/taichi/geometry/factory.h
@@ -12,7 +12,7 @@
 
 #include <taichi/geometry/primitives.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 template <int n, typename T>
 using VectorLengthed = std::conditional_t<n != 1, VectorND<n, T>, T>;
@@ -43,4 +43,4 @@ class Mesh3D {
                                         bool smooth_normal);
 };
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/geometry/mesh.h b/taichi/geometry/mesh.h
index dd47451fd22f0..808d7c8ea320a 100644
--- a/taichi/geometry/mesh.h
+++ b/taichi/geometry/mesh.h
@@ -11,7 +11,7 @@
 #include <taichi/math.h>
 #include <taichi/visual/scene.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 template <int dim>
 struct Element {
@@ -23,7 +23,7 @@ struct Element {
   Vector v[dim];
   bool open_end[dim];
 
-  TC_IO_DEF(v, open_end);
+  TI_IO_DEF(v, open_end);
 
   Element() {
     for (int i = 0; i < dim; i++) {
@@ -51,15 +51,15 @@ struct Element {
 
   Vector get_normal() const {
     Vector ret;
-    TC_STATIC_IF(dim == 2) {
+    TI_STATIC_IF(dim == 2) {
       Vector d = v[1] - v[0];
       ret = normalized(Vector(d[1], -d[0]));
     }
-    TC_STATIC_ELSE {
+    TI_STATIC_ELSE {
       Vector n = cross(v[1] - v[0], v[2] - v[1]);
       ret = normalized(n);
     }
-    TC_STATIC_END_IF
+    TI_STATIC_END_IF
     return ret;
   }
 };
@@ -73,10 +73,10 @@ struct ElementMesh {
 
   std::vector<Elem> elements;
 
-  TC_IO_DEF(elements);
+  TI_IO_DEF(elements);
 
   void initialize(const Config &config) {
-    TC_STATIC_IF(dim == 2) {
+    TI_STATIC_IF(dim == 2) {
       std::string s = config.get<std::string>("segment_mesh");
       std::stringstream ss(s);
       int n;
@@ -90,8 +90,8 @@ struct ElementMesh {
         elements.push_back(elem);
       }
     }
-    TC_STATIC_ELSE {
-      TC_INFO("Adding mesh, fn={}", config.get<std::string>("mesh_fn"));
+    TI_STATIC_ELSE {
+      TI_INFO("Adding mesh, fn={}", config.get<std::string>("mesh_fn"));
       std::string mesh_fn = config.get<std::string>("mesh_fn");
       auto mesh = std::make_shared<Mesh>();
       Config mesh_config;
@@ -107,11 +107,11 @@ struct ElementMesh {
         elements.push_back(elem);
       }
     }
-    TC_STATIC_END_IF
+    TI_STATIC_END_IF
   }
 };
 
-TC_FORCE_INLINE real distance_to_segment(const Vector2 &pos,
+TI_FORCE_INLINE real distance_to_segment(const Vector2 &pos,
                                          const Vector2 &a,
                                          const Vector2 &b,
                                          bool clamp_to_ends = false,
@@ -134,7 +134,7 @@ TC_FORCE_INLINE real distance_to_segment(const Vector2 &pos,
   }
 }
 
-TC_FORCE_INLINE real distance_to_triangle(const Vector3 &pos,
+TI_FORCE_INLINE real distance_to_triangle(const Vector3 &pos,
                                           const Element<3> &tri) {
   Vector3 normal = tri.get_normal();
   real height = dot(normal, (pos - tri.v[0]));
@@ -153,18 +153,18 @@ TC_FORCE_INLINE real distance_to_triangle(const Vector3 &pos,
 }
 
 // Note: assuming world origin aligns with elem.v[0]
-TC_FORCE_INLINE Matrix2 world_to_element(const Element<2> &elem) {
+TI_FORCE_INLINE Matrix2 world_to_element(const Element<2> &elem) {
   Vector2 v = elem.v[1] - elem.v[0];
   Vector2 n = normalized(Vector2(v.y, -v.x));
   return inversed(Matrix2(v, n));
 }
 
 // Note: assuming world origin aligns with elem.v[0]
-TC_FORCE_INLINE Matrix3 world_to_element(const Element<3> &elem) {
+TI_FORCE_INLINE Matrix3 world_to_element(const Element<3> &elem) {
   Vector3 u = elem.v[1] - elem.v[0];
   Vector3 v = elem.v[2] - elem.v[0];
   Vector3 n = normalized(cross(u, v));
   return inversed(Matrix3(u, v, n));
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/geometry/primitives.h b/taichi/geometry/primitives.h
index c5c5b30e2019e..cb5c2e23ca73d 100644
--- a/taichi/geometry/primitives.h
+++ b/taichi/geometry/primitives.h
@@ -9,7 +9,7 @@
 #include <taichi/math/math.h>
 #include <vector>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class Ray {
  public:
@@ -196,4 +196,4 @@ class Instance {
   Matrix4 transform;
 };
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/gui/cocoa.cpp b/taichi/gui/cocoa.cpp
index fc8975b03379b..64c8f2b6944de 100644
--- a/taichi/gui/cocoa.cpp
+++ b/taichi/gui/cocoa.cpp
@@ -2,10 +2,15 @@
 #include <taichi/common/task.h>
 #include <taichi/visual/gui.h>
 
-#if defined(TC_GUI_COCOA)
+#if defined(TI_GUI_COCOA)
 
 #include <taichi/platform/mac/objc_api.h>
 
+#include <algorithm>
+#include <optional>
+#include <string>
+#include <unordered_map>
+
 // https://stackoverflow.com/questions/4356441/mac-os-cocoa-draw-a-simple-pixel-on-a-canvas
 // http://cocoadevcentral.com/d/intro_to_quartz/
 // Modified based on
@@ -15,6 +20,7 @@
 // https://developer.apple.com/documentation/objectivec/objective-c_runtime?language=objc
 
 #include <ApplicationServices/ApplicationServices.h>
+#include <Carbon/Carbon.h>
 #include <CoreGraphics/CGBase.h>
 #include <CoreGraphics/CGGeometry.h>
 #include <objc/NSObjCRuntime.h>
@@ -23,6 +29,109 @@ namespace {
 using taichi::mac::call;
 using taichi::mac::cast_call;
 using taichi::mac::clscall;
+
+std::string str_tolower(std::string s) {
+  // https://en.cppreference.com/w/cpp/string/byte/tolower
+  std::transform(s.begin(), s.end(), s.begin(),
+                 [](unsigned char c) { return std::tolower(c); });
+  return s;
+}
+
+std::optional<std::string> try_get_alnum(ushort keycode) {
+// Apple's kVK_ANSI_* key codes are not consecutive, hence the lookup table.
+#define CASE(i) \
+  { kVK_ANSI_##i, str_tolower(#i) }
+  static const std::unordered_map<ushort, std::string> key2str = {
+      CASE(0), CASE(1), CASE(2), CASE(3), CASE(4), CASE(5), CASE(6), CASE(7),
+      CASE(8), CASE(9), CASE(A), CASE(B), CASE(C), CASE(D), CASE(E), CASE(F),
+      CASE(G), CASE(H), CASE(I), CASE(J), CASE(K), CASE(L), CASE(M), CASE(N),
+      CASE(O), CASE(P), CASE(Q), CASE(R), CASE(S), CASE(T), CASE(U), CASE(V),
+      CASE(W), CASE(X), CASE(Y), CASE(Z),
+  };
+#undef CASE
+  const auto iter = key2str.find(keycode);
+  if (iter == key2str.end()) {
+    return std::nullopt;
+  }
+  return iter->second;
+}
+
+std::optional<std::string> try_get_fnkey(ushort keycode) {
+  // The kVK_F* function-key codes are not consecutive either.
+#define STRINGIFY(x) #x
+#define CASE(i) \
+  { kVK_F##i, STRINGIFY(F##i) }
+  static const std::unordered_map<ushort, std::string> key2str = {
+      CASE(1),  CASE(2),  CASE(3),  CASE(4),  CASE(5),  CASE(6),
+      CASE(7),  CASE(8),  CASE(9),  CASE(10), CASE(11), CASE(12),
+      CASE(13), CASE(14), CASE(15), CASE(16),
+  };
+#undef CASE
+#undef STRINGIFY
+  const auto iter = key2str.find(keycode);
+  if (iter == key2str.end()) {
+    return std::nullopt;
+  }
+  return iter->second;
+}
+
+std::string lookup_keysym(ushort keycode) {
+  // Full enum definition:
+  // https://github.com/phracker/MacOSX-SDKs/blob/ef9fe35d5691b6dd383c8c46d867a499817a01b6/MacOSX10.6.sdk/System/Library/Frameworks/Carbon.framework/Versions/A/Frameworks/HIToolbox.framework/Versions/A/Headers/Events.h#L198-L315
+  switch (keycode) {
+    case kVK_LeftArrow:
+      return "Left";
+    case kVK_RightArrow:
+      return "Right";
+    case kVK_UpArrow:
+      return "Up";
+    case kVK_DownArrow:
+      return "Down";
+    case kVK_Tab:
+      return "Tab";
+    case kVK_Return:
+      return "Return";
+    // Mac Delete = Backspace on other platforms
+    // Mac ForwardDelete (Fn + Delete) = Delete on other platforms
+    case kVK_Delete:
+      return "BackSpace";
+    case kVK_Escape:
+      return "Escape";
+    case kVK_Shift:
+      return "Shift_L";
+    case kVK_RightShift:
+      return "Shift_R";
+    // Shall we interpret Command key as Ctrl?
+    case kVK_Control:
+      return "Control_L";
+    case kVK_RightControl:
+      return "Control_R";
+    // Mac Option = Alt on other platforms
+    case kVK_Option:
+      return "Alt_L";
+    case kVK_RightOption:
+      return "Alt_R";
+    case kVK_CapsLock:
+      return "Caps_Lock";
+    default:
+      break;
+  }
+  auto val_opt = try_get_alnum(keycode);
+  if (val_opt.has_value()) {
+    return *val_opt;
+  }
+  val_opt = try_get_fnkey(keycode);
+  if (val_opt.has_value()) {
+    return *val_opt;
+  }
+  return "Vk" + std::to_string((int)keycode);
+}
+
+// TODO(k-ye): Define all the magic numbers for Obj-C enums here
+constexpr int NSApplicationActivationPolicyRegular = 0;
+constexpr int NSEventTypeKeyDown = 10;
+constexpr int NSEventTypeKeyUp = 11;
+
 }  // namespace
 
 extern id NSApp;
@@ -36,7 +145,7 @@ typedef struct AppDel {
 class IdComparator {
  public:
   bool operator()(id a, id b) const {
-    TC_STATIC_ASSERT(sizeof(a) == sizeof(taichi::int64));
+    TI_STATIC_ASSERT(sizeof(a) == sizeof(taichi::int64));
     return taichi::bit::reinterpret_bits<taichi::int64>(a) <
            taichi::bit::reinterpret_bits<taichi::int64>(b);
   }
@@ -105,7 +214,7 @@ __attribute__((constructor)) static void initView() {
   objc_registerClassPair(AppDelClass);
 }
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 void GUI::create_window() {
   clscall("NSApplication", "sharedApplication");
@@ -113,6 +222,20 @@ void GUI::create_window() {
     fprintf(stderr, "Failed to initialized NSApplication.\nterminating.\n");
     return;
   }
+  // I finally found how to bring the NSWindow to the front and to handle
+  // keyboard events in these posts:
+  // https://stackoverflow.com/a/11010614/12003165
+  // http://www.cocoawithlove.com/2010/09/minimalist-cocoa-programming.html
+  //
+  // The problem was that a Cocoa app without NIB files (app bundle,
+  // info.plist, whatever the meta files are) by default has a policy of
+  // NSApplicationActivationPolicyProhibited.
+  // (https://developer.apple.com/documentation/appkit/nsapplicationactivationpolicy/nsapplicationactivationpolicyprohibited?language=objc)
+  call(NSApp, "setActivationPolicy:", NSApplicationActivationPolicyRegular);
+  // This doesn't seem necessary, but if some weird bug prevents the window from
+  // being brought to the front, try enabling the call below.
+  // https://stackoverflow.com/a/7460187/12003165
+  // call(NSApp, "activateIgnoringOtherApps:", YES);
   img_data_length = width * height * 4;
   img_data.resize(img_data_length);
   auto appDelObj = clscall("AppDelegate", "alloc");
@@ -145,14 +268,28 @@ void GUI::process_event() {
       call(NSApp, "sendEvent:", event);
       call(NSApp, "updateWindows");
       auto p = cast_call<CGPoint>(event, "locationInWindow");
+      ushort keycode = 0;
+      std::string keysym;
       switch (event_type) {
         case 1:  // NSLeftMouseDown
           set_mouse_pos(p.x, p.y);
           mouse_event(MouseEvent{MouseEvent::Type::press, cursor_pos});
+          key_events.push_back(
+              GUI::KeyEvent{GUI::KeyEvent::Type::press, "LMB", cursor_pos});
           break;
         case 2:  // NSLeftMouseUp
           set_mouse_pos(p.x, p.y);
           mouse_event(MouseEvent{MouseEvent::Type::release, cursor_pos});
+          key_events.push_back(
+              GUI::KeyEvent{GUI::KeyEvent::Type::release, "LMB", cursor_pos});
+          break;
+        case 3:  // NSEventTypeRightMouseDown
+          key_events.push_back(
+              GUI::KeyEvent{GUI::KeyEvent::Type::press, "RMB", cursor_pos});
+          break;
+        case 4:  // NSEventTypeRightMouseUp
+          key_events.push_back(
+              GUI::KeyEvent{GUI::KeyEvent::Type::release, "RMB", cursor_pos});
           break;
         case 5:   // NSMouseMoved
         case 6:   // NSLeftMouseDragged
@@ -161,6 +298,15 @@ void GUI::process_event() {
           set_mouse_pos(p.x, p.y);
           mouse_event(MouseEvent{MouseEvent::Type::move, Vector2i(p.x, p.y)});
           break;
+        case NSEventTypeKeyDown:
+        case NSEventTypeKeyUp:
+          keycode = cast_call<ushort>(event, "keyCode");
+          keysym = lookup_keysym(keycode);
+          auto kev_type = (event_type == NSEventTypeKeyDown)
+                              ? KeyEvent::Type::press
+                              : KeyEvent::Type::release;
+          key_events.push_back(KeyEvent{kev_type, keysym, cursor_pos});
+          break;
       }
     } else {
       break;
@@ -178,6 +324,6 @@ void GUI::redraw() { call(view, "setNeedsDisplay:", YES); }
 
 GUI::~GUI() {}
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
 
 #endif
\ No newline at end of file
diff --git a/taichi/gui/gui.cpp b/taichi/gui/gui.cpp
index 4aba17d94034c..672c849f41c8c 100644
--- a/taichi/gui/gui.cpp
+++ b/taichi/gui/gui.cpp
@@ -1,6 +1,6 @@
 #include <taichi/visual/gui.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 Vector2 Canvas::Line::vertices[128];
 
@@ -30,4 +30,4 @@ void Canvas::circle_single(real x, real y, uint32 color, real radius) {
   circle(x, y).radius(radius).color(color).finish();
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/gui/win32.cpp b/taichi/gui/win32.cpp
index 535027858a5b6..37d85f967ca46 100644
--- a/taichi/gui/win32.cpp
+++ b/taichi/gui/win32.cpp
@@ -1,6 +1,6 @@
 #include <taichi/common/util.h>
 
-#if defined(TC_PLATFORM_WINDOWS)
+#if defined(TI_PLATFORM_WINDOWS)
 #include <windowsx.h>
 #include <taichi/common/task.h>
 #include <taichi/visual/gui.h>
@@ -128,7 +128,7 @@ LRESULT CALLBACK WindowProc(HWND hwnd,
   return DefWindowProc(hwnd, uMsg, wParam, lParam);
 }
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 void GUI::process_event() {
   MSG msg;
@@ -165,7 +165,7 @@ void GUI::create_window() {
   gui_from_hwnd[hwnd] = this;
 
   if (hwnd == NULL) {
-    TC_ERROR("Window creation failed");
+    TI_ERROR("Window creation failed");
   }
 
   ShowWindow(hwnd, SW_SHOWDEFAULT);
@@ -202,6 +202,6 @@ GUI::~GUI() {
   gui_from_hwnd.erase(hwnd);
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
 
 #endif
diff --git a/taichi/gui/x11.cpp b/taichi/gui/x11.cpp
index 6740c92975ad1..84675419c8b4f 100644
--- a/taichi/gui/x11.cpp
+++ b/taichi/gui/x11.cpp
@@ -1,6 +1,6 @@
 #include <taichi/visual/gui.h>
 
-#if defined(TC_GUI_X11)
+#if defined(TI_GUI_X11)
 #include <X11/Xlib.h>
 #include <X11/Xutil.h>
 
@@ -12,7 +12,7 @@
 #undef Success
 #endif
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class CXImage {
  public:
@@ -24,7 +24,7 @@ class CXImage {
     image_data.resize(width * height * 4);
     image = XCreateImage(display, visual, 24, ZPixmap, 0,
                          (char *)image_data.data(), width, height, 32, 0);
-    TC_ASSERT((void *)image->data == image_data.data());
+    TI_ASSERT((void *)image->data == image_data.data());
   }
 
   void set_data(const Array2D<Vector4> &color) {
@@ -130,6 +130,6 @@ GUI::~GUI() {
   delete img;
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
 
 #endif
diff --git a/taichi/io/amal_base64.cpp b/taichi/io/amal_base64.cpp
index ef9e0dcb2305d..2094821782877 100644
--- a/taichi/io/amal_base64.cpp
+++ b/taichi/io/amal_base64.cpp
@@ -1,10 +1,10 @@
 #include <taichi/io/base64.h>
 #include <taichi/common/task.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 auto amal_base64 = [](const std::vector<std::string> &param) {
-  TC_ASSERT(param.size() >= 1);
+  TI_ASSERT(param.size() >= 1);
   auto fn = param[0];
   std::ifstream input(fn);
   std::string str((std::istreambuf_iterator<char>(input)),
@@ -17,7 +17,7 @@ auto amal_base64 = [](const std::vector<std::string> &param) {
         65500 / line_width *
         line_width;  // MSVC cannot deal with literal with length > 65535
     fmt::print(fo,
-               "#include <taichi/common/util.h>\n\nTC_NAMESPACE_BEGIN\n\n\n");
+               "#include <taichi/common/util.h>\n\nTI_NAMESPACE_BEGIN\n\n\n");
     int num_literals = 0;
     for (int l = 0; l < (int)encoded.size(); l += maximum_literal_length) {
       fmt::print(fo, "const std::string {}_{:04d} =\n", param[1], num_literals);
@@ -37,11 +37,11 @@ auto amal_base64 = [](const std::vector<std::string> &param) {
         fmt::print(fo, " + ", param[1], i);
     }
 
-    fmt::print(fo, ";\n\nTC_NAMESPACE_END");
+    fmt::print(fo, ";\n\nTI_NAMESPACE_END");
     std::fclose(fo);
   }
 };
 
-TC_REGISTER_TASK(amal_base64);
+TI_REGISTER_TASK(amal_base64);
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/io/base64.h b/taichi/io/base64.h
index d20dd21d577a2..75ed44e9f6a0c 100644
--- a/taichi/io/base64.h
+++ b/taichi/io/base64.h
@@ -1,6 +1,6 @@
 #include <taichi/common/util.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 /*
    base64.cpp and base64.h
 
@@ -129,4 +129,4 @@ inline std::string base64_decode(std::string const &encoded_string) {
   return ret;
 }
 
-TC_NAMESPACE_END
\ No newline at end of file
+TI_NAMESPACE_END
\ No newline at end of file
diff --git a/taichi/io/binary_stream.h b/taichi/io/binary_stream.h
index f246696819a16..7cd971ed430b7 100644
--- a/taichi/io/binary_stream.h
+++ b/taichi/io/binary_stream.h
@@ -8,7 +8,7 @@
 #include <taichi/common/interface.h>
 #include <cstdio>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class BinaryFileStreamInput final {
  private:
@@ -42,4 +42,4 @@ class BinaryFileStreamOutput final {
   }
 };
 
-TC_NAMESPACE_END
\ No newline at end of file
+TI_NAMESPACE_END
\ No newline at end of file
diff --git a/taichi/io/io.h b/taichi/io/io.h
index aa5ca93879dbe..9ae83bc17c18f 100644
--- a/taichi/io/io.h
+++ b/taichi/io/io.h
@@ -11,14 +11,15 @@
 #include <cstdio>
 #include <cstdlib>
 
-#if !defined(TC_PLATFORM_OSX)
+#if defined(TI_PLATFORM_WINDOWS)
 #include <filesystem>
 #endif
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
+// TODO: move to std::filesystem after it's nonexperimental on all platforms
 inline void create_directories(const std::string &dir) {
-#if !defined(TC_PLATFORM_OSX)
+#if defined(TI_PLATFORM_WINDOWS)
   std::filesystem::create_directories(dir);
 #else
   std::system(fmt::format("mkdir -p {}", dir).c_str());
@@ -79,4 +80,4 @@ bool read_vector_from_disk(std::vector<T> *p_vec, std::string fn) {
 
 using WushiParticles = std::map<std::string, std::vector<float32>>;
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/io/ply.cpp b/taichi/io/ply.cpp
index 45f1a6689c4db..7368e86bf8ddd 100644
--- a/taichi/io/ply.cpp
+++ b/taichi/io/ply.cpp
@@ -6,7 +6,7 @@
 #include <taichi/io/ply_writer.h>
 #include <taichi/io/io.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class TestPLY : public Task {
   std::string run() override {
@@ -23,7 +23,7 @@ class TestPLY : public Task {
   }
 };
 
-TC_IMPLEMENTATION(Task, TestPLY, "test_ply");
+TI_IMPLEMENTATION(Task, TestPLY, "test_ply");
 
 // n, data_ptr, fn, name0, name1, name2, name3, ...
 auto write_tcb_c_2 = [](const std::vector<std::string> &parameters) {
@@ -42,6 +42,6 @@ auto write_tcb_c_2 = [](const std::vector<std::string> &parameters) {
   write_to_binary_file(data, fn);
 };
 
-TC_REGISTER_TASK(write_tcb_c_2);
+TI_REGISTER_TASK(write_tcb_c_2);
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/io/ply_writer.h b/taichi/io/ply_writer.h
index 92674256c016d..b9dca8294adac 100644
--- a/taichi/io/ply_writer.h
+++ b/taichi/io/ply_writer.h
@@ -4,7 +4,7 @@
 *******************************************************************************/
 #include <taichi/util.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class PLYWriter {
  public:
@@ -75,4 +75,4 @@ class PLYWriter {
   }
 };
 
-TC_NAMESPACE_END
\ No newline at end of file
+TI_NAMESPACE_END
\ No newline at end of file
diff --git a/taichi/ir.cpp b/taichi/ir.cpp
index 7da203f386402..82fe4eef4eb40 100644
--- a/taichi/ir.cpp
+++ b/taichi/ir.cpp
@@ -7,7 +7,7 @@
 
 TLANG_NAMESPACE_BEGIN
 
-#define TC_EXPRESSION_IMPLEMENTATION
+#define TI_EXPRESSION_IMPLEMENTATION
 #include "expression.h"
 
 class StatementTypeNameVisitor : public IRVisitor {
@@ -31,19 +31,19 @@ std::string Stmt::type() {
 }
 
 void IRBuilder::insert(std::unique_ptr<Stmt> &&stmt, int location) {
-  TC_ASSERT(!stack.empty());
+  TI_ASSERT(!stack.empty());
   stack.back()->insert(std::move(stmt), location);
 }
 
 void IRBuilder::stop_gradient(SNode *snode) {
-  TC_ASSERT(!stack.empty());
+  TI_ASSERT(!stack.empty());
   stack.back()->stop_gradients.push_back(snode);
 }
 
 GetChStmt::GetChStmt(taichi::Tlang::Stmt *input_ptr, int chid)
     : input_ptr(input_ptr), chid(chid) {
   add_operand(this->input_ptr);
-  TC_ASSERT(input_ptr->is<SNodeLookupStmt>());
+  TI_ASSERT(input_ptr->is<SNodeLookupStmt>());
   input_snode = input_ptr->as<SNodeLookupStmt>()->snode;
   output_snode = input_snode->ch[chid].get();
 }
@@ -76,7 +76,7 @@ Expr bit_cast(const Expr &input, DataType dt) {
 }
 
 Expr Expr::operator[](ExprGroup indices) const {
-  TC_ASSERT(is<GlobalVariableExpression>() || is<ExternalTensorExpression>());
+  TI_ASSERT(is<GlobalVariableExpression>() || is<ExternalTensorExpression>());
   return Expr::make<GlobalPtrExpression>(*this, indices.loaded());
 }
 
@@ -95,7 +95,7 @@ IRBuilder &current_ast_builder() {
 
 std::unique_ptr<IRBuilder::ScopeGuard> IRBuilder::create_scope(
     std::unique_ptr<Block> &list) {
-  TC_ASSERT(list == nullptr);
+  TI_ASSERT(list == nullptr);
   list = std::make_unique<Block>();
   if (!stack.empty()) {
     list->parent = stack.back();
@@ -112,7 +112,7 @@ Expr &Expr::operator=(const Expr &o) {
           ptr_if_global(*this), load_if_ptr(o)));
     } else {
       // set(o.eval());
-      TC_ERROR("Cannot assign to non-lvalue: {}", serialize());
+      TI_ERROR("Cannot assign to non-lvalue: {}", serialize());
     }
   } else {
     set(o);
@@ -146,7 +146,7 @@ Expr::Expr(Identifier id) : Expr() {
 }
 
 Expr Expr::eval() const {
-  TC_ASSERT(expr != nullptr);
+  TI_ASSERT(expr != nullptr);
   if (is<EvalExpression>()) {
     return *this;
   }
@@ -176,11 +176,11 @@ void Expr::operator-=(const Expr &o) {
   }
 }
 void Expr::operator*=(const Expr &o) {
-  TC_ASSERT(!this->atomic);
+  TI_ASSERT(!this->atomic);
   (*this) = (*this) * load_if_ptr(o);
 }
 void Expr::operator/=(const Expr &o) {
-  TC_ASSERT(!this->atomic);
+  TI_ASSERT(!this->atomic);
   (*this) = (*this) / load_if_ptr(o);
 }
 
@@ -221,7 +221,7 @@ FrontendForStmt::FrontendForStmt(const ExprGroup &loop_var,
   if (cfg.arch == Arch::cuda) {
     vectorize = 1;
     parallelize = 1;
-    TC_ASSERT(block_dim <= max_gpu_block_dim);
+    TI_ASSERT(block_dim <= max_gpu_block_dim);
   } else {
     // cpu
     if (block_dim == 0)
@@ -249,7 +249,7 @@ IRNode *Stmt::get_ir_root() {
 
 FrontendAssignStmt::FrontendAssignStmt(const Expr &lhs, const Expr &rhs)
     : lhs(lhs), rhs(rhs) {
-  TC_ASSERT(lhs->is_lvalue());
+  TI_ASSERT(lhs->is_lvalue());
 }
 
 FrontendAtomicStmt::FrontendAtomicStmt(AtomicOpType op_type,
@@ -269,7 +269,7 @@ std::unique_ptr<FrontendContext> context;
 
 // TODO: clean this part up
 void *Expr::evaluate_addr(int i, int j, int k, int l) {
-  TC_NOT_IMPLEMENTED
+  TI_NOT_IMPLEMENTED
 }
 
 template <int i, typename... Indices>
@@ -290,10 +290,10 @@ template <typename... Indices>
 void *Expr::val_tmp(DataType dt, Indices... indices) {
   auto snode = this->cast<GlobalVariableExpression>()->snode;
   if (dt != snode->dt) {
-    TC_ERROR("Cannot access type {} as type {}", data_type_name(snode->dt),
+    TI_ERROR("Cannot access type {} as type {}", data_type_name(snode->dt),
              data_type_name(dt));
   }
-  TC_ASSERT(sizeof...(indices) == snode->num_active_indices);
+  TI_ASSERT(sizeof...(indices) == snode->num_active_indices);
   int ind[max_num_indices];
   std::memset(ind, 0, sizeof(ind));
   auto tup = std::make_tuple(indices...);
@@ -305,18 +305,18 @@ void *Expr::val_tmp(DataType dt, Indices... indices) {
   LOAD_IND(2);
   LOAD_IND(3);
 #undef LOAD_IND
-  TC_ASSERT(max_num_indices == 4);
+  TI_ASSERT(max_num_indices == 4);
   return evaluate_addr(ind[0], ind[1], ind[2], ind[3]);
 }
 
 Expr Expr::parent() const {
-  TC_ASSERT(is<GlobalVariableExpression>());
+  TI_ASSERT(is<GlobalVariableExpression>());
   return Expr::make<GlobalVariableExpression>(
       cast<GlobalVariableExpression>()->snode->parent);
 }
 
 SNode *Expr::snode() const {
-  TC_ASSERT(is<GlobalVariableExpression>());
+  TI_ASSERT(is<GlobalVariableExpression>());
   return cast<GlobalVariableExpression>()->snode;
 }
 
@@ -340,7 +340,7 @@ template void *Expr::val_tmp<int, int, int, int>(DataType, int, int, int, int);
 
 Stmt *Stmt::insert_before_me(std::unique_ptr<Stmt> &&new_stmt) {
   auto ret = new_stmt.get();
-  TC_ASSERT(parent);
+  TI_ASSERT(parent);
   auto &stmts = parent->statements;
   int loc = -1;
   for (int i = 0; i < (int)stmts.size(); i++) {
@@ -349,7 +349,7 @@ Stmt *Stmt::insert_before_me(std::unique_ptr<Stmt> &&new_stmt) {
       break;
     }
   }
-  TC_ASSERT(loc != -1);
+  TI_ASSERT(loc != -1);
   new_stmt->parent = parent;
   stmts.insert(stmts.begin() + loc, std::move(new_stmt));
   return ret;
@@ -357,7 +357,7 @@ Stmt *Stmt::insert_before_me(std::unique_ptr<Stmt> &&new_stmt) {
 
 Stmt *Stmt::insert_after_me(std::unique_ptr<Stmt> &&new_stmt) {
   auto ret = new_stmt.get();
-  TC_ASSERT(parent);
+  TI_ASSERT(parent);
   auto &stmts = parent->statements;
   int loc = -1;
   for (int i = 0; i < (int)stmts.size(); i++) {
@@ -366,7 +366,7 @@ Stmt *Stmt::insert_after_me(std::unique_ptr<Stmt> &&new_stmt) {
       break;
     }
   }
-  TC_ASSERT(loc != -1);
+  TI_ASSERT(loc != -1);
   new_stmt->parent = parent;
   stmts.insert(stmts.begin() + loc + 1, std::move(new_stmt));
   return ret;
@@ -422,19 +422,19 @@ std::string to_string(const LaneAttribute<LocalAddress> &ptr) {
 
 Stmt *LocalLoadStmt::previous_store_or_alloca_in_block() {
   int position = parent->locate(this);
-  // TC_ASSERT(width() == 1);
-  // TC_ASSERT(this->ptr[0].offset == 0);
+  // TI_ASSERT(width() == 1);
+  // TI_ASSERT(this->ptr[0].offset == 0);
   for (int i = position - 1; i >= 0; i--) {
     if (parent->statements[i]->is<LocalStoreStmt>()) {
       auto store = parent->statements[i]->as<LocalStoreStmt>();
-      // TC_ASSERT(store->width() == 1);
+      // TI_ASSERT(store->width() == 1);
       if (store->ptr == this->ptr[0].var) {
         // found
         return store;
       }
     } else if (parent->statements[i]->is<AllocaStmt>()) {
       auto alloca = parent->statements[i]->as<AllocaStmt>();
-      // TC_ASSERT(alloca->width() == 1);
+      // TI_ASSERT(alloca->width() == 1);
       if (alloca == this->ptr[0].var) {
         return alloca;
       }
@@ -480,7 +480,7 @@ void Block::insert(VecStatement &&stmt, int location) {
 void Block::replace_statements_in_range(int start,
                                         int end,
                                         VecStatement &&stmts) {
-  TC_ASSERT(start <= end);
+  TI_ASSERT(start <= end);
   for (int i = 0; i < end - start; i++) {
     erase(start);
   }
@@ -565,16 +565,16 @@ std::string OffloadedStmt::task_name() const {
   } else if (task_type == TaskType::struct_for) {
     return "struct_for";
   } else if (task_type == TaskType::clear_list) {
-    TC_ASSERT(snode);
+    TI_ASSERT(snode);
     return fmt::format("clear_list_{}", snode->get_node_type_name_hinted());
   } else if (task_type == TaskType::listgen) {
-    TC_ASSERT(snode);
+    TI_ASSERT(snode);
     return fmt::format("listgen_{}", snode->get_node_type_name_hinted());
   } else if (task_type == TaskType::gc) {
-    TC_ASSERT(snode);
+    TI_ASSERT(snode);
     return fmt::format("gc_{}", snode->name);
   } else {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
   }
 }
 
diff --git a/taichi/ir.h b/taichi/ir.h
index 2a3ec49ddbc82..e6db6667e7e52 100644
--- a/taichi/ir.h
+++ b/taichi/ir.h
@@ -29,7 +29,7 @@ class DiffRange {
   }
 
   DiffRange(bool related, int coeff) : DiffRange(related, 0, 0) {
-    TC_ASSERT(related == false);
+    TI_ASSERT(related == false);
   }
 
   DiffRange(bool related, int coeff, int low)
@@ -52,7 +52,7 @@ class DiffRange {
   }
 
   bool certain() {
-    TC_ASSERT(related);
+    TI_ASSERT(related);
     return high == low + 1;
   }
 };
@@ -321,7 +321,7 @@ class IRVisitor {
   // default visitor
   virtual void visit(Stmt *stmt) {
     if (!allow_undefined_visitor) {
-      TC_ERROR(
+      TI_ERROR(
           "missing visitor function. Is the statement class registered via "
           "DEFINE_VISIT?");
     }
@@ -333,7 +333,7 @@ class IRVisitor {
       if (invoke_default_visitor)  \
         visit((Stmt *)stmt);       \
     } else                         \
-      TC_NOT_IMPLEMENTED;          \
+      TI_NOT_IMPLEMENTED;          \
   }
 
   DEFINE_VISIT(Block);
@@ -345,7 +345,7 @@ class IRVisitor {
 class IRNode {
  public:
   virtual void accept(IRVisitor *visitor) {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
   }
   virtual ~IRNode() = default;
 };
@@ -383,12 +383,12 @@ struct LaneAttribute {
   }
 
   T &operator[](int i) {
-    TC_ASSERT(0 <= i && i < (int)data.size());
+    TI_ASSERT(0 <= i && i < (int)data.size());
     return data[i];
   }
 
   const T &operator[](int i) const {
-    TC_ASSERT(0 <= i && i < (int)data.size());
+    TI_ASSERT(0 <= i && i < (int)data.size());
     return data[i];
   }
 
@@ -399,7 +399,7 @@ struct LaneAttribute {
 
   // for initializing single lane
   LaneAttribute &operator=(const T &t) {
-    TC_ASSERT(data.size() == 1);
+    TI_ASSERT(data.size() == 1);
     data[0] = t;
     return *this;
   }
@@ -432,8 +432,8 @@ struct LaneAttribute {
     } else if (bracket == "(") {
       ret += ")";
     } else if (bracket != "") {
-      TC_P(bracket);
-      TC_NOT_IMPLEMENTED
+      TI_P(bracket);
+      TI_NOT_IMPLEMENTED
     }
     return ret;
   }
@@ -453,14 +453,14 @@ struct LaneAttribute {
     } else if (bracket == "(") {
       ret += ")";
     } else if (bracket != "") {
-      TC_P(bracket);
-      TC_NOT_IMPLEMENTED
+      TI_P(bracket);
+      TI_NOT_IMPLEMENTED
     }
     return ret;
   }
 
   operator T() const {
-    TC_ASSERT(data.size() == 1);
+    TI_ASSERT(data.size() == 1);
     return data[0];
   }
 
@@ -551,7 +551,7 @@ class Stmt : public IRNode {
 
   template <typename T>
   T *as() {
-    TC_ASSERT(is<T>());
+    TI_ASSERT(is<T>());
     return dynamic_cast<T *>(this);
   }
 
@@ -560,12 +560,12 @@ class Stmt : public IRNode {
     return dynamic_cast<T *>(this);
   }
 
-  TC_FORCE_INLINE int num_operands() const {
+  TI_FORCE_INLINE int num_operands() const {
     return (int)operands.size();
   }
 
-  TC_FORCE_INLINE Stmt *operand(int i) const {
-    // TC_ASSERT(0 <= i && i < (int)operands.size());
+  TI_FORCE_INLINE Stmt *operand(int i) const {
+    // TI_ASSERT(0 <= i && i < (int)operands.size());
     return *operands[i];
   }
 
@@ -599,10 +599,10 @@ class Stmt : public IRNode {
   }
 
   virtual void rebuild_operands() {
-    TC_NOT_IMPLEMENTED;
+    TI_NOT_IMPLEMENTED;
   }
 
-  TC_FORCE_INLINE bool may_have_operand(Stmt *stmt) const {
+  TI_FORCE_INLINE bool may_have_operand(Stmt *stmt) const {
     return (operand_bitmap & operand_hash(stmt)) != 0;
   }
 
@@ -669,7 +669,7 @@ class Expression {
   virtual std::string serialize() = 0;
 
   virtual void flatten(VecStatement &ret) {
-    TC_NOT_IMPLEMENTED;
+    TI_NOT_IMPLEMENTED;
   };
 
   virtual bool is_lvalue() const {
@@ -685,7 +685,7 @@ class Expression {
 
   std::string get_attribute(const std::string &key) const {
     if (auto it = attributes.find(key); it == attributes.end()) {
-      TC_ERROR("Attribute {} not found.", key);
+      TI_ERROR("Attribute {} not found.", key);
     } else {
       return it->second;
     }
@@ -805,7 +805,7 @@ class UnaryOpStmt : public Stmt {
 
   UnaryOpStmt(UnaryOpType op_type, Stmt *operand)
       : op_type(op_type), operand(operand) {
-    TC_ASSERT(!operand->is<AllocaStmt>());
+    TI_ASSERT(!operand->is<AllocaStmt>());
     add_operand(this->operand);
     cast_type = DataType::unknown;
     cast_by_value = true;
@@ -970,8 +970,8 @@ class BinaryOpStmt : public Stmt {
 
   BinaryOpStmt(BinaryOpType op_type, Stmt *lhs, Stmt *rhs)
       : op_type(op_type), lhs(lhs), rhs(rhs) {
-    TC_ASSERT(!lhs->is<AllocaStmt>());
-    TC_ASSERT(!rhs->is<AllocaStmt>());
+    TI_ASSERT(!lhs->is<AllocaStmt>());
+    TI_ASSERT(!rhs->is<AllocaStmt>());
     add_operand(this->lhs);
     add_operand(this->rhs);
   }
@@ -989,9 +989,9 @@ class TernaryOpStmt : public Stmt {
 
   TernaryOpStmt(TernaryOpType op_type, Stmt *op1, Stmt *op2, Stmt *op3)
       : op_type(op_type), op1(op1), op2(op2), op3(op3) {
-    TC_ASSERT(!op1->is<AllocaStmt>());
-    TC_ASSERT(!op2->is<AllocaStmt>());
-    TC_ASSERT(!op3->is<AllocaStmt>());
+    TI_ASSERT(!op1->is<AllocaStmt>());
+    TI_ASSERT(!op2->is<AllocaStmt>());
+    TI_ASSERT(!op3->is<AllocaStmt>());
     add_operand(this->op1);
     add_operand(this->op2);
     add_operand(this->op3);
@@ -1087,8 +1087,8 @@ class ExternalPtrStmt : public Stmt {
       : base_ptrs(base_ptrs), indices(indices) {
     DataType dt = DataType::f32;
     for (int i = 0; i < (int)base_ptrs.size(); i++) {
-      TC_ASSERT(base_ptrs[i] != nullptr);
-      TC_ASSERT(base_ptrs[i]->is<ArgLoadStmt>());
+      TI_ASSERT(base_ptrs[i] != nullptr);
+      TI_ASSERT(base_ptrs[i]->is<ArgLoadStmt>());
     }
     for (int i = 0; i < (int)base_ptrs.size(); i++) {
       add_operand(this->base_ptrs[i]);
@@ -1118,8 +1118,8 @@ class GlobalPtrStmt : public Stmt {
       : snodes(snodes), indices(indices) {
     activate = true;  // use a strong access by default
     for (int i = 0; i < (int)snodes.size(); i++) {
-      TC_ASSERT(snodes[i] != nullptr);
-      TC_ASSERT(snodes[0]->dt == snodes[i]->dt);
+      TI_ASSERT(snodes[i] != nullptr);
+      TI_ASSERT(snodes[0]->dt == snodes[i]->dt);
     }
     for (int i = 0; i < (int)indices.size(); i++) {
       add_operand(this->indices[i]);
@@ -1197,7 +1197,7 @@ class GlobalVariableExpression : public Expression {
   }
 
   void flatten(VecStatement &ret) override {
-    TC_ASSERT(snode->num_active_indices == 0);
+    TI_ASSERT(snode->num_active_indices == 0);
     auto ptr = Stmt::make<GlobalPtrStmt>(LaneAttribute<SNode *>(snode),
                                          std::vector<Stmt *>());
     ret.push_back(std::move(ptr));
@@ -1234,7 +1234,7 @@ class GlobalPtrExpression : public Expression {
       ret.push_back(std::make_unique<GlobalPtrStmt>(
           var.cast<GlobalVariableExpression>()->snode, index_stmts));
     } else {
-      TC_ASSERT(var.is<ExternalTensorExpression>());
+      TI_ASSERT(var.is<ExternalTensorExpression>());
       var->flatten(ret);
       ret.push_back(std::make_unique<ExternalPtrStmt>(
           var.cast<ExternalTensorExpression>()->stmt, index_stmts));
@@ -1327,7 +1327,7 @@ class Block : public IRNode {
         break;
       }
     }
-    TC_ASSERT(location != -1);
+    TI_ASSERT(location != -1);
     for (int i = (int)new_statements.size() - 1; i >= 0; i--) {
       insert(std::move(new_statements[i]), location);
     }
@@ -1343,7 +1343,7 @@ class Block : public IRNode {
         break;
       }
     }
-    TC_ASSERT(location != -1);
+    TI_ASSERT(location != -1);
     if (replace_usages)
       old_statement->replace_with(new_statements.back().get());
     trash_bin.push_back(std::move(statements[location]));
@@ -1403,10 +1403,10 @@ class FrontendSNodeOpStmt : public Stmt {
                       Expr val = Expr(nullptr))
       : op_type(op_type), snode(snode), indices(indices.loaded()), val(val) {
     if (val.expr != nullptr) {
-      TC_ASSERT(op_type == SNodeOpType::append);
+      TI_ASSERT(op_type == SNodeOpType::append);
       this->val.set(load_if_ptr(val));
     } else {
-      TC_ASSERT(op_type != SNodeOpType::append);
+      TI_ASSERT(op_type != SNodeOpType::append);
     }
   }
 
@@ -1423,8 +1423,6 @@ class SNodeOpStmt : public Stmt {
 
   SNodeOpStmt(SNodeOpType op_type, SNode *snode, Stmt *ptr, Stmt *val = nullptr)
       : op_type(op_type), snode(snode), ptr(ptr), val(val) {
-    TC_ASSERT((val == nullptr) != (op_type == SNodeOpType::append ||
-                                   op_type == SNodeOpType::is_active));
     add_operand(this->ptr);
     if (val)
       add_operand(this->val);
@@ -1436,7 +1434,8 @@ class SNodeOpStmt : public Stmt {
       : op_type(op_type), snode(snode), indices(indices) {
     ptr = nullptr;
     val = nullptr;
-    TC_ASSERT(op_type == SNodeOpType::is_active);
+    TI_ASSERT(op_type == SNodeOpType::is_active ||
+              op_type == SNodeOpType::deactivate);
     add_operand(this->ptr);
     for (int i = 0; i < (int)indices.size(); i++) {
       add_operand(this->indices[i]);
@@ -1445,6 +1444,11 @@ class SNodeOpStmt : public Stmt {
     element_type() = DataType::i32;
   }
 
+  static bool activation_related(SNodeOpType op) {
+    return op == SNodeOpType::activate || op == SNodeOpType::deactivate ||
+           op == SNodeOpType::is_active;
+  }
+
   DEFINE_ACCEPT
 };
 
@@ -1466,7 +1470,7 @@ class AssertStmt : public Stmt {
 
   AssertStmt(const std::string &text, Stmt *val) : text(text), val(val) {
     add_operand(this->val);
-    TC_ASSERT(val);
+    TI_ASSERT(val);
   }
 
   DEFINE_ACCEPT
@@ -1700,7 +1704,7 @@ class ConstStmt : public Stmt {
     width() = val.size();
     element_type() = val[0].dt;
     for (int i = 0; i < ret_type.width; i++) {
-      TC_ASSERT(val[0].dt == val[i].dt);
+      TI_ASSERT(val[0].dt == val[i].dt);
     }
   }
 
@@ -1967,7 +1971,7 @@ class AtomicOpExpression : public Expression {
                          val.serialize());
     } else {
       // min/max not supported in the LLVM backend yet.
-      TC_NOT_IMPLEMENTED;
+      TI_NOT_IMPLEMENTED;
     }
   }
 
@@ -2019,7 +2023,7 @@ class SNodeOpExpression : public Expression {
     if (op_type == SNodeOpType::is_active) {
       // is_active cannot be lowered all the way to a global pointer.
       // It should be lowered into a pointer to parent and an index.
-      TC_ERROR_IF(
+      TI_ERROR_IF(
           snode->type != SNodeType::pointer && snode->type != SNodeType::hash,
           "ti.is_active only works on hash and pointer nodes.");
       ret.push_back<SNodeOpStmt>(SNodeOpType::is_active, snode, indices_stmt);
@@ -2029,11 +2033,11 @@ class SNodeOpExpression : public Expression {
         value->flatten(ret);
         ret.push_back<SNodeOpStmt>(SNodeOpType::append, snode, ptr,
                                    ret.back().get());
-        TC_ERROR_IF(snode->type != SNodeType::dynamic,
+        TI_ERROR_IF(snode->type != SNodeType::dynamic,
                     "ti.append only works on dynamic nodes.");
-        TC_ERROR_IF(snode->ch.size() != 1,
+        TI_ERROR_IF(snode->ch.size() != 1,
                     "ti.append only works on single-child dynamic nodes.");
-        TC_ERROR_IF(data_type_size(snode->ch[0]->dt) != 4,
+        TI_ERROR_IF(data_type_size(snode->ch[0]->dt) != 4,
                     "ti.append only works on i32/f32 nodes.");
       } else if (op_type == SNodeOpType::length) {
         ret.push_back<SNodeOpStmt>(SNodeOpType::length, snode, ptr, nullptr);
@@ -2081,12 +2085,12 @@ class ConstExpression : public Expression {
 template <typename T, typename... Indices>
 T &Expr::val(Indices... indices) {
   auto e = this->cast<GlobalVariableExpression>();
-  TC_ASSERT(is<GlobalVariableExpression>());
+  TI_ASSERT(is<GlobalVariableExpression>());
   return *(T *)val_tmp(get_data_type<T>(), indices...);
 }
 
 inline Expr load(Expr ptr) {
-  TC_ASSERT(ptr.is<GlobalPtrExpression>());
+  TI_ASSERT(ptr.is<GlobalPtrExpression>());
   return Expr::make<GlobalLoadExpression>(ptr);
 }
 
@@ -2094,7 +2098,7 @@ inline Expr load_if_ptr(const Expr &ptr) {
   if (ptr.is<GlobalPtrExpression>()) {
     return load(ptr);
   } else if (ptr.is<GlobalVariableExpression>()) {
-    TC_ASSERT(ptr.cast<GlobalVariableExpression>()->snode->num_active_indices ==
+    TI_ASSERT(ptr.cast<GlobalVariableExpression>()->snode->num_active_indices ==
               0);
     return load(ptr[ExprGroup()]);
   } else
@@ -2104,7 +2108,7 @@ inline Expr load_if_ptr(const Expr &ptr) {
 inline Expr ptr_if_global(const Expr &var) {
   if (var.is<GlobalVariableExpression>()) {
     // singleton global variable
-    TC_ASSERT(var.snode()->num_active_indices == 0);
+    TI_ASSERT(var.snode()->num_active_indices == 0);
     return var[ExprGroup()];
   } else {
     // may be any local or global expr
@@ -2139,7 +2143,7 @@ inline void CacheL1(const Expr &var) {
 }
 
 inline void BlockDim(int v) {
-  TC_ASSERT(bit::is_power_of_two(v));
+  TI_ASSERT(bit::is_power_of_two(v));
   dec.block_dim = v;
 }
 
diff --git a/taichi/kernel.cpp b/taichi/kernel.cpp
index 9eab8355ea7c3..22a2bf8784146 100644
--- a/taichi/kernel.cpp
+++ b/taichi/kernel.cpp
@@ -1,7 +1,7 @@
 #include <taichi/common/task.h>
 #include "kernel.h"
 #include "program.h"
-#if defined(CUDA_FOUND)
+#if defined(TI_WITH_CUDA)
 #include <cuda_runtime.h>
 #include "cuda_utils.h"
 #endif
@@ -46,7 +46,7 @@ void Kernel::operator()() {
   if (arch == Arch::cuda) {
     std::vector<void *> host_buffers(args.size());
     std::vector<void *> device_buffers(args.size());
-#if defined(CUDA_FOUND)
+#if defined(TI_WITH_CUDA)
     // copy data to GRAM
     bool has_buffer = false;
     for (int i = 0; i < (int)args.size(); i++) {
@@ -74,7 +74,7 @@ void Kernel::operator()() {
       }
     }
 #else
-    TC_ERROR("No CUDA");
+    TI_ERROR("No CUDA");
 #endif
   } else {
     auto &c = program.get_context();
@@ -84,7 +84,7 @@ void Kernel::operator()() {
 }
 
 void Kernel::set_arg_float(int i, float64 d) {
-  TC_ASSERT_INFO(args[i].is_nparray == false,
+  TI_ASSERT_INFO(args[i].is_nparray == false,
                  "Setting scalar value to numpy array argument is not allowed");
   auto dt = args[i].dt;
   if (dt == DataType::f32) {
@@ -104,7 +104,7 @@ void Kernel::set_arg_float(int i, float64 d) {
   } else if (dt == DataType::u64) {
     program.context.set_arg(i, (uint64)d);
   } else {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
   }
 }
 
@@ -113,7 +113,7 @@ void Kernel::set_extra_arg_int(int i, int j, int32 d) {
 }
 
 void Kernel::set_arg_int(int i, int64 d) {
-  TC_ASSERT_INFO(args[i].is_nparray == false,
+  TI_ASSERT_INFO(args[i].is_nparray == false,
                  "Setting scalar value to numpy array argument is not allowed");
   auto dt = args[i].dt;
   if (dt == DataType::i32) {
@@ -133,7 +133,7 @@ void Kernel::set_arg_int(int i, int64 d) {
   } else if (dt == DataType::f64) {
     program.context.set_arg(i, (float64)d);
   } else {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
   }
 }
 
@@ -142,14 +142,14 @@ void Kernel::mark_arg_return_value(int i, bool is_return) {
 }
 
 void Kernel::set_arg_nparray(int i, uint64 d, uint64 size) {
-  TC_ASSERT_INFO(args[i].is_nparray,
+  TI_ASSERT_INFO(args[i].is_nparray,
                  "Setting numpy array to scalar argument is not allowed");
   args[i].size = size;
   program.context.set_arg(i, d);
 }
 
 void Kernel::set_arch(Arch arch) {
-  TC_ASSERT(!compiled);
+  TI_ASSERT(!compiled);
   this->arch = arch;
 }
 
diff --git a/taichi/math/array.h b/taichi/math/array.h
index 0acb220f55bc8..04d48c4bfac10 100644
--- a/taichi/math/array.h
+++ b/taichi/math/array.h
@@ -9,7 +9,7 @@
 #include "array_2d.h"
 #include "array_3d.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 template <int dim>
 class IndexND;
 
@@ -19,4 +19,4 @@ class RegionND;
 template <int dim, typename T>
 class ArrayND;
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/math/array_1d.h b/taichi/math/array_1d.h
index 816c85f5feecd..df4986478ddf0 100644
--- a/taichi/math/array_1d.h
+++ b/taichi/math/array_1d.h
@@ -8,7 +8,7 @@
 #include "math.h"
 #include "array_fwd.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 template <typename T>
 class Array1D {
@@ -65,9 +65,9 @@ class Array1D {
     return 1;
   }
 
-  TC_IO_DECL {
-    TC_IO(size);
-    TC_IO(data);
+  TI_IO_DECL {
+    TI_IO(size);
+    TI_IO(data);
   }
 };
 
@@ -129,7 +129,7 @@ T Array1D<T>::abs_max() {
 
 template <typename T>
 void Array1D<T>::print(std::string name) {
-  TC_NOT_IMPLEMENTED
+  TI_NOT_IMPLEMENTED
 }
 
 template <typename T>
@@ -177,4 +177,4 @@ Array1D<T> &Array1D<T>::operator=(const Array1D<T> &arr) {
 
 typedef Array1D<Vector2> ArrayVec2;
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/math/array_2d.h b/taichi/math/array_2d.h
index 89e83869e0ee5..122f8a7a584b1 100644
--- a/taichi/math/array_2d.h
+++ b/taichi/math/array_2d.h
@@ -14,7 +14,7 @@
 #include "array_fwd.h"
 #include "linalg.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 template <>
 class IndexND<2> {
@@ -794,4 +794,4 @@ inline void print(const Array2D<T> &arr) {
   arr.print("");
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/math/array_3d.h b/taichi/math/array_3d.h
index 41e7eb4880b19..84e62e38cc21f 100644
--- a/taichi/math/array_3d.h
+++ b/taichi/math/array_3d.h
@@ -14,7 +14,7 @@
 #include "array_fwd.h"
 #include "linalg.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 template <>
 class IndexND<3> {
@@ -25,7 +25,7 @@ class IndexND<3> {
   int i, j, k;
   Vector3 storage_offset;
 
-  TC_IO_DEF(i, j, k, x, y, z, storage_offset);
+  TI_IO_DEF(i, j, k, x, y, z, storage_offset);
   using Index3D = IndexND<3>;
 
   IndexND() {
@@ -127,11 +127,11 @@ class IndexND<3> {
     return *this;
   }
 
-  TC_FORCE_INLINE int operator[](int c) {
+  TI_FORCE_INLINE int operator[](int c) {
     return *(&i + c);
   }
 
-  TC_FORCE_INLINE int operator[](int c) const {
+  TI_FORCE_INLINE int operator[](int c) const {
     return *(&i + c);
   }
 
@@ -229,13 +229,13 @@ class RegionND<3> {
     return index_end;
   }
 
-  TC_IO_DECL {
-    TC_IO(x);
-    TC_IO(y);
-    TC_IO(z);
-    TC_IO(index_begin);
-    TC_IO(index_end);
-    TC_IO(storage_offset);
+  TI_IO_DECL {
+    TI_IO(x);
+    TI_IO(y);
+    TI_IO(z);
+    TI_IO(index_begin);
+    TI_IO(index_end);
+    TI_IO(storage_offset);
   }
 };
 
@@ -255,11 +255,11 @@ class ArrayND<3, T> {
     T *data;
     int offset;
 
-    TC_FORCE_INLINE Accessor2D(T *data, int offset)
+    TI_FORCE_INLINE Accessor2D(T *data, int offset)
         : data(data), offset(offset) {
     }
 
-    TC_FORCE_INLINE T *operator[](int i) const {
+    TI_FORCE_INLINE T *operator[](int i) const {
       return data + offset * i;
     }
   };
@@ -268,11 +268,11 @@ class ArrayND<3, T> {
     const T *data;
     int offset;
 
-    TC_FORCE_INLINE ConstAccessor2D(const T *data, int offset)
+    TI_FORCE_INLINE ConstAccessor2D(const T *data, int offset)
         : data(data), offset(offset) {
     }
 
-    TC_FORCE_INLINE const T *operator[](int i) const {
+    TI_FORCE_INLINE const T *operator[](int i) const {
       return data + offset * i;
     }
   };
@@ -282,15 +282,15 @@ class ArrayND<3, T> {
   template <typename S>
   using Array3D = ArrayND<3, S>;
 
-  TC_FORCE_INLINE int get_size() const {
+  TI_FORCE_INLINE int get_size() const {
     return size;
   }
 
-  TC_FORCE_INLINE const Region3D &get_region() const {
+  TI_FORCE_INLINE const Region3D &get_region() const {
     return region;
   }
 
-  TC_FORCE_INLINE ArrayND(const Vector3i &resolution,
+  TI_FORCE_INLINE ArrayND(const Vector3i &resolution,
                           T init = T(0),
                           Vector3 storage_offset = Vector3(0.5f)) {
     initialize(resolution, init, storage_offset);
@@ -422,11 +422,11 @@ class ArrayND<3, T> {
     }
   }
 
-  TC_FORCE_INLINE const Accessor2D operator[](int i) {
+  TI_FORCE_INLINE const Accessor2D operator[](int i) {
     return Accessor2D(&data[0] + i * stride, res[2]);
   }
 
-  TC_FORCE_INLINE const ConstAccessor2D operator[](int i) const {
+  TI_FORCE_INLINE const ConstAccessor2D operator[](int i) const {
     return ConstAccessor2D(&data[0] + i * stride, res[2]);
   }
 
@@ -612,19 +612,19 @@ class ArrayND<3, T> {
     return data.cend();
   }
 
-  TC_FORCE_INLINE T &operator[](const Vector3i &pos) {
+  TI_FORCE_INLINE T &operator[](const Vector3i &pos) {
     return data[(pos.x * res[1] + pos.y) * res[2] + pos.z];
   }
 
-  TC_FORCE_INLINE const T &operator[](const Vector3i &pos) const {
+  TI_FORCE_INLINE const T &operator[](const Vector3i &pos) const {
     return (*this)[pos.x][pos.y][pos.z];
   }
 
-  TC_FORCE_INLINE T &operator[](const Index3D &index) {
+  TI_FORCE_INLINE T &operator[](const Index3D &index) {
     return (*this)[index.i][index.j][index.k];
   }
 
-  TC_FORCE_INLINE const T &operator[](const Index3D &index) const {
+  TI_FORCE_INLINE const T &operator[](const Index3D &index) const {
     return (*this)[index.i][index.j][index.k];
   }
 
@@ -708,4 +708,4 @@ void print(const Array3D<T> &arr) {
 }
 
 void test_array_3d();
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/math/array_fwd.h b/taichi/math/array_fwd.h
index 8fd466fda37f1..d0aaeb49312c7 100644
--- a/taichi/math/array_fwd.h
+++ b/taichi/math/array_fwd.h
@@ -7,7 +7,7 @@
 
 #include <taichi/common/util.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 template <int dim>
 class IndexND;
@@ -27,4 +27,4 @@ class ArrayND;
 template <typename T, int dim>
 using TArray = ArrayND<dim, T>;
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/math/discrete_sampler.h b/taichi/math/discrete_sampler.h
index 79a2214bb18e3..c84fe725caa83 100644
--- a/taichi/math/discrete_sampler.h
+++ b/taichi/math/discrete_sampler.h
@@ -11,7 +11,7 @@
 
 #include "math.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class DiscreteSampler {
  private:
@@ -110,9 +110,9 @@ inline void test_discrete_sampler() {
     count[ds.sample(rand())]++;
   }
   for (int i = 0; i < n; i++) {
-    TC_P(i);
-    TC_P(count[i]);
+    TI_P(i);
+    TI_P(count[i]);
   }
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/math/geometry_util.h b/taichi/math/geometry_util.h
index 490da8fb93029..77980c55ad4c4 100644
--- a/taichi/math/geometry_util.h
+++ b/taichi/math/geometry_util.h
@@ -10,7 +10,7 @@
 #include <taichi/common/util.h>
 #include "linalg.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 inline bool intersect(const Vector2 &a,
                       const Vector2 &b,
@@ -129,4 +129,4 @@ inline Vector3 reflect(const Vector3 &d, const Vector3 &n) {
   return d - dot(d, n) * 2.0f * n;
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/math/linalg.h b/taichi/math/linalg.h
index dd3bb20ae5edd..93272e05d0ef8 100644
--- a/taichi/math/linalg.h
+++ b/taichi/math/linalg.h
@@ -13,7 +13,7 @@
 #include <taichi/common/util.h>
 #include "scalar.h"
 #include "array_fwd.h"
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 // Instruction Set Extension
 
@@ -91,13 +91,13 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
   using VectorBase::d;
   static constexpr int storage_elements = VectorBase::storage_elements;
 
-  TC_FORCE_INLINE VectorND() {
+  TI_FORCE_INLINE VectorND() {
     for (int i = 0; i < dim; i++) {
       this->d[i] = T(0);
     }
   }
 
-  static TC_FORCE_INLINE VectorND from_array(const T new_val[dim]) {
+  static TI_FORCE_INLINE VectorND from_array(const T new_val[dim]) {
     VectorND ret;
     for (int i = 0; i < dim; i++) {
       ret.d[i] = new_val[i];
@@ -106,14 +106,14 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
   }
 
   template <int dim_, typename T_, InstSetExt ISE_>
-  explicit TC_FORCE_INLINE VectorND(const VectorND<dim_, T_, ISE_> &o)
+  explicit TI_FORCE_INLINE VectorND(const VectorND<dim_, T_, ISE_> &o)
       : VectorND() {
     for (int i = 0; i < std::min(dim_, dim__); i++) {
       d[i] = o[i];
     }
   }
 
-  explicit TC_FORCE_INLINE VectorND(const std::array<T, dim> &o) {
+  explicit TI_FORCE_INLINE VectorND(const std::array<T, dim> &o) {
     for (int i = 0; i < dim; i++) {
       d[i] = o[i];
     }
@@ -126,14 +126,14 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
   // Vector initialization
   template <typename F,
             std::enable_if_t<std::is_same<F, VectorND>::value, int> = 0>
-  explicit TC_FORCE_INLINE VectorND(const F &f) {
+  explicit TI_FORCE_INLINE VectorND(const F &f) {
     for (int i = 0; i < dim; i++)
       this->d[i] = f[i];
   }
 
   // Scalar initialization
   template <typename F, std::enable_if_t<std::is_same<F, T>::value, int> = 0>
-  explicit TC_FORCE_INLINE VectorND(const F &f) {
+  explicit TI_FORCE_INLINE VectorND(const F &f) {
     for (int i = 0; i < dim; i++)
       this->d[i] = f;
   }
@@ -143,19 +143,19 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
       typename F,
       std::enable_if_t<std::is_convertible<F, std::function<T(int)>>::value,
                        int> = 0>
-  explicit TC_FORCE_INLINE VectorND(const F &f) {
+  explicit TI_FORCE_INLINE VectorND(const F &f) {
     for (int i = 0; i < dim; i++)
       this->d[i] = f(i);
   }
 
   template <int dim_ = dim, typename T_ = T, InstSetExt ISE_ = ISE>
-  explicit TC_FORCE_INLINE VectorND(T v) {
+  explicit TI_FORCE_INLINE VectorND(T v) {
     for (int i = 0; i < dim; i++) {
       this->d[i] = v;
     }
   }
 
-  explicit TC_FORCE_INLINE VectorND(T v0, T v1) {
+  explicit TI_FORCE_INLINE VectorND(T v0, T v1) {
     static_assert(dim == 2, "Vector dim must be 2");
     this->d[0] = v0;
     this->d[1] = v1;
@@ -163,7 +163,7 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
 
   // All except Vector3f
   template <int dim_ = dim, typename T_ = T, InstSetExt ISE_ = ISE>
-  explicit TC_FORCE_INLINE VectorND(T v0, T v1, T v2) {
+  explicit TI_FORCE_INLINE VectorND(T v0, T v1, T v2) {
     static_assert(dim == 3, "Vector dim must be 3");
     this->d[0] = v0;
     this->d[1] = v1;
@@ -172,7 +172,7 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
 
   // All except Vector3f, Vector4f
   template <int dim_ = dim, typename T_ = T, InstSetExt ISE_ = ISE>
-  explicit TC_FORCE_INLINE VectorND(T v0, T v1, T v2, T v3) {
+  explicit TI_FORCE_INLINE VectorND(T v0, T v1, T v2, T v3) {
     static_assert(dim == 4, "Vector dim must be 4");
     this->d[0] = v0;
     this->d[1] = v1;
@@ -182,7 +182,7 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
 
   // Vector extension
   template <int dim_ = dim, std::enable_if_t<(dim_ > 1), int> = 0>
-  explicit TC_FORCE_INLINE VectorND(const VectorND<dim - 1, T, ISE> &o,
+  explicit TI_FORCE_INLINE VectorND(const VectorND<dim - 1, T, ISE> &o,
                                     T extra) {
     for (int i = 0; i < dim_ - 1; i++) {
       this->d[i] = o[i];
@@ -191,32 +191,32 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
   }
 
   template <typename T_>
-  explicit TC_FORCE_INLINE VectorND(const std::vector<T_> &o) {
+  explicit TI_FORCE_INLINE VectorND(const std::vector<T_> &o) {
     if (o.size() != dim) {
-      TC_ERROR("Dimension mismatch: " + std::to_string(dim) + " v.s. " +
+      TI_ERROR("Dimension mismatch: " + std::to_string(dim) + " v.s. " +
                std::to_string((int)o.size()));
     }
     for (int i = 0; i < dim; i++)
       this->d[i] = T(o[i]);
   }
 
-  TC_FORCE_INLINE T &operator[](int i) {
+  TI_FORCE_INLINE T &operator[](int i) {
     return this->d[i];
   }
 
-  TC_FORCE_INLINE const T &operator[](int i) const {
+  TI_FORCE_INLINE const T &operator[](int i) const {
     return this->d[i];
   }
 
-  TC_FORCE_INLINE T &operator()(int i) {
+  TI_FORCE_INLINE T &operator()(int i) {
     return d[i];
   }
 
-  TC_FORCE_INLINE const T &operator()(int i) const {
+  TI_FORCE_INLINE const T &operator()(int i) const {
     return d[i];
   }
 
-  TC_FORCE_INLINE T dot(VectorND<dim, T, ISE> o) const {
+  TI_FORCE_INLINE T dot(VectorND<dim, T, ISE> o) const {
     T ret = T(0);
     for (int i = 0; i < dim; i++)
       ret += this->d[i] * o[i];
@@ -227,13 +227,13 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
       typename F,
       std::enable_if_t<std::is_convertible<F, std::function<T(int)>>::value,
                        int> = 0>
-  TC_FORCE_INLINE VectorND &set(const F &f) {
+  TI_FORCE_INLINE VectorND &set(const F &f) {
     for (int i = 0; i < dim; i++)
       this->d[i] = f(i);
     return *this;
   }
 
-  TC_FORCE_INLINE auto map(T(f)(T)) const
+  TI_FORCE_INLINE auto map(T(f)(T)) const
       -> VectorND<dim, decltype(f(T(0))), ISE> {
     VectorND<dim, decltype(f(T(0))), ISE> ret;
     for (int i = 0; i < dim; i++)
@@ -241,108 +241,108 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
     return ret;
   }
 
-  TC_FORCE_INLINE VectorND &operator=(const VectorND &o) {
+  TI_FORCE_INLINE VectorND &operator=(const VectorND &o) {
     memcpy(this, &o, sizeof(*this));
     return *this;
   }
 
   // Non-SIMD cases
   template <int dim_ = dim, typename T_ = T, InstSetExt ISE_ = ISE>
-  TC_FORCE_INLINE VectorND operator+(const VectorND &o) const {
+  TI_FORCE_INLINE VectorND operator+(const VectorND &o) const {
     return VectorND([=](int i) { return this->d[i] + o[i]; });
   }
 
   template <int dim_ = dim, typename T_ = T, InstSetExt ISE_ = ISE>
-  TC_FORCE_INLINE VectorND operator-(const VectorND &o) const {
+  TI_FORCE_INLINE VectorND operator-(const VectorND &o) const {
     return VectorND([=](int i) { return this->d[i] - o[i]; });
   }
 
   template <int dim_ = dim, typename T_ = T, InstSetExt ISE_ = ISE>
-  TC_FORCE_INLINE VectorND operator*(const VectorND &o) const {
+  TI_FORCE_INLINE VectorND operator*(const VectorND &o) const {
     return VectorND([=](int i) { return this->d[i] * o[i]; });
   }
 
   template <int dim_ = dim, typename T_ = T, InstSetExt ISE_ = ISE>
-  TC_FORCE_INLINE VectorND operator/(const VectorND &o) const {
+  TI_FORCE_INLINE VectorND operator/(const VectorND &o) const {
     return VectorND([=](int i) { return this->d[i] / o[i]; });
   }
 
   template <int dim_ = dim, typename T_ = T, InstSetExt ISE_ = ISE>
-  TC_FORCE_INLINE VectorND operator%(const VectorND &o) const {
+  TI_FORCE_INLINE VectorND operator%(const VectorND &o) const {
     return VectorND([=](int i) { return this->d[i] % o[i]; });
   }
 
   // Inplace operations
-  TC_FORCE_INLINE VectorND &operator+=(const VectorND &o) {
+  TI_FORCE_INLINE VectorND &operator+=(const VectorND &o) {
     (*this) = (*this) + o;
     return *this;
   }
 
-  TC_FORCE_INLINE VectorND &operator-=(const VectorND &o) {
+  TI_FORCE_INLINE VectorND &operator-=(const VectorND &o) {
     (*this) = (*this) - o;
     return *this;
   }
 
-  TC_FORCE_INLINE VectorND &operator*=(const VectorND &o) {
+  TI_FORCE_INLINE VectorND &operator*=(const VectorND &o) {
     (*this) = (*this) * o;
     return *this;
   }
 
-  TC_FORCE_INLINE VectorND &operator*=(const T &o) {
+  TI_FORCE_INLINE VectorND &operator*=(const T &o) {
     (*this) = (*this) * o;
     return *this;
   }
 
-  TC_FORCE_INLINE VectorND &operator/=(const VectorND &o) {
+  TI_FORCE_INLINE VectorND &operator/=(const VectorND &o) {
     (*this) = (*this) / o;
     return *this;
   }
 
-  TC_FORCE_INLINE VectorND &operator/=(const T &o) {
+  TI_FORCE_INLINE VectorND &operator/=(const T &o) {
     (*this) = (*this) / o;
     return *this;
   }
 
-  TC_FORCE_INLINE VectorND operator-() const {
+  TI_FORCE_INLINE VectorND operator-() const {
     return VectorND([=](int i) { return -this->d[i]; });
   }
 
-  TC_FORCE_INLINE bool operator==(const VectorND &o) const {
+  TI_FORCE_INLINE bool operator==(const VectorND &o) const {
     for (int i = 0; i < dim; i++)
       if (this->d[i] != o[i])
         return false;
     return true;
   }
 
-  TC_FORCE_INLINE bool operator<(const VectorND &o) const {
+  TI_FORCE_INLINE bool operator<(const VectorND &o) const {
     for (int i = 0; i < dim; i++)
       if (this->d[i] >= o[i])
         return false;
     return true;
   }
 
-  TC_FORCE_INLINE bool operator<=(const VectorND &o) const {
+  TI_FORCE_INLINE bool operator<=(const VectorND &o) const {
     for (int i = 0; i < dim; i++)
       if (this->d[i] > o[i])
         return false;
     return true;
   }
 
-  TC_FORCE_INLINE bool operator>(const VectorND &o) const {
+  TI_FORCE_INLINE bool operator>(const VectorND &o) const {
     for (int i = 0; i < dim; i++)
       if (this->d[i] <= o[i])
         return false;
     return true;
   }
 
-  TC_FORCE_INLINE bool operator>=(const VectorND &o) const {
+  TI_FORCE_INLINE bool operator>=(const VectorND &o) const {
     for (int i = 0; i < dim; i++)
       if (this->d[i] < o[i])
         return false;
     return true;
   }
 
-  TC_FORCE_INLINE bool operator==(const std::vector<T> &o) const {
+  TI_FORCE_INLINE bool operator==(const std::vector<T> &o) const {
     if (o.size() != dim)
       return false;
     for (int i = 0; i < dim; i++)
@@ -351,46 +351,46 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
     return true;
   }
 
-  TC_FORCE_INLINE bool operator!=(const VectorND &o) const {
+  TI_FORCE_INLINE bool operator!=(const VectorND &o) const {
     for (int i = 0; i < dim; i++)
       if (this->d[i] != o[i])
         return true;
     return false;
   }
 
-  TC_FORCE_INLINE VectorND abs() const {
+  TI_FORCE_INLINE VectorND abs() const {
     return VectorND([&](int i) { return std::abs(d[i]); });
   }
 
-  TC_FORCE_INLINE VectorND floor() const {
+  TI_FORCE_INLINE VectorND floor() const {
     return VectorND([&](int i) { return std::floor(d[i]); });
   }
 
-  TC_FORCE_INLINE VectorND sin() const {
+  TI_FORCE_INLINE VectorND sin() const {
     return VectorND([&](int i) { return std::sin(d[i]); });
   }
 
-  TC_FORCE_INLINE VectorND cos() const {
+  TI_FORCE_INLINE VectorND cos() const {
     return VectorND([&](int i) { return std::cos(d[i]); });
   }
 
-  TC_FORCE_INLINE VectorND fract() const {
+  TI_FORCE_INLINE VectorND fract() const {
     return VectorND([&](int i) { return taichi::fract(d[i]); });
   }
 
-  TC_FORCE_INLINE VectorND clamp() const {
+  TI_FORCE_INLINE VectorND clamp() const {
     return VectorND([&](int i) { return taichi::clamp(d[i]); });
   }
 
-  TC_FORCE_INLINE VectorND clamp(const T &a, T &b) const {
+  TI_FORCE_INLINE VectorND clamp(const T &a, T &b) const {
     return VectorND([&](int i) { return taichi::clamp(d[i], a, b); });
   }
 
-  TC_FORCE_INLINE VectorND clamp(const VectorND &a, const VectorND &b) const {
+  TI_FORCE_INLINE VectorND clamp(const VectorND &a, const VectorND &b) const {
     return VectorND([&](int i) { return taichi::clamp(d[i], a[i], b[i]); });
   }
 
-  TC_FORCE_INLINE T min() const {
+  TI_FORCE_INLINE T min() const {
     T ret = this->d[0];
     for (int i = 1; i < dim; i++) {
       ret = std::min(ret, this->d[i]);
@@ -398,7 +398,7 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
     return ret;
   }
 
-  TC_FORCE_INLINE T max() const {
+  TI_FORCE_INLINE T max() const {
     T ret = this->d[0];
     for (int i = 1; i < dim; i++) {
       ret = std::max(ret, this->d[i]);
@@ -406,7 +406,7 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
     return ret;
   }
 
-  TC_FORCE_INLINE T abs_max() const {
+  TI_FORCE_INLINE T abs_max() const {
     T ret = std::abs(this->d[0]);
     for (int i = 1; i < dim; i++) {
       ret = std::max(ret, std::abs(this->d[i]));
@@ -415,7 +415,7 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
   }
 
   template <typename G>
-  TC_FORCE_INLINE VectorND<dim, G, ISE> cast() const {
+  TI_FORCE_INLINE VectorND<dim, G, ISE> cast() const {
     return VectorND<dim, G, ISE>(
         [this](int i) { return static_cast<G>(this->d[i]); });
   }
@@ -434,17 +434,17 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
             int dim_ = dim,
             typename T_ = T,
             InstSetExt ISE_ = ISE>
-  TC_FORCE_INLINE VectorND permute() const {
+  TI_FORCE_INLINE VectorND permute() const {
     return VectorND(this->d[a], this->d[b], this->d[c], this->d[d]);
   }
 
   template <int a, int dim_ = dim, typename T_ = T, InstSetExt ISE_ = ISE>
-  TC_FORCE_INLINE VectorND broadcast() const {
+  TI_FORCE_INLINE VectorND broadcast() const {
     return permute<a, a, a, a>();
   }
 
   template <int dim_ = dim, typename T_ = T, InstSetExt ISE_ = ISE>
-  TC_FORCE_INLINE T length2() const {
+  TI_FORCE_INLINE T length2() const {
     T ret = 0;
     for (int i = 0; i < dim; i++) {
       ret += this->d[i] * this->d[i];
@@ -452,7 +452,7 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
     return ret;
   }
 
-  TC_FORCE_INLINE auto length() const {
+  TI_FORCE_INLINE auto length() const {
     return std::sqrt(length2());
   }
 
@@ -476,7 +476,7 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
     return ret;
   }
 
-  TC_FORCE_INLINE T sum() const {
+  TI_FORCE_INLINE T sum() const {
     T ret = this->d[0];
     for (int i = 1; i < dim; i++) {
       ret += this->d[i];
@@ -484,11 +484,11 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
     return ret;
   }
 
-  TC_FORCE_INLINE T average() const {
+  TI_FORCE_INLINE T average() const {
     return (T(1.0) / dim) * sum();
   }
 
-  TC_FORCE_INLINE T prod() const {
+  TI_FORCE_INLINE T prod() const {
     T ret = this->d[0];
     for (int i = 1; i < dim; i++) {
       ret *= this->d[i];
@@ -496,7 +496,7 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
     return ret;
   }
 
-  TC_FORCE_INLINE VectorND pow(T index) const {
+  TI_FORCE_INLINE VectorND pow(T index) const {
     VectorND ret;
     for (int i = 0; i < dim; i++) {
       ret[i] = std::pow(this->d[i], index);
@@ -504,14 +504,14 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
     return ret;
   }
 
-  TC_FORCE_INLINE static VectorND axis(int i) {
+  TI_FORCE_INLINE static VectorND axis(int i) {
     VectorND ret(0);
     ret[i] = 1;
     return ret;
   }
 
-  TC_IO_DECL {
-    if (TC_SERIALIZER_IS(TextSerializer)) {
+  TI_IO_DECL {
+    if (TI_SERIALIZER_IS(TextSerializer)) {
       std::string ret = "(";
       for (int i = 0; i < dim - 1; i++) {
         ret += fmt::format("{}, ", d[i]);
@@ -520,11 +520,11 @@ struct VectorND : public VectorNDBase<dim__, T, ISE> {
       ret += ")";
       serializer("vec", ret);
     } else {
-      TC_IO(d);
+      TI_IO(d);
     }
   }
 
-  TC_FORCE_INLINE operator std::array<T, dim>() const {
+  TI_FORCE_INLINE operator std::array<T, dim>() const {
     std::array<T, dim> arr;
     for (int i = 0; i < dim; i++) {
       arr[i] = d[i];
@@ -537,47 +537,47 @@ template <typename T, int dim, InstSetExt ISE = default_instruction_set>
 using TVector = VectorND<dim, T, ISE>;
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE VectorND<dim, T, ISE> operator
+TI_FORCE_INLINE VectorND<dim, T, ISE> operator
     *(T a, const VectorND<dim, T, ISE> &v) {
   return VectorND<dim, T, ISE>(a) * v;
 }
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE VectorND<dim, T, ISE> operator*(const VectorND<dim, T, ISE> &v,
+TI_FORCE_INLINE VectorND<dim, T, ISE> operator*(const VectorND<dim, T, ISE> &v,
                                                 T a) {
   return a * v;
 }
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE VectorND<dim, T, ISE> operator/(
+TI_FORCE_INLINE VectorND<dim, T, ISE> operator/(
     T a,
     const VectorND<dim, T, ISE> &v) {
   return VectorND<dim, T, ISE>(a) / v;
 }
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE VectorND<dim, T, ISE> operator/(const VectorND<dim, T, ISE> &v,
+TI_FORCE_INLINE VectorND<dim, T, ISE> operator/(const VectorND<dim, T, ISE> &v,
                                                 T a) {
   return v / VectorND<dim, T, ISE>(a);
 }
 
 template <typename T>
-TC_FORCE_INLINE std::array<T, 1> to_std_array(const TVector<T, 1> &v) {
+TI_FORCE_INLINE std::array<T, 1> to_std_array(const TVector<T, 1> &v) {
   return std::array<T, 1>{v[0]};
 }
 
 template <typename T>
-TC_FORCE_INLINE std::array<T, 2> to_std_array(const TVector<T, 2> &v) {
+TI_FORCE_INLINE std::array<T, 2> to_std_array(const TVector<T, 2> &v) {
   return std::array<T, 2>{v[0], v[1]};
 }
 
 template <typename T>
-TC_FORCE_INLINE std::array<T, 3> to_std_array(const TVector<T, 3> &v) {
+TI_FORCE_INLINE std::array<T, 3> to_std_array(const TVector<T, 3> &v) {
   return std::array<T, 3>{v[0], v[1], v[2]};
 }
 
 template <typename T>
-TC_FORCE_INLINE std::array<T, 4> to_std_array(const TVector<T, 4> &v) {
+TI_FORCE_INLINE std::array<T, 4> to_std_array(const TVector<T, 4> &v) {
   return std::array<T, 4>{v[0], v[1], v[2], v[3]};
 }
 
@@ -602,7 +602,7 @@ using Vector3i = VectorND<3, int, default_instruction_set>;
 using Vector4i = VectorND<4, int, default_instruction_set>;
 
 template <typename T>
-TC_FORCE_INLINE T fused_mul_add(const T &a, const T &b, const T &c) {
+TI_FORCE_INLINE T fused_mul_add(const T &a, const T &b, const T &c) {
   return a * b + c;
 }
 
@@ -622,14 +622,14 @@ struct MatrixND {
   static constexpr InstSetExt ise = ISE;
   using type = T;
 
-  TC_FORCE_INLINE MatrixND() {
+  TI_FORCE_INLINE MatrixND() {
     for (int i = 0; i < dim; i++) {
       d[i] = VectorND<dim, T, ISE>();
     }
   }
 
   template <int dim_, typename T_, InstSetExt ISE_>
-  TC_FORCE_INLINE explicit MatrixND(const MatrixND<dim_, T_, ISE_> &o)
+  TI_FORCE_INLINE explicit MatrixND(const MatrixND<dim_, T_, ISE_> &o)
       : MatrixND() {
     for (int i = 0; i < std::min(dim_, dim__); i++) {
       for (int j = 0; j < std::min(dim_, dim__); j++) {
@@ -638,36 +638,36 @@ struct MatrixND {
     }
   }
 
-  TC_FORCE_INLINE MatrixND(T v) : MatrixND() {
+  TI_FORCE_INLINE MatrixND(T v) : MatrixND() {
     for (int i = 0; i < dim; i++) {
       d[i][i] = v;
     }
   }
 
-  TC_FORCE_INLINE MatrixND(const MatrixND &o) {
+  TI_FORCE_INLINE MatrixND(const MatrixND &o) {
     *this = o;
   }
 
   // Diag
-  TC_FORCE_INLINE explicit MatrixND(Vector v) : MatrixND() {
+  TI_FORCE_INLINE explicit MatrixND(Vector v) : MatrixND() {
     for (int i = 0; i < dim; i++)
       this->d[i][i] = v[i];
   }
 
-  TC_FORCE_INLINE explicit MatrixND(Vector v0, Vector v1) {
+  TI_FORCE_INLINE explicit MatrixND(Vector v0, Vector v1) {
     static_assert(dim == 2, "Matrix dim must be 2");
     this->d[0] = v0;
     this->d[1] = v1;
   }
 
-  TC_FORCE_INLINE explicit MatrixND(Vector v0, Vector v1, Vector v2) {
+  TI_FORCE_INLINE explicit MatrixND(Vector v0, Vector v1, Vector v2) {
     static_assert(dim == 3, "Matrix dim must be 3");
     this->d[0] = v0;
     this->d[1] = v1;
     this->d[2] = v2;
   }
 
-  TC_FORCE_INLINE explicit MatrixND(Vector v0,
+  TI_FORCE_INLINE explicit MatrixND(Vector v0,
                                     Vector v1,
                                     Vector v2,
                                     Vector v3) {
@@ -685,7 +685,7 @@ struct MatrixND {
                            F,
                            std::function<VectorND<dim__, T, ISE>(int)>>::value,
                        int> = 0>
-  TC_FORCE_INLINE explicit MatrixND(const F &f) {
+  TI_FORCE_INLINE explicit MatrixND(const F &f) {
     for (int i = 0; i < dim; i++)
       this->d[i] = f(i);
   }
@@ -696,37 +696,37 @@ struct MatrixND {
                            F,
                            std::function<VectorND<dim__, T, ISE>(int)>>::value,
                        int> = 0>
-  TC_FORCE_INLINE MatrixND &set(const F &f) {
+  TI_FORCE_INLINE MatrixND &set(const F &f) {
     for (int i = 0; i < dim; i++)
       this->d[i] = f(i);
     return *this;
   }
 
-  TC_FORCE_INLINE MatrixND &operator=(const MatrixND &o) {
+  TI_FORCE_INLINE MatrixND &operator=(const MatrixND &o) {
     for (int i = 0; i < dim; i++) {
       this->d[i] = o[i];
     }
     return *this;
   }
 
-  TC_FORCE_INLINE VectorND<dim, T, ISE> &operator[](int i) {
+  TI_FORCE_INLINE VectorND<dim, T, ISE> &operator[](int i) {
     return d[i];
   }
 
-  TC_FORCE_INLINE T &operator()(int i, int j) {
+  TI_FORCE_INLINE T &operator()(int i, int j) {
     return d[j][i];
   }
 
-  TC_FORCE_INLINE const T &operator()(int i, int j) const {
+  TI_FORCE_INLINE const T &operator()(int i, int j) const {
     return d[j][i];
   }
 
-  TC_FORCE_INLINE const VectorND<dim, T, ISE> &operator[](int i) const {
+  TI_FORCE_INLINE const VectorND<dim, T, ISE> &operator[](int i) const {
     return d[i];
   }
 
   template <int dim_ = dim, typename T_ = T, InstSetExt ISE_ = ISE>
-  TC_FORCE_INLINE VectorND<dim, T, ISE> operator*(
+  TI_FORCE_INLINE VectorND<dim, T, ISE> operator*(
       const VectorND<dim, T, ISE> &o) const {
     VectorND<dim, T, ISE> ret = d[0] * o[0];
     for (int i = 1; i < dim; i++)
@@ -735,7 +735,7 @@ struct MatrixND {
   }
 
   template <int dim_ = dim, typename T_ = T, InstSetExt ISE_ = ISE>
-  TC_FORCE_INLINE MatrixND operator*(const MatrixND &o) const {
+  TI_FORCE_INLINE MatrixND operator*(const MatrixND &o) const {
     MatrixND ret;
     for (int i = 0; i < dim; i++) {
       for (int j = 0; j < dim; j++) {
@@ -749,31 +749,31 @@ struct MatrixND {
     return ret;
   }
 
-  TC_FORCE_INLINE static MatrixND outer_product(Vector column, Vector row) {
+  TI_FORCE_INLINE static MatrixND outer_product(Vector column, Vector row) {
     return MatrixND([&](int i) { return column * row[i]; });
   }
 
-  TC_FORCE_INLINE MatrixND operator+(const MatrixND &o) const {
+  TI_FORCE_INLINE MatrixND operator+(const MatrixND &o) const {
     return MatrixND([=](int i) { return this->d[i] + o[i]; });
   }
 
-  TC_FORCE_INLINE MatrixND operator-(const MatrixND &o) const {
+  TI_FORCE_INLINE MatrixND operator-(const MatrixND &o) const {
     return MatrixND([=](int i) { return this->d[i] - o[i]; });
   }
 
-  TC_FORCE_INLINE MatrixND &operator+=(const MatrixND &o) {
+  TI_FORCE_INLINE MatrixND &operator+=(const MatrixND &o) {
     return this->set([&](int i) { return this->d[i] + o[i]; });
   }
 
-  TC_FORCE_INLINE MatrixND &operator-=(const MatrixND &o) {
+  TI_FORCE_INLINE MatrixND &operator-=(const MatrixND &o) {
     return this->set([&](int i) { return this->d[i] - o[i]; });
   }
 
-  TC_FORCE_INLINE MatrixND operator-() const {
+  TI_FORCE_INLINE MatrixND operator-() const {
     return MatrixND([=](int i) { return -this->d[i]; });
   }
 
-  TC_FORCE_INLINE bool operator==(const MatrixND &o) const {
+  TI_FORCE_INLINE bool operator==(const MatrixND &o) const {
     for (int i = 0; i < dim; i++)
       for (int j = 0; j < dim; j++)
         if (d[i][j] != o[i][j])
@@ -781,7 +781,7 @@ struct MatrixND {
     return true;
   }
 
-  TC_FORCE_INLINE bool operator!=(const MatrixND &o) const {
+  TI_FORCE_INLINE bool operator!=(const MatrixND &o) const {
     for (int i = 0; i < dim; i++)
       for (int j = 0; j < dim; j++)
         if (d[i][j] != o[i][j])
@@ -789,7 +789,7 @@ struct MatrixND {
     return false;
   }
 
-  TC_FORCE_INLINE T frobenius_norm2() const {
+  TI_FORCE_INLINE T frobenius_norm2() const {
     T sum = d[0].length2();
     for (int i = 1; i < dim; i++) {
       sum += d[i].length2();
@@ -797,11 +797,11 @@ struct MatrixND {
     return sum;
   }
 
-  TC_FORCE_INLINE auto frobenius_norm() const {
+  TI_FORCE_INLINE auto frobenius_norm() const {
     return std::sqrt(frobenius_norm2());
   }
 
-  TC_FORCE_INLINE MatrixND transposed() const {
+  TI_FORCE_INLINE MatrixND transposed() const {
     MatrixND ret;
     for (int i = 0; i < dim; i++) {
       for (int j = 0; j < dim; j++) {
@@ -812,7 +812,7 @@ struct MatrixND {
   }
 
   template <typename G>
-  TC_FORCE_INLINE MatrixND<dim, G, ISE> cast() const {
+  TI_FORCE_INLINE MatrixND<dim, G, ISE> cast() const {
     return MatrixND<dim, G, ISE>(
         [=](int i) { return d[i].template cast<G>(); });
   }
@@ -837,7 +837,7 @@ struct MatrixND {
     return ret;
   }
 
-  TC_FORCE_INLINE Vector diag() const {
+  TI_FORCE_INLINE Vector diag() const {
     Vector ret;
     for (int i = 0; i < dim; i++) {
       ret[i] = this->d[i][i];
@@ -845,7 +845,7 @@ struct MatrixND {
     return ret;
   }
 
-  TC_FORCE_INLINE T sum() const {
+  TI_FORCE_INLINE T sum() const {
     T ret(0);
     for (int i = 0; i < dim; i++) {
       ret += this->d[i].sum();
@@ -853,15 +853,15 @@ struct MatrixND {
     return ret;
   }
 
-  TC_FORCE_INLINE T trace() const {
+  TI_FORCE_INLINE T trace() const {
     return this->diag().sum();
   }
 
-  TC_FORCE_INLINE T tr() const {
+  TI_FORCE_INLINE T tr() const {
     return this->trace();
   }
 
-  TC_FORCE_INLINE MatrixND
+  TI_FORCE_INLINE MatrixND
   elementwise_product(const MatrixND<dim, T> &o) const {
     MatrixND ret;
     for (int i = 0; i < dim; i++) {
@@ -870,12 +870,12 @@ struct MatrixND {
     return ret;
   }
 
-  TC_FORCE_INLINE static MatrixND identidy() {
+  TI_FORCE_INLINE static MatrixND identidy() {
     return MatrixND(1.0_f);
   }
 
-  TC_IO_DECL {
-    TC_STATIC_IF(TC_SERIALIZER_IS(TextSerializer)) {
+  TI_IO_DECL {
+    TI_STATIC_IF(TI_SERIALIZER_IS(TextSerializer)) {
       for (int i = 0; i < dim; i++) {
         std::string line = "[";
         for (int j = 0; j < dim; j++) {
@@ -885,15 +885,15 @@ struct MatrixND {
         serializer.add_line(line);
       }
     }
-    TC_STATIC_ELSE {
-      TC_IO(d);
+    TI_STATIC_ELSE {
+      TI_IO(d);
     }
-    TC_STATIC_END_IF
+    TI_STATIC_END_IF
   }
 };
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE MatrixND<dim, T, ISE> operator
+TI_FORCE_INLINE MatrixND<dim, T, ISE> operator
     *(const T a, const MatrixND<dim, T, ISE> &M) {
   MatrixND<dim, T, ISE> ret;
   for (int i = 0; i < dim; i++) {
@@ -903,19 +903,19 @@ TC_FORCE_INLINE MatrixND<dim, T, ISE> operator
 }
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE MatrixND<dim, T, ISE> operator*(const MatrixND<dim, T, ISE> &M,
+TI_FORCE_INLINE MatrixND<dim, T, ISE> operator*(const MatrixND<dim, T, ISE> &M,
                                                 const T a) {
   return a * M;
 }
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE MatrixND<dim, T, ISE> transpose(
+TI_FORCE_INLINE MatrixND<dim, T, ISE> transpose(
     const MatrixND<dim, T, ISE> &mat) {
   return mat.transposed();
 }
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE MatrixND<dim, T, ISE> transposed(
+TI_FORCE_INLINE MatrixND<dim, T, ISE> transposed(
     const MatrixND<dim, T, ISE> &mat) {
   return transpose(mat);
 }
@@ -936,89 +936,89 @@ using Matrix3d = MatrixND<3, float64, default_instruction_set>;
 using Matrix4d = MatrixND<4, float64, default_instruction_set>;
 
 template <typename T, InstSetExt ISE>
-TC_FORCE_INLINE real determinant(const MatrixND<2, T, ISE> &mat) {
+TI_FORCE_INLINE real determinant(const MatrixND<2, T, ISE> &mat) {
   return mat[0][0] * mat[1][1] - mat[0][1] * mat[1][0];
 }
 
 template <typename T, InstSetExt ISE>
-TC_FORCE_INLINE T determinant(const MatrixND<3, T, ISE> &mat) {
+TI_FORCE_INLINE T determinant(const MatrixND<3, T, ISE> &mat) {
   return mat[0][0] * (mat[1][1] * mat[2][2] - mat[2][1] * mat[1][2]) -
          mat[1][0] * (mat[0][1] * mat[2][2] - mat[2][1] * mat[0][2]) +
          mat[2][0] * (mat[0][1] * mat[1][2] - mat[1][1] * mat[0][2]);
 }
 
 template <typename T, InstSetExt ISE>
-TC_FORCE_INLINE T cross(const VectorND<2, T, ISE> &a,
+TI_FORCE_INLINE T cross(const VectorND<2, T, ISE> &a,
                         const VectorND<2, T, ISE> &b) {
   return a.x * b.y - a.y * b.x;
 }
 
 template <typename T, InstSetExt ISE>
-TC_FORCE_INLINE VectorND<3, T, ISE> cross(const VectorND<3, T, ISE> &a,
+TI_FORCE_INLINE VectorND<3, T, ISE> cross(const VectorND<3, T, ISE> &a,
                                           const VectorND<3, T, ISE> &b) {
   return VectorND<3, T, ISE>(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z,
                              a.x * b.y - a.y * b.x);
 }
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE T dot(const VectorND<dim, T, ISE> &a,
+TI_FORCE_INLINE T dot(const VectorND<dim, T, ISE> &a,
                       const VectorND<dim, T, ISE> &b) {
   return a.dot(b);
 }
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE VectorND<dim, T, ISE> normalize(
+TI_FORCE_INLINE VectorND<dim, T, ISE> normalize(
     const VectorND<dim, T, ISE> &a) {
   return (T(1) / a.length()) * a;
 }
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE VectorND<dim, T, ISE> normalized(
+TI_FORCE_INLINE VectorND<dim, T, ISE> normalized(
     const VectorND<dim, T, ISE> &a) {
   return normalize(a);
 }
 
-TC_FORCE_INLINE float32 length(const float32 &a) {
+TI_FORCE_INLINE float32 length(const float32 &a) {
   return a;
 }
 
-TC_FORCE_INLINE float64 length(const float64 &a) {
+TI_FORCE_INLINE float64 length(const float64 &a) {
   return a;
 }
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE T length(const VectorND<dim, T, ISE> &a) {
+TI_FORCE_INLINE T length(const VectorND<dim, T, ISE> &a) {
   return a.length();
 }
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE T length2(const VectorND<dim, T, ISE> &a) {
+TI_FORCE_INLINE T length2(const VectorND<dim, T, ISE> &a) {
   return dot(a, a);
 }
 
-TC_FORCE_INLINE float32 length2(const float32 &a) {
+TI_FORCE_INLINE float32 length2(const float32 &a) {
   return a * a;
 }
 
-TC_FORCE_INLINE float64 length2(const float64 &a) {
+TI_FORCE_INLINE float64 length2(const float64 &a) {
   return a * a;
 }
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE VectorND<dim, T, ISE> fract(const VectorND<dim, T, ISE> &a) {
+TI_FORCE_INLINE VectorND<dim, T, ISE> fract(const VectorND<dim, T, ISE> &a) {
   return a.fract();
 }
 
-TC_FORCE_INLINE float32 inversed(const float32 &a) {
+TI_FORCE_INLINE float32 inversed(const float32 &a) {
   return 1.0_f32 / a;
 }
 
-TC_FORCE_INLINE float64 inversed(const float64 &a) {
+TI_FORCE_INLINE float64 inversed(const float64 &a) {
   return 1.0_f64 / a;
 }
 
 template <InstSetExt ISE, typename T>
-TC_FORCE_INLINE MatrixND<2, T, ISE> inversed(const MatrixND<2, T, ISE> &mat) {
+TI_FORCE_INLINE MatrixND<2, T, ISE> inversed(const MatrixND<2, T, ISE> &mat) {
   T det = determinant(mat);
   return static_cast<T>(1) / det *
          MatrixND<2, T, ISE>(VectorND<2, T, ISE>(mat[1][1], -mat[0][1]),
@@ -1277,18 +1277,18 @@ MatrixND<4, T, ISE> inversed(const MatrixND<4, T, ISE> &m) {
 }
 
 template <int dim, typename T, InstSetExt ISE>
-TC_FORCE_INLINE MatrixND<dim, T, ISE> inverse(const MatrixND<dim, T, ISE> &m) {
+TI_FORCE_INLINE MatrixND<dim, T, ISE> inverse(const MatrixND<dim, T, ISE> &m) {
   return inversed(m);
 }
 
-TC_FORCE_INLINE Vector3 multiply_matrix4(const Matrix4 &m,
+TI_FORCE_INLINE Vector3 multiply_matrix4(const Matrix4 &m,
                                          const Vector3 &v,
                                          real w) {
   return Vector3(m * Vector4(v, w));
 }
 
 template <int dim>
-TC_FORCE_INLINE VectorND<dim, real> transform(const MatrixND<dim + 1, real> &m,
+TI_FORCE_INLINE VectorND<dim, real> transform(const MatrixND<dim + 1, real> &m,
                                               const VectorND<dim, real> &v,
                                               real w = 1.0_f) {
   return VectorND<dim, real>(m * VectorND<dim + 1, real>(v, w));
@@ -1317,7 +1317,7 @@ struct is_matrix<MatrixND<dim, T, ISE>> {
 };
 
 template <int dim, typename T>
-TC_FORCE_INLINE VectorND<dim, T> min(const VectorND<dim, T> &a,
+TI_FORCE_INLINE VectorND<dim, T> min(const VectorND<dim, T> &a,
                                      const VectorND<dim, T> &b) {
   VectorND<dim, T> ret;
   for (int i = 0; i < dim; i++) {
@@ -1327,7 +1327,7 @@ TC_FORCE_INLINE VectorND<dim, T> min(const VectorND<dim, T> &a,
 }
 
 template <int dim, typename T>
-TC_FORCE_INLINE VectorND<dim, T> max(const VectorND<dim, T> &a,
+TI_FORCE_INLINE VectorND<dim, T> max(const VectorND<dim, T> &a,
                                      const VectorND<dim, T> &b) {
   VectorND<dim, T> ret;
   for (int i = 0; i < dim; i++) {
@@ -1448,8 +1448,8 @@ template <int N, typename T, InstSetExt ISE>
 struct is_MatrixND<MatrixND<N, T, ISE>> : public std::true_type {};
 }  // namespace type
 
-#if defined(TC_AMALGAMATED)
-TC_FORCE_INLINE void polar_decomp(Matrix2 m, Matrix2 &R, Matrix2 &S) {
+#if defined(TI_AMALGAMATED)
+TI_FORCE_INLINE void polar_decomp(Matrix2 m, Matrix2 &R, Matrix2 &S) {
   auto x = m(0, 0) + m(1, 1);
   auto y = m(1, 0) - m(0, 1);
   auto scale = 1.0_f / std::sqrt(x * x + y * y);
@@ -1505,29 +1505,29 @@ inline void test_simple_decompositions() {
     Matrix U, sig, V, Q, R, S;
 
     polar_decomp(m, R, S);
-    TC_CHECK_EQUAL(m, R * S, tolerance);
-    TC_CHECK_EQUAL(Matrix(1), R * transposed(R), tolerance);
-    TC_CHECK_EQUAL(1.0_f, determinant(R), tolerance);
-    TC_CHECK_EQUAL(S, transposed(S), tolerance);
+    TI_CHECK_EQUAL(m, R * S, tolerance);
+    TI_CHECK_EQUAL(Matrix(1), R * transposed(R), tolerance);
+    TI_CHECK_EQUAL(1.0_f, determinant(R), tolerance);
+    TI_CHECK_EQUAL(S, transposed(S), tolerance);
 
     svd(m, U, sig, V);
     if (dim == 2) {
       CHECK(tolerance + sig(0, 0) > std::abs(sig(1, 1)));
     }
-    TC_CHECK_EQUAL(m, U * sig * transposed(V), tolerance);
-    TC_CHECK_EQUAL(Matrix(1), U * transposed(U), tolerance);
-    TC_CHECK_EQUAL(Matrix(1), V * transposed(V), tolerance);
-    TC_CHECK_EQUAL(1.0_f, determinant(U), tolerance);
-    TC_CHECK_EQUAL(1.0_f, determinant(V), tolerance);
-    TC_CHECK_EQUAL(sig, Matrix(sig.diag()), tolerance);
+    TI_CHECK_EQUAL(m, U * sig * transposed(V), tolerance);
+    TI_CHECK_EQUAL(Matrix(1), U * transposed(U), tolerance);
+    TI_CHECK_EQUAL(Matrix(1), V * transposed(V), tolerance);
+    TI_CHECK_EQUAL(1.0_f, determinant(U), tolerance);
+    TI_CHECK_EQUAL(1.0_f, determinant(V), tolerance);
+    TI_CHECK_EQUAL(sig, Matrix(sig.diag()), tolerance);
   }
 };
 
 /*
-TC_TEST("SVD") {
+TI_TEST("SVD") {
   test_simple_decompositions<2, float32>();
 }
 */
 #endif
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/math/math.h b/taichi/math/math.h
index d9b4bf0bffc4c..9cf0eca11581b 100644
--- a/taichi/math/math.h
+++ b/taichi/math/math.h
@@ -10,16 +10,16 @@
 #include "array.h"
 #include "linalg.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 namespace math {
 template <typename T>
-TC_FORCE_INLINE T degrees(T rad) {
+TI_FORCE_INLINE T degrees(T rad) {
   return rad * (type::element<T>(180) / pi);
 }
 
 template <typename T>
-TC_FORCE_INLINE T radians(T deg) {
+TI_FORCE_INLINE T radians(T deg) {
   return deg * (pi / type::element<T>(180));
 }
 
@@ -27,25 +27,25 @@ TC_FORCE_INLINE T radians(T deg) {
 template <typename F, typename T>
 inline T map(const T &t, const F &f) {
   T ret;
-  TC_STATIC_IF(type::is_VectorND<T>()) {
+  TI_STATIC_IF(type::is_VectorND<T>()) {
     for (int i = 0; i < std::decay_t<decltype(t)>::dim; i++) {
       ret[i] = f(t[i]);
     }
   }
-  TC_STATIC_ELSE{
-    TC_STATIC_IF(type::is_MatrixND<T>()){
+  TI_STATIC_ELSE{
+    TI_STATIC_IF(type::is_MatrixND<T>()){
       for (int i = 0; i < std::decay_t<decltype(t)>::dim; i++){
         for (int j = 0; j < std::decay_t<decltype(t)>::dim; j++){
           ret[i][j] = f(t(i, j));
         }
       }
     }
-    TC_STATIC_ELSE {
+    TI_STATIC_ELSE {
       ret = f(t);
     }
-    TC_STATIC_END_IF
+    TI_STATIC_END_IF
   }
-  TC_STATIC_END_IF
+  TI_STATIC_END_IF
   return ret;
 }
 // clang-format on
@@ -54,14 +54,14 @@ inline T map(const T &t, const F &f) {
 template <typename T>
 inline type::element<T> maximum(const T &t) {
   typename type::element<T> ret;
-  TC_STATIC_IF(type::is_VectorND<T>()) {
+  TI_STATIC_IF(type::is_VectorND<T>()) {
     ret = t(0);
     for (int i = 1; i < T::dim; i++) {
       ret = std::max(ret, t(i));
     }
   }
-  TC_STATIC_ELSE {
-    TC_STATIC_IF(type::is_MatrixND<T>()) {
+  TI_STATIC_ELSE {
+    TI_STATIC_IF(type::is_MatrixND<T>()) {
       ret = t(0, 0);
       for (int i = 0; i < T::dim; i++){
         for (int j = 0; j < T::dim; j++){
@@ -69,12 +69,12 @@ inline type::element<T> maximum(const T &t) {
         }
       }
     }
-    TC_STATIC_ELSE {
+    TI_STATIC_ELSE {
       ret = t;
     }
-    TC_STATIC_END_IF
+    TI_STATIC_END_IF
   }
-  TC_STATIC_END_IF
+  TI_STATIC_END_IF
   return ret;
 }
 // clang-format on
@@ -83,14 +83,14 @@ inline type::element<T> maximum(const T &t) {
 template <typename T>
 inline type::element<T> minimum(const T &t) {
   typename type::element<T> ret;
-  TC_STATIC_IF(type::is_VectorND<T>()) {
+  TI_STATIC_IF(type::is_VectorND<T>()) {
     ret = t(0);
     for (int i = 1; i < T::dim; i++) {
       ret = std::min(ret, t(i));
     }
   }
-  TC_STATIC_ELSE {
-    TC_STATIC_IF(type::is_MatrixND<T>()) {
+  TI_STATIC_ELSE {
+    TI_STATIC_IF(type::is_MatrixND<T>()) {
       ret = t(0, 0);
       for (int i = 0; i < T::dim; i++){
         for (int j = 0; j < T::dim; j++){
@@ -98,12 +98,12 @@ inline type::element<T> minimum(const T &t) {
         }
       }
     }
-    TC_STATIC_ELSE {
+    TI_STATIC_ELSE {
       ret = t;
     }
-    TC_STATIC_END_IF
+    TI_STATIC_END_IF
   }
-  TC_STATIC_END_IF
+  TI_STATIC_END_IF
   return ret;
 }
 // clang-format on
@@ -112,80 +112,80 @@ inline type::element<T> minimum(const T &t) {
 template <typename T>
 inline type::element<T> sum(const T &t) {
   typename type::element<T> ret = 0;
-  TC_STATIC_IF(type::is_VectorND<T>()) {
+  TI_STATIC_IF(type::is_VectorND<T>()) {
     for (int i = 0; i < std::decay_t<decltype(t)>::dim; i++) {
       ret += t(i);
     }
   }
-  TC_STATIC_ELSE {
-    TC_STATIC_IF(type::is_MatrixND<T>()) {
+  TI_STATIC_ELSE {
+    TI_STATIC_IF(type::is_MatrixND<T>()) {
       for (int i = 0; i < std::decay_t<decltype(t)>::dim; i++){
         for (int j = 0; j < std::decay_t<decltype(t)>::dim; j++){
           ret += t(i, j);
         }
       }
     }
-    TC_STATIC_ELSE {
+    TI_STATIC_ELSE {
       ret = t;
     }
-    TC_STATIC_END_IF
+    TI_STATIC_END_IF
   }
-  TC_STATIC_END_IF
+  TI_STATIC_END_IF
   return ret;
 }
 
 template <typename T>
 inline type::element<T> prod(const T &t) {
   typename type::element<T> ret = 1;
-  TC_STATIC_IF(type::is_VectorND<T>()) {
+  TI_STATIC_IF(type::is_VectorND<T>()) {
     for (int i = 0; i < T::dim; i++) {
       ret *= t(i);
     }
-  } TC_STATIC_ELSE {
-    TC_STATIC_IF(type::is_MatrixND<T>()) {
+  } TI_STATIC_ELSE {
+    TI_STATIC_IF(type::is_MatrixND<T>()) {
       for (int i = 0; i < T::dim; i++) {
         for (int j = 0; j < T::dim; j++) {
           ret *= t(i, j);
         }
       }
-    } TC_STATIC_ELSE {
+    } TI_STATIC_ELSE {
       ret = t;
-    } TC_STATIC_END_IF
-  } TC_STATIC_END_IF
+    } TI_STATIC_END_IF
+  } TI_STATIC_END_IF
   return ret;
 }
 // clang-format on
 
-#define TC_MAKE_VECTORIZED_FROM_STD(op)                  \
+#define TI_MAKE_VECTORIZED_FROM_STD(op)                  \
   template <typename T>                                  \
   inline T op(const T &t) {                              \
     using Elem = typename type::element<decltype(t)>;    \
     return map(t, static_cast<Elem (*)(Elem)>(std::op)); \
   }
 
-TC_MAKE_VECTORIZED_FROM_STD(abs);
-TC_MAKE_VECTORIZED_FROM_STD(log);
-TC_MAKE_VECTORIZED_FROM_STD(exp);
-TC_MAKE_VECTORIZED_FROM_STD(sin);
-TC_MAKE_VECTORIZED_FROM_STD(cos);
-TC_MAKE_VECTORIZED_FROM_STD(tan);
-TC_MAKE_VECTORIZED_FROM_STD(asin);
-TC_MAKE_VECTORIZED_FROM_STD(acos);
-TC_MAKE_VECTORIZED_FROM_STD(atan);
-TC_MAKE_VECTORIZED_FROM_STD(tanh);
-TC_MAKE_VECTORIZED_FROM_STD(ceil);
-TC_MAKE_VECTORIZED_FROM_STD(floor);
-TC_MAKE_VECTORIZED_FROM_STD(sqrt);
+TI_MAKE_VECTORIZED_FROM_STD(abs);
+TI_MAKE_VECTORIZED_FROM_STD(log);
+TI_MAKE_VECTORIZED_FROM_STD(exp);
+TI_MAKE_VECTORIZED_FROM_STD(sin);
+TI_MAKE_VECTORIZED_FROM_STD(cos);
+TI_MAKE_VECTORIZED_FROM_STD(tan);
+TI_MAKE_VECTORIZED_FROM_STD(asin);
+TI_MAKE_VECTORIZED_FROM_STD(acos);
+TI_MAKE_VECTORIZED_FROM_STD(atan);
+TI_MAKE_VECTORIZED_FROM_STD(tanh);
+TI_MAKE_VECTORIZED_FROM_STD(ceil);
+TI_MAKE_VECTORIZED_FROM_STD(floor);
+TI_MAKE_VECTORIZED_FROM_STD(sqrt);
 
 template <typename T>
-TC_FORCE_INLINE
+TI_FORCE_INLINE
     typename std::enable_if_t<!std::is_floating_point<T>::value, bool>
     equal(const T &A, const T &B, float64 tolerance) {
   return maximum(abs(A - B)) <= tolerance;
 }
 
 template <typename T>
-TC_FORCE_INLINE
+TI_FORCE_INLINE
     typename std::enable_if_t<std::is_floating_point<T>::value, bool>
     equal(const T &A, const T &B, float64 tolerance) {
   return std::abs(A - B) <= tolerance;
@@ -197,13 +197,13 @@ template <int dim, typename T, InstSetExt ISE>
 template <typename T_,
           typename std::enable_if_t<std::is_same<T_, int>::value, int>>
 VectorND<dim, T, ISE>::VectorND(const TIndex<dim> &ind) {
-  TC_STATIC_ASSERT(2 <= dim && dim <= 3);
+  TI_STATIC_ASSERT(2 <= dim && dim <= 3);
   d[0] = ind.i;
   d[1] = ind.j;
-  TC_STATIC_IF(dim == 3) {
+  TI_STATIC_IF(dim == 3) {
     this->d[2] = ind.k;
   }
-  TC_STATIC_END_IF
+  TI_STATIC_END_IF
 };
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/math/scalar.h b/taichi/math/scalar.h
index d93606686e3b9..9d913f4c85dd9 100644
--- a/taichi/math/scalar.h
+++ b/taichi/math/scalar.h
@@ -8,7 +8,7 @@
 #include <cmath>
 #include <taichi/common/util.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 #undef max
 #undef min
@@ -28,7 +28,7 @@ const real pi{acosf(-1.0_f)};
 const real eps = 1e-6_f;
 
 template <int I, typename T>
-constexpr TC_FORCE_INLINE T pow(T a) noexcept {
+constexpr TI_FORCE_INLINE T pow(T a) noexcept {
   T ret(1);
   for (int i = 0; i < I; i++) {
     ret *= a;
@@ -36,16 +36,16 @@ constexpr TC_FORCE_INLINE T pow(T a) noexcept {
   return ret;
 };
 
-TC_FORCE_INLINE float32 fract(float32 a) noexcept {
+TI_FORCE_INLINE float32 fract(float32 a) noexcept {
   return a - (int)floor(a);
 }
 
-TC_FORCE_INLINE float64 fract(float64 a) noexcept {
+TI_FORCE_INLINE float64 fract(float64 a) noexcept {
   return a - (int)floor(a);
 }
 
 template <typename T>
-TC_FORCE_INLINE T clamp(const T &a, const T &min, const T &max) noexcept {
+TI_FORCE_INLINE T clamp(const T &a, const T &min, const T &max) noexcept {
   if (a < min)
     return min;
   if (a > max)
@@ -54,7 +54,7 @@ TC_FORCE_INLINE T clamp(const T &a, const T &min, const T &max) noexcept {
 }
 
 template <typename T>
-TC_FORCE_INLINE T clamp01(const T &a) noexcept {
+TI_FORCE_INLINE T clamp01(const T &a) noexcept {
   if (a < T(0))
     return T(0);
   if (a > T(1))
@@ -63,21 +63,21 @@ TC_FORCE_INLINE T clamp01(const T &a) noexcept {
 }
 
 template <typename T>
-TC_FORCE_INLINE T clamp(const T &a) noexcept {
+TI_FORCE_INLINE T clamp(const T &a) noexcept {
   return clamp01(a);
 }
 
 template <typename T, typename V>
-TC_FORCE_INLINE V lerp(T a, V x_0, V x_1) noexcept {
+TI_FORCE_INLINE V lerp(T a, V x_0, V x_1) noexcept {
   return V((T(1) - a) * x_0 + a * x_1);
 }
 
 template <typename T>
-TC_FORCE_INLINE T sqr(const T &a) noexcept {
+TI_FORCE_INLINE T sqr(const T &a) noexcept {
   return pow<2>(a);
 }
 
-TC_FORCE_INLINE int sgn(float a) noexcept {
+TI_FORCE_INLINE int sgn(float a) noexcept {
   if (a < -eps)
     return -1;
   else if (a > eps)
@@ -85,7 +85,7 @@ TC_FORCE_INLINE int sgn(float a) noexcept {
   return 0;
 }
 
-TC_FORCE_INLINE int sgn(double a) noexcept {
+TI_FORCE_INLINE int sgn(double a) noexcept {
   if (a < -eps)
     return -1;
   else if (a > eps)
@@ -93,7 +93,7 @@ TC_FORCE_INLINE int sgn(double a) noexcept {
   return 0;
 }
 
-TC_FORCE_INLINE uint32 rand_int() noexcept {
+TI_FORCE_INLINE uint32 rand_int() noexcept {
   static unsigned int x = 123456789, y = 362436069, z = 521288629, w = 88675123;
   unsigned int t = x ^ (x << 11);
   x = y;
@@ -102,30 +102,30 @@ TC_FORCE_INLINE uint32 rand_int() noexcept {
   return (w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)));
 }
 
-TC_FORCE_INLINE uint64 rand_int64() noexcept {
+TI_FORCE_INLINE uint64 rand_int64() noexcept {
   return ((uint64)rand_int() << 32) + rand_int();
 }
 
 // inline float frand() { return (float)rand() / (RAND_MAX + 1); }
-TC_FORCE_INLINE float32 rand() noexcept {
+TI_FORCE_INLINE float32 rand() noexcept {
   return rand_int() * (1.0_f / 4294967296.0f);
 }
 
 template <typename T>
-TC_FORCE_INLINE T rand() noexcept;
+TI_FORCE_INLINE T rand() noexcept;
 
 template <>
-TC_FORCE_INLINE float rand<float>() noexcept {
+TI_FORCE_INLINE float rand<float>() noexcept {
   return rand_int() * (1.0_f / 4294967296.0f);
 }
 
 template <>
-TC_FORCE_INLINE double rand<double>() noexcept {
+TI_FORCE_INLINE double rand<double>() noexcept {
   return rand_int() * (1.0 / 4294967296.0);
 }
 
 template <>
-TC_FORCE_INLINE int rand<int>() noexcept {
+TI_FORCE_INLINE int rand<int>() noexcept {
   return rand_int();
 }
 
@@ -139,25 +139,25 @@ inline int is_prime(int a) noexcept {
 }
 
 template <typename T>
-TC_FORCE_INLINE T hypot2(const T &x, const T &y) noexcept {
+TI_FORCE_INLINE T hypot2(const T &x, const T &y) noexcept {
   return x * x + y * y;
 }
 
-TC_FORCE_INLINE float32 pow(const float32 &a, const float32 &b) noexcept {
+TI_FORCE_INLINE float32 pow(const float32 &a, const float32 &b) noexcept {
   return ::pow(a, b);
 }
 
-TC_FORCE_INLINE float64 pow(const float64 &a, const float64 &b) noexcept {
+TI_FORCE_INLINE float64 pow(const float64 &a, const float64 &b) noexcept {
   return ::pow(a, b);
 }
 
 template <typename T>
-TC_FORCE_INLINE bool is_normal(T m) noexcept {
+TI_FORCE_INLINE bool is_normal(T m) noexcept {
   return std::isfinite(m);
 }
 
 template <typename T>
-TC_FORCE_INLINE bool abnormal(T m) noexcept {
+TI_FORCE_INLINE bool abnormal(T m) noexcept {
   return !is_normal(m);
 }
 
@@ -171,4 +171,4 @@ inline int64 get_largest_pot(int64 a) noexcept {
   return i;
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/math/sifakis_svd.h b/taichi/math/sifakis_svd.h
index 31d1c7732b543..49fe88efe4919 100644
--- a/taichi/math/sifakis_svd.h
+++ b/taichi/math/sifakis_svd.h
@@ -38,7 +38,7 @@ A. McAdams, A. Selle, R. Tamstorf, J. Teran and E. Sifakis
 // POSSIBILITY OF SUCH DAMAGE.
 //#####################################################################
 
-TC_FORCE_INLINE float rsqrt(const float f) {
+TI_FORCE_INLINE float rsqrt(const float f) {
   return 1.0f / std::sqrt(f);
 }
 
@@ -49,7 +49,7 @@ constexpr float Cosine_Pi_Over_Eight =
     0.9238795325112867f;  //.5 * sqrt(2. + sqrt(2.));
 
 template <int sweeps = 4>
-TC_FORCE_INLINE void svd(const float a11,
+TI_FORCE_INLINE void svd(const float a11,
                          const float a12,
                          const float a13,
                          const float a21,
diff --git a/taichi/memory_pool.cpp b/taichi/memory_pool.cpp
index 45d781631935c..11476619985e6 100644
--- a/taichi/memory_pool.cpp
+++ b/taichi/memory_pool.cpp
@@ -11,7 +11,7 @@
 TLANG_NAMESPACE_BEGIN
 
 MemoryPool::MemoryPool(Program *prog) : prog(prog) {
-  TC_TRACE("Memory pool created. Default buffer size per allocator = {} MB",
+  TI_TRACE("Memory pool created. Default buffer size per allocator = {} MB",
           default_allocator_size / 1024 / 1024);
   terminating = false;
   killed = false;
@@ -46,7 +46,7 @@ void *MemoryPool::allocate(std::size_t size, std::size_t alignment) {
         std::make_unique<UnifiedAllocator>(new_buffer_size, prog->config.arch));
     ret = allocators.back()->allocate(size, alignment);
   }
-  TC_ASSERT(ret);
+  TI_ASSERT(ret);
   return ret;
 }
 
@@ -59,7 +59,7 @@ T MemoryPool::fetch(volatile void *ptr) {
                                       cudaMemcpyDeviceToHost, cuda_stream));
     check_cuda_error(cudaStreamSynchronize(cuda_stream));
 #else
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
 #endif
   } else {
     ret = *(T *)ptr;
@@ -75,7 +75,7 @@ void MemoryPool::push(volatile T *dest, const T &val) {
                                       cudaMemcpyHostToDevice, cuda_stream));
     check_cuda_error(cudaStreamSynchronize(cuda_stream));
 #else
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
 #endif
   } else {
     *(T *)dest = val;
@@ -100,16 +100,16 @@ void MemoryPool::daemon() {
     if (tail > processed_tail) {
       // allocate new buffer
       auto i = processed_tail;
-      TC_DEBUG("Processing memory alloc request {}", i);
+      TI_DEBUG("Processing memory alloc request {}", i);
       auto req = fetch<MemRequest>(&queue->requests[i]);
       if (req.size == 0 || req.alignment == 0) {
-        TC_DEBUG(" Incomplete memory alloc request {} fetched. Skipping", i);
+        TI_DEBUG(" Incomplete memory alloc request {} fetched. Skipping", i);
         continue;
       }
-      TC_DEBUG("  Allocating memory {} B (alignment {}B) ", req.size,
+      TI_DEBUG("  Allocating memory {} B (alignment {}B) ", req.size,
                req.alignment);
       auto ptr = allocate(req.size, req.alignment);
-      TC_DEBUG("  Allocated. Ptr = {:p}", ptr);
+      TI_DEBUG("  Allocated. Ptr = {:p}", ptr);
       push(&queue->requests[i].ptr, (uint8 *)ptr);
       processed_tail += 1;
     }
@@ -122,7 +122,7 @@ void MemoryPool::terminate() {
     terminating = true;
   }
   th->join();
-  TC_ASSERT(killed);
+  TI_ASSERT(killed);
 #ifdef TI_WITH_CUDA
   check_cuda_error(cudaStreamDestroy(cuda_stream));
 #endif
diff --git a/taichi/platform/mac/objc_api.cpp b/taichi/platform/mac/objc_api.cpp
index 3527fc818a9d7..0a0141ecd2513 100644
--- a/taichi/platform/mac/objc_api.cpp
+++ b/taichi/platform/mac/objc_api.cpp
@@ -1,14 +1,14 @@
 #include "objc_api.h"
 
-#ifdef TC_PLATFORM_OSX
+#ifdef TI_PLATFORM_OSX
 
 namespace taichi {
 namespace mac {
 
-nsobj_unique_ptr<TC_NSString> wrap_string_as_ns_string(const std::string &str) {
+nsobj_unique_ptr<TI_NSString> wrap_string_as_ns_string(const std::string &str) {
   constexpr int kNSUTF8StringEncoding = 4;
   id ns_string = clscall("NSString", "alloc");
-  auto *ptr = cast_call<TC_NSString *>(
+  auto *ptr = cast_call<TI_NSString *>(
       ns_string,
       "initWithBytesNoCopy:length:encoding:freeWhenDone:", str.data(),
       str.size(), kNSUTF8StringEncoding, false);
@@ -18,4 +18,4 @@ nsobj_unique_ptr<TC_NSString> wrap_string_as_ns_string(const std::string &str) {
 }  // namespace mac
 }  // namespace taichi
 
-#endif  // TC_PLATFORM_OSX
+#endif  // TI_PLATFORM_OSX
diff --git a/taichi/platform/mac/objc_api.h b/taichi/platform/mac/objc_api.h
index 61273ffcaba43..faaf6ee1e0f19 100644
--- a/taichi/platform/mac/objc_api.h
+++ b/taichi/platform/mac/objc_api.h
@@ -2,7 +2,7 @@
 
 #include <taichi/common/util.h>
 
-#ifdef TC_PLATFORM_OSX
+#ifdef TI_PLATFORM_OSX
 
 #include <objc/message.h>
 #include <objc/objc.h>
@@ -44,15 +44,15 @@ nsobj_unique_ptr<O> wrap_as_nsobj_unique_ptr(O *nsobj) {
   return nsobj_unique_ptr<O>(nsobj);
 }
 
-// Prepend "TC_" to native ObjC type names, otherwise clang-format thinks this
+// Prepend "TI_" to native ObjC type names, otherwise clang-format thinks this
 // is an ObjC file and is not happy formatting it.
-struct TC_NSString;
+struct TI_NSString;
 
 // |str| must exist during the entire lifetime of the returned object, as it
 // does not own the underlying memory. Think of it as std::string_view.
-nsobj_unique_ptr<TC_NSString> wrap_string_as_ns_string(const std::string &str);
+nsobj_unique_ptr<TI_NSString> wrap_string_as_ns_string(const std::string &str);
 
 }  // namespace mac
 }  // namespace taichi
 
-#endif  // TC_PLATFORM_OSX
+#endif  // TI_PLATFORM_OSX
diff --git a/taichi/platform/metal/helpers.metal.h b/taichi/platform/metal/helpers.metal.h
index 97c18f614c67c..51912e113697f 100644
--- a/taichi/platform/metal/helpers.metal.h
+++ b/taichi/platform/metal/helpers.metal.h
@@ -1,4 +1,4 @@
-#ifdef TC_INSIDE_METAL_CODEGEN
+#ifdef TI_INSIDE_METAL_CODEGEN
 
 #define METAL_BEGIN_HELPERS_DEF constexpr auto kMetalHelpersSourceCode =
 #define METAL_END_HELPERS_DEF ;
@@ -27,7 +27,7 @@ namespace metal {
 bool memory_order_relaxed = false;
 }  // namespace metal
 
-#endif  // TC_INSIDE_METAL_CODEGEN
+#endif  // TI_INSIDE_METAL_CODEGEN
 
 METAL_BEGIN_HELPERS_DEF
 STR(
diff --git a/taichi/platform/metal/metal_api.cpp b/taichi/platform/metal/metal_api.cpp
index 62ed09d70441f..170fb7d7cd3e8 100644
--- a/taichi/platform/metal/metal_api.cpp
+++ b/taichi/platform/metal/metal_api.cpp
@@ -4,7 +4,7 @@ TLANG_NAMESPACE_BEGIN
 
 namespace metal {
 
-#ifdef TC_PLATFORM_OSX
+#ifdef TI_PLATFORM_OSX
 
 extern "C" {
 id MTLCreateSystemDefaultDevice();
@@ -124,10 +124,10 @@ size_t get_max_total_threads_per_threadgroup(
   return (size_t)call(pipeline_state, "maxTotalThreadsPerThreadgroup");
 }
 
-#endif  // TC_PLATFORM_OSX
+#endif  // TI_PLATFORM_OSX
 
 bool is_metal_api_available() {
-#ifdef TC_PLATFORM_OSX
+#ifdef TI_PLATFORM_OSX
   // If the macOS is provided by a VM (e.g. Travis CI), it's possible that there
   // is no GPU device, so we still have to do a runtime check.
   auto device = mtl_create_system_default_device();
diff --git a/taichi/platform/metal/metal_api.h b/taichi/platform/metal/metal_api.h
index fe2dc6a03f0bb..56dd4b035d70c 100644
--- a/taichi/platform/metal/metal_api.h
+++ b/taichi/platform/metal/metal_api.h
@@ -25,7 +25,7 @@ struct MTLFunction;
 struct MTLComputePipelineState;
 struct MTLBuffer;
 
-#ifdef TC_PLATFORM_OSX
+#ifdef TI_PLATFORM_OSX
 
 using mac::nsobj_unique_ptr;
 
@@ -93,7 +93,7 @@ inline void *mtl_buffer_contents(MTLBuffer *buffer) {
 
 size_t get_max_total_threads_per_threadgroup(
     MTLComputePipelineState *pipeline_state);
-#endif  // TC_PLATFORM_OSX
+#endif  // TI_PLATFORM_OSX
 
 bool is_metal_api_available();
 
diff --git a/taichi/platform/metal/metal_data_types.cpp b/taichi/platform/metal/metal_data_types.cpp
index 4a7eff09a65f6..fac9839ab43e0 100644
--- a/taichi/platform/metal/metal_data_types.cpp
+++ b/taichi/platform/metal/metal_data_types.cpp
@@ -22,7 +22,7 @@ MetalDataType to_metal_type(DataType dt) {
 #undef METAL_CASE
 
     default:
-      TC_NOT_IMPLEMENTED;
+      TI_NOT_IMPLEMENTED;
       break;
   }
   return MetalDataType::unknown;
@@ -53,7 +53,7 @@ std::string metal_data_type_name(MetalDataType dt) {
     case MetalDataType::unknown:
       return "unknown";
     default:
-      TC_NOT_IMPLEMENTED;
+      TI_NOT_IMPLEMENTED;
       break;
   }
   return "";
@@ -82,7 +82,7 @@ size_t metal_data_type_bytes(MetalDataType dt) {
     case MetalDataType::u64:
       return 8;
     default:
-      TC_NOT_IMPLEMENTED;
+      TI_NOT_IMPLEMENTED;
       break;
   }
   return 0;
@@ -130,7 +130,7 @@ std::string metal_unary_op_type_symbol(UnaryOpType type) {
   // case UnaryOpType::rcp:
   // case UnaryOpType::undefined:
   default:
-    TC_NOT_IMPLEMENTED;
+    TI_NOT_IMPLEMENTED;
   }
   return "";
 }
diff --git a/taichi/platform/metal/metal_kernel_util.cpp b/taichi/platform/metal/metal_kernel_util.cpp
index 5dd6a89614059..cccf8acd681cc 100644
--- a/taichi/platform/metal/metal_kernel_util.cpp
+++ b/taichi/platform/metal/metal_kernel_util.cpp
@@ -18,9 +18,9 @@ int MetalKernelArgsAttributes::insert_arg(DataType dt,
   if (dt_bytes != 4) {
     // Metal doesn't support 64bit data buffers.
     // TODO(k-ye): See if Metal supports less-than-32bit data buffers.
-    TC_WARN("Metal kernel only supports 32-bit data, got {}",
+    TI_WARN("Metal kernel only supports 32-bit data, got {}",
             metal_data_type_name(a.dt));
-    TC_NOT_IMPLEMENTED;
+    TI_NOT_IMPLEMENTED;
   }
   a.is_array = is_array;
   a.stride = is_array ? size : dt_bytes;
diff --git a/taichi/platform/metal/metal_runtime.cpp b/taichi/platform/metal/metal_runtime.cpp
index 9c66a35282169..470dd2dc2df0f 100644
--- a/taichi/platform/metal/metal_runtime.cpp
+++ b/taichi/platform/metal/metal_runtime.cpp
@@ -7,17 +7,17 @@
 #include <taichi/context.h>
 #undef TI_RUNTIME_HOST
 
-#ifdef TC_PLATFORM_OSX
+#ifdef TI_PLATFORM_OSX
 #include <sys/mman.h>
 #include <unistd.h>
 
 #include "metal_api.h"
-#endif  // TC_PLATFORM_OSX
+#endif  // TI_PLATFORM_OSX
 
 TLANG_NAMESPACE_BEGIN
 namespace metal {
 
-#ifdef TC_PLATFORM_OSX
+#ifdef TI_PLATFORM_OSX
 
 namespace {
 using KernelTaskType = OffloadedStmt::TaskType;
@@ -32,7 +32,7 @@ class BufferMemoryView {
     // Both |ptr_| and |size_| must be aligned to page size.
     size_ = ((size + pagesize - 1) / pagesize) * pagesize;
     ptr_ = mem_pool->allocate(size_, pagesize);
-    TC_ASSERT(ptr_ != nullptr);
+    TI_ASSERT(ptr_ != nullptr);
   }
 
   inline size_t size() const { return size_; }
@@ -52,13 +52,13 @@ class CompiledMtlKernel {
         pipeline_state_(new_compute_pipeline_state_with_function(device, func)),
         profiler_(profiler),
         profiler_id_(fmt::format("{}_dispatch", kernel_attribs_.name)) {
-    TC_ASSERT(pipeline_state_ != nullptr);
+    TI_ASSERT(pipeline_state_ != nullptr);
   }
 
   void launch(MTLBuffer *root_buffer, MTLBuffer *global_tmps_buffer,
               MTLBuffer *args_buffer, MTLCommandBuffer *command_buffer) {
     // 0 is valid for |num_threads|!
-    TC_ASSERT(kernel_attribs_.num_threads >= 0);
+    TI_ASSERT(kernel_attribs_.num_threads >= 0);
     launch_if_not_empty(root_buffer, global_tmps_buffer, args_buffer,
                         command_buffer);
     if ((kernel_attribs_.task_type == KernelTaskType::range_for) &&
@@ -84,7 +84,7 @@ class CompiledMtlKernel {
     }
     profiler_->start(profiler_id_);
     auto encoder = new_compute_command_encoder(command_buffer);
-    TC_ASSERT(encoder != nullptr);
+    TI_ASSERT(encoder != nullptr);
 
     set_compute_pipeline_state(encoder.get(), pipeline_state_.get());
     int buffer_index = 0;
@@ -124,10 +124,10 @@ class CompiledTaichiKernel {
         mtl_source_code_(source_code),
         profiler_(profiler) {
     auto kernel_lib = new_library_with_source(device, mtl_source_code_);
-    TC_ASSERT(kernel_lib != nullptr);
+    TI_ASSERT(kernel_lib != nullptr);
     for (const auto &ka : mtl_kernels_attribs) {
       auto kernel_func = new_function_with_name(kernel_lib.get(), ka.name);
-      TC_ASSERT(kernel_func != nullptr);
+      TI_ASSERT(kernel_func != nullptr);
       // Note that CompiledMtlKernel doesn't own |kernel_func|.
       compiled_mtl_kernels.push_back(std::make_unique<CompiledMtlKernel>(
           ka, device, kernel_func.get(), profiler_));
@@ -183,7 +183,7 @@ class HostMetalArgsBlitter {
       } else if (arg.dt == MetalDataType::f32) {
         TO_METAL(float32);
       } else {
-        TC_ERROR("Metal does not support arg type={}",
+        TI_ERROR("Metal does not support arg type={}",
                  metal_data_type_name(arg.dt));
       }
     }
@@ -216,7 +216,7 @@ class HostMetalArgsBlitter {
         } else if (arg.dt == MetalDataType::f32) {
           TO_HOST(float32);
         } else {
-          TC_ERROR("Metal does not support arg type={}",
+          TI_ERROR("Metal does not support arg type={}",
                    metal_data_type_name(arg.dt));
         }
       }
@@ -250,16 +250,16 @@ class MetalRuntime::Impl {
         profiler_(profiler),
         root_buffer_mem_(std::max(root_size, 1UL), mem_pool) {
     if (config_->debug) {
-      TC_ASSERT(is_metal_api_available());
+      TI_ASSERT(is_metal_api_available());
     }
     device_ = mtl_create_system_default_device();
-    TC_ASSERT(device_ != nullptr);
+    TI_ASSERT(device_ != nullptr);
     command_queue_ = new_command_queue(device_.get());
-    TC_ASSERT(command_queue_ != nullptr);
+    TI_ASSERT(command_queue_ != nullptr);
     create_new_command_buffer();
     root_buffer_ = new_mtl_buffer_no_copy(device_.get(), root_buffer_mem_.ptr(),
                                           root_buffer_mem_.size());
-    TC_ASSERT(root_buffer_ != nullptr);
+    TI_ASSERT(root_buffer_ != nullptr);
   }
 
   void register_taichi_kernel(
@@ -267,14 +267,14 @@ class MetalRuntime::Impl {
       const std::string &mtl_kernel_source_code,
       const std::vector<MetalKernelAttributes> &kernels_attribs,
       size_t global_tmps_size, const MetalKernelArgsAttributes &args_attribs) {
-    TC_ASSERT(compiled_taichi_kernels_.find(taichi_kernel_name) ==
+    TI_ASSERT(compiled_taichi_kernels_.find(taichi_kernel_name) ==
               compiled_taichi_kernels_.end());
 
     if (config_->print_kernel_llvm_ir) {
       // If users have enabled |print_kernel_llvm_ir|, it probably means that
       // they want to see the compiled code on the given arch. Maybe rename this
       // flag, or add another flag (e.g. |print_kernel_source_code|)?
-      TC_INFO("Metal source code for kernel <{}>\n{}", taichi_kernel_name,
+      TI_INFO("Metal source code for kernel <{}>\n{}", taichi_kernel_name,
               mtl_kernel_source_code);
     }
     compiled_taichi_kernels_[taichi_kernel_name] =
@@ -282,7 +282,7 @@ class MetalRuntime::Impl {
             taichi_kernel_name, mtl_kernel_source_code, kernels_attribs,
             global_tmps_size, args_attribs, device_.get(), mem_pool_,
             profiler_);
-    TC_INFO("Registered Taichi kernel <{}>", taichi_kernel_name);
+    TI_INFO("Registered Taichi kernel <{}>", taichi_kernel_name);
   }
 
   void launch_taichi_kernel(const std::string &taichi_kernel_name,
@@ -290,7 +290,7 @@ class MetalRuntime::Impl {
     auto &ctk = *compiled_taichi_kernels_.find(taichi_kernel_name)->second;
     auto args_blitter = HostMetalArgsBlitter::make_if_has_args(ctk, ctx);
     if (config_->verbose_kernel_launches) {
-      TC_INFO("Lauching Taichi kernel <{}>", taichi_kernel_name);
+      TI_INFO("Launching Taichi kernel <{}>", taichi_kernel_name);
     }
     if (args_blitter) {
       args_blitter->host_to_metal();
@@ -313,7 +313,7 @@ class MetalRuntime::Impl {
         const int end = ka->range_for_attribs.const_end
                             ? ka->range_for_attribs.end
                             : load_global_tmp(ka->range_for_attribs.end);
-        TC_ASSERT(ka->num_threads == -1);
+        TI_ASSERT(ka->num_threads == -1);
         ka->num_threads = end - begin;
       }
       mk->launch(root_buffer_.get(), ctk.global_tmps_buffer.get(),
@@ -339,7 +339,7 @@ class MetalRuntime::Impl {
  private:
   void create_new_command_buffer() {
     cur_command_buffer_ = new_command_buffer(command_queue_.get());
-    TC_ASSERT(cur_command_buffer_ != nullptr);
+    TI_ASSERT(cur_command_buffer_ != nullptr);
   }
   CompileConfig *const config_;
   MemoryPool *const mem_pool_;
@@ -359,7 +359,7 @@ class MetalRuntime::Impl {
  public:
   Impl(size_t root_size, CompileConfig *config, MemoryPool *mem_pool,
        ProfilerBase *profiler) {
-    TC_ERROR("Metal not supported on the current OS");
+    TI_ERROR("Metal not supported on the current OS");
   }
 
   void register_taichi_kernel(
@@ -367,18 +367,18 @@ class MetalRuntime::Impl {
       const std::string &mtl_kernel_source_code,
       const std::vector<MetalKernelAttributes> &kernels_attribs,
       size_t global_tmps_size, const MetalKernelArgsAttributes &args_attribs) {
-    TC_ERROR("Metal not supported on the current OS");
+    TI_ERROR("Metal not supported on the current OS");
   }
 
   void launch_taichi_kernel(const std::string &taichi_kernel_name,
                             Context *ctx) {
-    TC_ERROR("Metal not supported on the current OS");
+    TI_ERROR("Metal not supported on the current OS");
   }
 
-  void synchronize() { TC_ERROR("Metal not supported on the current OS"); }
+  void synchronize() { TI_ERROR("Metal not supported on the current OS"); }
 };
 
-#endif  // TC_PLATFORM_OSX
+#endif  // TI_PLATFORM_OSX
 
 MetalRuntime::MetalRuntime(size_t root_size, CompileConfig *config,
                            MemoryPool *mem_pool, ProfilerBase *profiler)
diff --git a/taichi/platform/metal/metal_runtime.h b/taichi/platform/metal/metal_runtime.h
index 982edfaff0c6c..43b2d0425d756 100644
--- a/taichi/platform/metal/metal_runtime.h
+++ b/taichi/platform/metal/metal_runtime.h
@@ -50,7 +50,7 @@ class MetalRuntime {
 
  private:
   // Use Pimpl so that we can expose this interface without conditionally
-  // compiling on TC_PLATFORM_OSX
+  // compiling on TI_PLATFORM_OSX
   class Impl;
   std::unique_ptr<Impl> impl_;
 };
diff --git a/taichi/profiler.cpp b/taichi/profiler.cpp
index dbf0fad040ead..a69c974db34db 100644
--- a/taichi/profiler.cpp
+++ b/taichi/profiler.cpp
@@ -154,7 +154,7 @@ std::unique_ptr<ProfilerBase> make_profiler(Arch arch) {
   } else if (arch == Arch::cuda) {
     return std::make_unique<CUDAProfiler>();
   } else {
-    TC_NOT_IMPLEMENTED;
+    TI_NOT_IMPLEMENTED;
   }
 }
 
diff --git a/taichi/profiler.h b/taichi/profiler.h
index 11b19401ee32c..f5d82dc9b52d9 100644
--- a/taichi/profiler.h
+++ b/taichi/profiler.h
@@ -7,7 +7,7 @@
 #include "common.h"
 #include "tlang_util.h"
 
-#if defined(TC_PLATFORM_WINDOWS)
+#if defined(TI_PLATFORM_WINDOWS)
 #undef min
 #undef max
 #endif
diff --git a/taichi/program.cpp b/taichi/program.cpp
index 24d56592f369a..10a1f4661c660 100644
--- a/taichi/program.cpp
+++ b/taichi/program.cpp
@@ -12,7 +12,7 @@
 #include "backends/struct_metal.h"
 #include "snode.h"
 
-#if defined(CUDA_FOUND)
+#if defined(TI_WITH_CUDA)
 
 #include <cuda_runtime.h>
 
@@ -26,35 +26,35 @@ Program *current_program = nullptr;
 std::atomic<int> Program::num_instances;
 
 Program::Program(Arch arch) {
-#if !defined(CUDA_FOUND)
+#if !defined(TI_WITH_CUDA)
   if (arch == Arch::cuda) {
-    TC_WARN("Taichi is not compiled with CUDA.");
-    TC_WARN("Falling back to x86_64");
+    TI_WARN("Taichi is not compiled with CUDA.");
+    TI_WARN("Falling back to x86_64");
     arch = Arch::x86_64;
   }
 #else
   if (!cuda_context) {
     cuda_context = std::make_unique<CUDAContext>();
     if (!cuda_context->detected()) {
-      TC_WARN("No CUDA device detected.");
-      TC_WARN("Falling back to x86_64");
+      TI_WARN("No CUDA device detected.");
+      TI_WARN("Falling back to x86_64");
       arch = Arch::x86_64;
     }
   }
 #endif
   if (arch == Arch::metal) {
     if (!metal::is_metal_api_available()) {
-      TC_WARN("No Metal API detected, falling back to x86_64");
+      TI_WARN("No Metal API detected, falling back to x86_64");
       arch = Arch::x86_64;
     }
   }
   memory_pool = std::make_unique<MemoryPool>(this);
-  TC_ASSERT_INFO(num_instances == 0, "Only one instance at a time");
+  TI_ASSERT_INFO(num_instances == 0, "Only one instance at a time");
   total_compilation_time = 0;
   num_instances += 1;
   SNode::counter = 0;
   // llvm_context_device is initialized before kernel compilation
-  TC_ASSERT(current_program == nullptr);
+  TI_ASSERT(current_program == nullptr);
   current_program = this;
   config = default_compile_config;
   config.arch = arch;
@@ -69,7 +69,7 @@ Program::Program(Arch arch) {
   snode_root = std::make_unique<SNode>(0, SNodeType::root);
 
   if (config.debug) {
-    TC_DEBUG("Program arch={}", arch_name(arch));
+    TI_DEBUG("Program arch={}", arch_name(arch));
   }
 }
 
@@ -87,9 +87,9 @@ FunctionType Program::compile(Kernel &kernel) {
     metal::MetalCodeGen codegen(kernel.name, &metal_struct_compiled_.value());
     ret = codegen.compile(*this, kernel, metal_runtime_.get());
   } else {
-    TC_NOT_IMPLEMENTED;
+    TI_NOT_IMPLEMENTED;
   }
-  TC_ASSERT(ret);
+  TI_ASSERT(ret);
   total_compilation_time += Time::get_time() - start_t;
   return ret;
 }
@@ -118,17 +118,17 @@ void Program::materialize_layout() {
           metal_struct_compiled_->root_size, &config, memory_pool.get(),
           profiler_llvm.get());
     }
-    TC_INFO("Metal root buffer size: {} B", metal_struct_compiled_->root_size);
+    TI_INFO("Metal root buffer size: {} B", metal_struct_compiled_->root_size);
   }
 }
 
 void Program::synchronize() {
   if (!sync) {
     if (config.arch == Arch::cuda) {
-#if defined(CUDA_FOUND)
+#if defined(TI_WITH_CUDA)
       cudaDeviceSynchronize();
 #else
-      TC_ERROR("No CUDA support");
+      TI_ERROR("No CUDA support");
 #endif
     } else if (config.arch == Arch::metal) {
       metal_runtime_->synchronize();
@@ -146,7 +146,7 @@ std::string latex_short_digit(int v) {
   std::string units = "KMGT";
   int unit_id = -1;
   while (v >= 1024 && unit_id + 1 < (int)units.size()) {
-    TC_ASSERT(v % 1024 == 0);
+    TI_ASSERT(v % 1024 == 0);
     v /= 1024;
     unit_id++;
   }
@@ -159,7 +159,7 @@ std::string latex_short_digit(int v) {
 void Program::visualize_layout(const std::string &fn) {
   {
     std::ofstream ofs(fn);
-    TC_ASSERT(ofs);
+    TI_ASSERT(ofs);
     auto emit = [&](std::string str) { ofs << str; };
 
     auto header = R"(
@@ -227,7 +227,7 @@ void Program::initialize_device_llvm_context() {
 }
 
 Kernel &Program::get_snode_reader(SNode *snode) {
-  TC_ASSERT(snode->type == SNodeType::place);
+  TI_ASSERT(snode->type == SNodeType::place);
   auto kernel_name = fmt::format("snode_reader_{}", snode->id);
   auto &ker = kernel([&] {
     ExprGroup indices;
@@ -255,7 +255,7 @@ Kernel &Program::get_snode_reader(SNode *snode) {
 }
 
 Kernel &Program::get_snode_writer(SNode *snode) {
-  TC_ASSERT(snode->type == SNodeType::place);
+  TI_ASSERT(snode->type == SNodeType::place);
   auto kernel_name = fmt::format("snode_writer_{}", snode->id);
   auto &ker = kernel([&] {
     ExprGroup indices;
@@ -284,10 +284,10 @@ void Program::finalize() {
   synchronize();
   current_program = nullptr;
   for (auto &dll : loaded_dlls) {
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
     dlclose(dll);
 #else
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
 #endif
   }
   memory_pool->terminate();
diff --git a/taichi/program.h b/taichi/program.h
index b13ecd1b52aeb..0cc5718799754 100644
--- a/taichi/program.h
+++ b/taichi/program.h
@@ -18,7 +18,7 @@
 #include <taichi/platform/metal/metal_kernel_util.h>
 #include <taichi/platform/metal/metal_runtime.h>
 
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
 #include <dlfcn.h>
 #endif
 
@@ -26,7 +26,7 @@ TLANG_NAMESPACE_BEGIN
 
 extern Program *current_program;
 
-TC_FORCE_INLINE Program &get_current_program() {
+TI_FORCE_INLINE Program &get_current_program() {
   return *current_program;
 }
 
@@ -143,7 +143,7 @@ class Program {
   void materialize_layout();
 
   inline Kernel &get_current_kernel() {
-    TC_ASSERT(current_kernel);
+    TI_ASSERT(current_kernel);
     return *current_kernel;
   }
 
diff --git a/taichi/python/exception.cpp b/taichi/python/exception.cpp
index 4ac376d3ad8fe..b7e420870f3ce 100644
--- a/taichi/python/exception.cpp
+++ b/taichi/python/exception.cpp
@@ -5,14 +5,14 @@
 
 #include <taichi/python/exception.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 void raise_assertion_failure_in_python(const std::string &msg) {
   // throw ExceptionForPython(msg);
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
 
-TC_EXPORT void taichi_raise_assertion_failure_in_python(const char *msg) {
+TI_EXPORT void taichi_raise_assertion_failure_in_python(const char *msg) {
   taichi::raise_assertion_failure_in_python(std::string(msg));
 }
diff --git a/taichi/python/exception.h b/taichi/python/exception.h
index 6de34827a952a..a677012026235 100644
--- a/taichi/python/exception.h
+++ b/taichi/python/exception.h
@@ -8,7 +8,7 @@
 #include <taichi/common/interface.h>
 #include <exception>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class ExceptionForPython : public std::exception {
  private:
@@ -24,4 +24,4 @@ class ExceptionForPython : public std::exception {
 
 void raise_assertion_failure_in_python(const std::string &msg);
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/python/export.cpp b/taichi/python/export.cpp
index 16966affe5adc..ec7e959675d59 100644
--- a/taichi/python/export.cpp
+++ b/taichi/python/export.cpp
@@ -8,7 +8,7 @@
 #include <taichi/io/io.h>
 #include <taichi/geometry/factory.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 void export_lang(py::module &m);
 
@@ -25,4 +25,4 @@ PYBIND11_MODULE(taichi_core, m) {
   export_lang(m);
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/python/export.h b/taichi/python/export.h
index 5123ec28324ce..ba425d91d55b4 100644
--- a/taichi/python/export.h
+++ b/taichi/python/export.h
@@ -28,7 +28,7 @@
 #include <taichi/io/io.h>
 #include <taichi/common/util.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 namespace py = pybind11;
 
@@ -56,4 +56,4 @@ void export_misc(py::module &m);
 
 #define DEFINE_VECTOR_OF(x) DEFINE_VECTOR_OF_NAMED(x, #x "List");
 
-TC_NAMESPACE_END
\ No newline at end of file
+TI_NAMESPACE_END
\ No newline at end of file
diff --git a/taichi/python/export_math.cpp b/taichi/python/export_math.cpp
index bbf520fafca99..337a860e6f990 100644
--- a/taichi/python/export_math.cpp
+++ b/taichi/python/export_math.cpp
@@ -6,7 +6,7 @@
 #include <taichi/python/export.h>
 #include <taichi/common/dict.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 template <typename T, int ret>
 int return_constant(T *) {
@@ -59,8 +59,8 @@ void array2d_to_ndarray(T *arr,
                         uint64 output)  // 'output' is actually a pointer...
 {
   int width = arr->get_width(), height = arr->get_height();
-  TC_ASSERT(width > 0);
-  TC_ASSERT(height > 0);
+  TI_ASSERT(width > 0);
+  TI_ASSERT(height > 0);
   for (auto &ind : arr->get_region()) {
     for (int k = 0; k < channels; k++) {
       reinterpret_cast<real *>(
@@ -323,4 +323,4 @@ void export_math(py::module &m) {
   VectorRegistration<Vector4i>::run(m);
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/python/export_misc.cpp b/taichi/python/export_misc.cpp
index a280b31acf7f3..eb2a55e8518c0 100644
--- a/taichi/python/export_misc.cpp
+++ b/taichi/python/export_misc.cpp
@@ -18,7 +18,7 @@
 #include <cuda_runtime_api.h>
 #endif
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 extern Function11 python_at_exit;
 
@@ -65,16 +65,16 @@ stdout = fdopen(fd[1], "w");
 auto file_fd = fdopen(fd[0], "w");
 FILE *file = freopen(fn.c_str(), "w", file_fd);
 */
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
   std::cerr.rdbuf(std::cout.rdbuf());
   dup2(fileno(popen(fmt::format("tee {}", fn).c_str(), "w")), STDOUT_FILENO);
 #else
-  TC_NOT_IMPLEMENTED;
+  TI_NOT_IMPLEMENTED;
 #endif
 }
 
 void stop_duplicating_stdout_to_file(const std::string &fn) {
-  TC_NOT_IMPLEMENTED;
+  TI_NOT_IMPLEMENTED;
 }
 
 bool with_cuda() {
@@ -112,17 +112,17 @@ void export_misc(py::module &m) {
       .def("close_dll", &UnitDLL::close_dll)
       .def("loaded", &UnitDLL::loaded);
 
-#define TC_EXPORT_LOGGING(X) \
+#define TI_EXPORT_LOGGING(X) \
   m.def(#X, [](const std::string &msg) { taichi::logger.X(msg); });
 
   m.def("flush_log", []() { taichi::logger.flush(); });
 
-  TC_EXPORT_LOGGING(trace);
-  TC_EXPORT_LOGGING(debug);
-  TC_EXPORT_LOGGING(info);
-  TC_EXPORT_LOGGING(warn);
-  TC_EXPORT_LOGGING(error);
-  TC_EXPORT_LOGGING(critical);
+  TI_EXPORT_LOGGING(trace);
+  TI_EXPORT_LOGGING(debug);
+  TI_EXPORT_LOGGING(info);
+  TI_EXPORT_LOGGING(warn);
+  TI_EXPORT_LOGGING(error);
+  TI_EXPORT_LOGGING(critical);
 
   m.def("duplicate_stdout_to_file", duplicate_stdout_to_file);
 
@@ -166,4 +166,4 @@ void export_misc(py::module &m) {
   m.def("with_metal", taichi::Tlang::metal::is_metal_api_available);
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/python/export_visual.cpp b/taichi/python/export_visual.cpp
index 7455a682068be..2337c7edbd13a 100644
--- a/taichi/python/export_visual.cpp
+++ b/taichi/python/export_visual.cpp
@@ -9,7 +9,7 @@
 #include <taichi/geometry/factory.h>
 #include <taichi/visual/gui.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 void export_visual(py::module &m) {
   // GUI
@@ -56,4 +56,4 @@ void export_visual(py::module &m) {
            py::return_value_policy::reference);
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/python_bindings.cpp b/taichi/python_bindings.cpp
index 3a602749fcfe5..483aaeed90bd9 100644
--- a/taichi/python_bindings.cpp
+++ b/taichi/python_bindings.cpp
@@ -8,11 +8,11 @@
 #include <taichi/python/export.h>
 #include "svd.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 bool test_threading();
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
 
 TLANG_NAMESPACE_BEGIN
 
@@ -24,7 +24,7 @@ Expr expr_index(const Expr &expr, const Expr &index) {
 
 void expr_assign(const Expr &lhs_, const Expr &rhs, std::string tb) {
   auto lhs = ptr_if_global(lhs_);
-  TC_ASSERT(lhs->is_lvalue());
+  TI_ASSERT(lhs->is_lvalue());
   auto stmt = std::make_unique<FrontendAssignStmt>(lhs, load_if_ptr(rhs));
   stmt->set_tb(tb);
   current_ast_builder().insert(std::move(stmt));
@@ -37,7 +37,7 @@ std::string libdevice_path();
 
 TLANG_NAMESPACE_END
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 void export_lang(py::module &m) {
   using namespace taichi::Tlang;
 
@@ -131,9 +131,16 @@ void export_lang(py::module &m) {
            (SNode & (SNode::*)(const std::vector<Index> &,
                                const std::vector<int> &))(&SNode::dense),
            py::return_value_policy::reference)
+      .def("pointer",
+          (SNode & (SNode::*)(const std::vector<Index> &,
+                              const std::vector<int> &))(&SNode::pointer),
+          py::return_value_policy::reference)
+      .def("hash",
+          (SNode & (SNode::*)(const std::vector<Index> &,
+                              const std::vector<int> &))(&SNode::hash),
+          py::return_value_policy::reference)
       .def("dynamic", &SNode::dynamic_chunked,
            py::return_value_policy::reference)
-      .def("pointer", &SNode::pointer, py::return_value_policy::reference)
       .def("bitmasked", &SNode::bitmasked)
       .def("place", (SNode & (SNode::*)(Expr &))(&SNode::place),
            py::return_value_policy::reference)
@@ -383,7 +390,7 @@ void export_lang(py::module &m) {
 
   m.def("global_new", static_cast<Expr (*)(Expr, DataType)>(global_new));
   m.def("set_global_grad", [&](const Expr &expr) {
-    TC_ASSERT(expr.is<GlobalVariableExpression>());
+    TI_ASSERT(expr.is<GlobalVariableExpression>());
     expr.cast<GlobalVariableExpression>()->is_primal = false;
   });
   m.def("data_type_name", data_type_name);
@@ -409,7 +416,7 @@ void export_lang(py::module &m) {
     try {
       throw IRModified();
     } catch (IRModified) {
-      TC_INFO("caught");
+      TI_INFO("caught");
     }
   });
   // Schedules
@@ -434,7 +441,7 @@ void export_lang(py::module &m) {
   m.def("get_version_minor", get_version_minor);
   m.def("get_version_patch", get_version_patch);
   m.def("test_printf", [] { printf("test_printf\n"); });
-  m.def("test_logging", [] { TC_INFO("test_logging\n"); });
+  m.def("test_logging", [] { TI_INFO("test_logging\n"); });
   m.def("trigger_crash", [] { *(int *)(1) = 0; });
   m.def("get_max_num_indices", [] { return max_num_indices; });
   m.def("get_max_num_args", [] { return max_num_args; });
@@ -447,4 +454,4 @@ void export_lang(py::module &m) {
   m.def("is_supported", is_supported);
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/runtime/internal_function.h b/taichi/runtime/internal_function.h
index ed5ae9408b721..425b1912f47b8 100644
--- a/taichi/runtime/internal_function.h
+++ b/taichi/runtime/internal_function.h
@@ -19,7 +19,7 @@ i32 test_list_manager(Context *context) {
     list->append(&j);
   }
   for (int i = 0; i < 320; i++) {
-    TC_ASSERT(list->get<i32>(i) == i + 5);
+    TI_ASSERT(list->get<i32>(i) == i + 5);
   }
   return 0;
 }
@@ -45,14 +45,14 @@ i32 test_node_allocator(Context *context) {
     ptrs[i] = nodes->allocate();
   }
   for (int i = 5; i < 19; i++) {
-    TC_ASSERT(nodes->locate(ptrs[i]) == i);
+    TI_ASSERT(nodes->locate(ptrs[i]) == i);
   }
 
   for (int i = 19; i < 24; i++) {
     auto idx = nodes->locate(ptrs[i]);
     Printf("i %d", i);
     Printf("idx %d", idx);
-    TC_ASSERT(idx == i - 19);
+    TI_ASSERT(idx == i - 19);
   }
   return 0;
 }
diff --git a/taichi/runtime/node_pointer.h b/taichi/runtime/node_pointer.h
index a75e959134335..01c4d735217cf 100644
--- a/taichi/runtime/node_pointer.h
+++ b/taichi/runtime/node_pointer.h
@@ -1,15 +1,20 @@
 #pragma once
 
 // Specialized Attributes and functions
-struct PointerMeta : public StructMeta {
+struct pointerMeta : public StructMeta {
   bool _;
 };
 
-STRUCT_FIELD(PointerMeta, _);
+STRUCT_FIELD(pointerMeta, _);
 
-void Pointer_activate(Ptr meta, Ptr node, int i) {
-  Ptr lock = node;
-  Ptr &data_ptr = *(Ptr *)(node + 8);
+i32 pointer_get_num_elements(Ptr meta, Ptr node) {  // element count of a pointer node; `node` is not read
+  return ((StructMeta *)meta)->max_num_elements;  // meta is treated as a StructMeta and its max_num_elements field is returned
+}
+
+void pointer_activate(Ptr meta, Ptr node, int i) {
+  auto num_elements = pointer_get_num_elements(meta, node);
+  Ptr lock = node + 8*i;
+  Ptr &data_ptr = *(Ptr *)(node + 8*(num_elements + i));
   if (data_ptr == nullptr) {
     locked_task(lock, [&] {
       if (data_ptr == nullptr) {
@@ -22,9 +27,10 @@ void Pointer_activate(Ptr meta, Ptr node, int i) {
   }
 }
 
-void Pointer_deactivate(Ptr meta, Ptr node) {
-  Ptr lock = node;
-  Ptr &data_ptr = *(Ptr *)(node + 8);
+void pointer_deactivate(Ptr meta, Ptr node, int i) {
+  auto num_elements = pointer_get_num_elements(meta, node);
+  Ptr lock = node + 8*i;
+  Ptr &data_ptr = *(Ptr *)(node + 8*(num_elements + i));
   if (data_ptr != nullptr) {
     locked_task(lock, [&] {
       if (data_ptr != nullptr) {
@@ -38,13 +44,15 @@ void Pointer_deactivate(Ptr meta, Ptr node) {
   }
 }
 
-i32 Pointer_is_active(Ptr meta, Ptr node, int i) {
-  auto data_ptr = *(Ptr *)(node + 8);
+i32 pointer_is_active(Ptr meta, Ptr node, int i) {  // nonzero iff the child pointer stored for element i is non-null
+  auto num_elements = pointer_get_num_elements(meta, node);
+  auto data_ptr = *(Ptr *)(node + 8*(num_elements + i));  // child pointers start 8*num_elements bytes into the node; pointer_activate in this file takes its lock at node + 8*i
   return data_ptr != nullptr;
 }
 
-Ptr Pointer_lookup_element(Ptr meta, Ptr node, int i) {
-  auto data_ptr = *(Ptr *)(node + 8);
+Ptr pointer_lookup_element(Ptr meta, Ptr node, int i) {
+  auto num_elements = pointer_get_num_elements(meta, node);
+  auto data_ptr = *(Ptr *)(node + 8*(num_elements + i));
   if (data_ptr == nullptr) {
     auto smeta = (StructMeta *)meta;
     auto context = smeta->context;
@@ -52,7 +60,3 @@ Ptr Pointer_lookup_element(Ptr meta, Ptr node, int i) {
   }
   return data_ptr;
 }
-
-i32 Pointer_get_num_elements(Ptr meta, Ptr node) {
-  return 1;
-}
diff --git a/taichi/runtime/runtime.cpp b/taichi/runtime/runtime.cpp
index 3139c10597e1c..3ace69b55b924 100644
--- a/taichi/runtime/runtime.cpp
+++ b/taichi/runtime/runtime.cpp
@@ -1,4 +1,4 @@
-#if !defined(TC_INCLUDED) || !defined(_WIN32)
+#if !defined(TI_INCLUDED) || !defined(_WIN32)
 // This file will only be compiled with clang into llvm bitcode
 // Generated bitcode will likely get inline for performance.
 // Most function calls here will be inlined
@@ -296,8 +296,8 @@ constexpr bool enable_assert = true;
 
 void taichi_assert(Context *context, i32 test, const char *msg);
 void taichi_assert_runtime(Runtime *runtime, i32 test, const char *msg);
-#define TC_ASSERT_INFO(x, msg) taichi_assert(context, (int)(x), msg)
-#define TC_ASSERT(x) TC_ASSERT_INFO(x, #x)
+#define TI_ASSERT_INFO(x, msg) taichi_assert(context, (int)(x), msg)  // expands to a call that names `context`, so a variable of that name must be in scope at the use site
+#define TI_ASSERT(x) TI_ASSERT_INFO(x, #x)  // uses the stringified condition as the message
 
 void ___stubs___() {
   printf("");
diff --git a/taichi/scratch_pad.h b/taichi/scratch_pad.h
index 5e64c8d404e87..79683a05fe269 100644
--- a/taichi/scratch_pad.h
+++ b/taichi/scratch_pad.h
@@ -43,7 +43,7 @@ class ScratchPad {
   ScratchPad() = default;
 
   ScratchPad(SNode *snode) : snode(snode) {
-    TC_ASSERT(snode != nullptr);
+    TI_ASSERT(snode != nullptr);
     dim = snode->num_active_indices;
     bounds[0].resize(dim);
     bounds[1].resize(dim);
@@ -60,9 +60,9 @@ class ScratchPad {
   }
 
   void access(const std::vector<int> &indices, AccessFlag flags) {
-    TC_ASSERT(!finalized);
+    TI_ASSERT(!finalized);
     empty = true;
-    TC_ASSERT((int)indices.size() == dim);
+    TI_ASSERT((int)indices.size() == dim);
     for (int i = 0; i < dim; i++) {
       bounds[0][i] = std::min(bounds[0][i], indices[i]);
       bounds[1][i] = std::max(bounds[1][i], indices[i] + 1);
@@ -82,8 +82,8 @@ class ScratchPad {
     for (int i = 0; i < dim; i++) {
       block_size[i] =
           1 << snode->extractors[snode->physical_index_position[i]].num_bits;
-      TC_ASSERT(bounds[0][i] != std::numeric_limits<int>::max());
-      TC_ASSERT(bounds[1][i] != std::numeric_limits<int>::min());
+      TI_ASSERT(bounds[0][i] != std::numeric_limits<int>::max());
+      TI_ASSERT(bounds[1][i] != std::numeric_limits<int>::min());
     }
 
     finalized = true;
@@ -107,7 +107,7 @@ class ScratchPad {
   }
 
   int linear_size() {
-    TC_ASSERT(finalized);
+    TI_ASSERT(finalized);
     int s = 1;
     for (int i = 0; i < dim; i++) {
       s *= pad_size[i];
@@ -117,7 +117,7 @@ class ScratchPad {
 
   int linearized_index(const std::vector<int> &indices) {
     int ret = 0;
-    TC_ASSERT(finalized);
+    TI_ASSERT(finalized);
     for (int i = 0; i < dim; i++) {
       ret *= (bounds[1][i] - bounds[0][i]);
       ret += indices[i] - bounds[0][i];
@@ -147,10 +147,10 @@ class ScratchPad {
   std::string global_to_linearized_local(const std::vector<Stmt *> &loop_vars,
                                          const std::vector<Stmt *> &indices) {
     std::string ret = "";
-    TC_ASSERT((int)indices.size() == dim);
+    TI_ASSERT((int)indices.size() == dim);
     int step_size = linear_size();
     for (int i = 0; i < (int)indices.size(); i++) {
-      TC_ASSERT(step_size % pad_size[i] == 0);
+      TI_ASSERT(step_size % pad_size[i] == 0);
       step_size /= pad_size[i];
       ret += fmt::format(" + ({} - {}_base - {}) * {}", indices[i]->raw_name(),
                          loop_vars[i]->raw_name(), bounds[0][i], step_size);
@@ -174,12 +174,12 @@ class ScratchPads {
       pads.emplace(std::piecewise_construct, std::forward_as_tuple(snode),
                    std::forward_as_tuple(snode));
     } else {
-      TC_ERROR("ScratchPad for {} already exists.", snode->node_type_name);
+      TI_ERROR("ScratchPad for {} already exists.", snode->node_type_name);
     }
   }
 
   void access(SNode *snode, const std::vector<int> &indices, AccessFlag flags) {
-    TC_ASSERT(snode != nullptr);
+    TI_ASSERT(snode != nullptr);
     if (pads.find(snode) == pads.end())
       return;
     pads.find(snode)->second.access(indices, flags);
@@ -224,15 +224,15 @@ class ScratchPads {
       }
     } else if (pads.find(snode->parent) != pads.end()) {
     } else {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
     }
   }
 
   void print() {
     for (auto &it : pads) {
-      TC_P(it.first->node_type_name);
-      TC_P(it.second.bounds[0]);
-      TC_P(it.second.bounds[1]);
+      TI_P(it.first->node_type_name);
+      TI_P(it.second.bounds[0]);
+      TI_P(it.second.bounds[1]);
     }
   }
 
@@ -241,7 +241,7 @@ class ScratchPads {
   }
 
   ScratchPad &get(SNode *snode) {
-    TC_ASSERT(pads.find(snode) != pads.end());
+    TI_ASSERT(pads.find(snode) != pads.end());  // NOTE(review): presumably guards against operator[] below default-inserting a pad for an unknown snode — confirm pads is a std map type
     return pads[snode];
   }
 };
diff --git a/taichi/snode.cpp b/taichi/snode.cpp
index 1941898771c81..578a94db7552f 100644
--- a/taichi/snode.cpp
+++ b/taichi/snode.cpp
@@ -11,9 +11,9 @@ SNode &SNode::place(Expr &expr_) {
   if (type == SNodeType::root) {  // never directly place to root
     this->dense(std::vector<Index>(), {}).place(expr_);
   } else {
-    TC_ASSERT(expr_.is<GlobalVariableExpression>());
+    TI_ASSERT(expr_.is<GlobalVariableExpression>());
     auto expr = expr_.cast<GlobalVariableExpression>();
-    TC_ERROR_UNLESS(expr->snode == nullptr, "This variable has been placed.");
+    TI_ERROR_UNLESS(expr->snode == nullptr, "This variable has been placed.");
     auto &child = insert_children(SNodeType::place);
     expr->set_snode(&child);
     child.name = expr->ident.raw_name();
@@ -30,13 +30,13 @@ SNode &SNode::place(Expr &expr_) {
 SNode &SNode::create_node(std::vector<Index> indices,
                           std::vector<int> sizes,
                           SNodeType type) {
-  TC_ASSERT(indices.size() == sizes.size() || sizes.size() == 1);
+  TI_ASSERT(indices.size() == sizes.size() || sizes.size() == 1);
   if (sizes.size() == 1) {
     sizes = std::vector<int>(indices.size(), sizes[0]);
   }
 
   if (type == SNodeType::hash)
-    TC_ASSERT_INFO(depth == 0,
+    TI_ASSERT_INFO(depth == 0,
                    "hashed node must be child of root due to initialization "
                    "memset limitation.");
   auto &new_node = insert_children(type);
@@ -45,10 +45,10 @@ SNode &SNode::create_node(std::vector<Index> indices,
     auto s = sizes[i];
     if (!bit::is_power_of_two(s)) {
       auto promoted_s = bit::least_pot_bound(s);
-      TC_DEBUG("Non-power-of-two node size {} promoted to {}.", s, promoted_s);
+      TI_DEBUG("Non-power-of-two node size {} promoted to {}.", s, promoted_s);
       s = promoted_s;
     }
-    TC_ASSERT(bit::is_power_of_two(s));
+    TI_ASSERT(bit::is_power_of_two(s));
     new_node.n *= s;
   }
   for (int i = 0; i < (int)indices.size(); i++) {
@@ -105,7 +105,7 @@ void SNode::lazy_grad() {
 }
 
 bool SNode::is_primal() const {
-  TC_ASSERT(expr.expr != nullptr);
+  TI_ASSERT(expr.expr != nullptr);
   return expr.cast<GlobalVariableExpression>()->is_primal;
 }
 
@@ -120,7 +120,7 @@ bool SNode::has_grad() const {
 }
 
 SNode *SNode::get_grad() const {
-  TC_ASSERT(has_grad());
+  TI_ASSERT(has_grad());
   return expr.cast<GlobalVariableExpression>()
       ->adjoint.cast<GlobalVariableExpression>()
       ->snode;
@@ -152,7 +152,7 @@ float64 SNode::read_float(const std::vector<int> &I) {
   } else if (dt == DataType::f64) {
     return get_current_program().context.get_arg<float64>(num_active_indices);
   } else {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
   }
 }
 
@@ -179,7 +179,7 @@ int64 SNode::read_int(const std::vector<int> &I) {
   } else if (dt == DataType::i64) {
     return get_current_program().context.get_arg<int64>(num_active_indices);
   } else {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
   }
 }
 
diff --git a/taichi/snode.h b/taichi/snode.h
index cc4c662db9057..a1d4b8d6490d3 100644
--- a/taichi/snode.h
+++ b/taichi/snode.h
@@ -41,7 +41,7 @@ class Index {
     value = 0;
   }
   Index(int value) : value(value) {
-    TC_ERROR_UNLESS(0 <= value && value < max_num_indices,
+    TI_ERROR_UNLESS(0 <= value && value < max_num_indices,
                     "Too many dimensions. The maximum dimensionality is {}",
                     max_num_indices);
   }
@@ -139,6 +139,32 @@ class SNode {
     return SNode::dense(std::vector<Index>{index}, size);
   }
 
+  SNode &pointer(const std::vector<Index> &indices,  // forwards to create_node with SNodeType::pointer
+               const std::vector<int> &sizes) {  // create_node (snode.cpp) asserts one size per index, or a single size that it applies to every index
+    return create_node(indices, sizes, SNodeType::pointer);
+  }
+
+  SNode &pointer(const std::vector<Index> &indices, int sizes) {  // single-size overload
+    return create_node(indices, std::vector<int>{sizes}, SNodeType::pointer);  // create_node (snode.cpp) replicates a one-element size list across all indices
+  }
+
+  SNode &pointer(const Index &index, int size) {  // one-index convenience overload
+    return SNode::pointer(std::vector<Index>{index}, size);  // wraps the index in a vector and delegates to the (indices, int) overload
+  }
+
+  SNode &hash(const std::vector<Index> &indices,  // forwards to create_node with SNodeType::hash
+              const std::vector<int> &sizes) {  // create_node (snode.cpp) asserts depth == 0 for hash nodes: they must be children of root
+    return create_node(indices, sizes, SNodeType::hash);
+  }
+
+  SNode &hash(const std::vector<Index> &indices, int sizes) {  // single-size overload
+    return create_node(indices, std::vector<int>{sizes}, SNodeType::hash);  // create_node (snode.cpp) replicates a one-element size list across all indices
+  }
+
+  SNode &hash(const Index &index, int size) {  // one-index convenience overload
+    return hash(std::vector<Index>{index}, size);  // wraps the index in a vector and delegates to the (indices, int) overload
+  }
+
   SNode &multi_threaded(bool val = true) {
     this->_multi_threaded = val;
     return *this;
@@ -178,8 +204,8 @@ class SNode {
   SNode &place(Expr &expr);
 
   SNode &dynamic_chunked(const Index &expr, int n, int chunk_size) {
-    TC_ASSERT(bit::is_power_of_two(n));
-    TC_ASSERT(bit::is_power_of_two(chunk_size));
+    TI_ASSERT(bit::is_power_of_two(n));
+    TI_ASSERT(bit::is_power_of_two(chunk_size));
     auto &child = insert_children(SNodeType::dynamic);
     child.extractors[expr.value].activate(bit::log2int(n));
     child.n = n;
@@ -187,22 +213,6 @@ class SNode {
     return child;
   }
 
-  SNode &hash(const std::vector<Index> indices, std::vector<int> sizes) {
-    return create_node(indices, sizes, SNodeType::hash);
-  }
-
-  SNode &hash(const std::vector<Index> indices, int sizes) {
-    return create_node(indices, std::vector<int>{sizes}, SNodeType::hash);
-  }
-
-  SNode &hash(const Index &expr, int n) {
-    return hash(std::vector<Index>{expr}, n);
-  }
-
-  SNode &pointer() {
-    return insert_children(SNodeType::pointer);
-  }
-
   SNode &morton(bool val = true) {
     _morton = val;
     return *this;
@@ -214,8 +224,8 @@ class SNode {
   }
 
   void *evaluate(void *ds, int i, int j, int k, int l) {
-    TC_ASSERT(access_func);
-    TC_ASSERT(max_num_indices == 4);
+    TI_ASSERT(access_func);  // access_func must be set before it is invoked below
+    TI_ASSERT(max_num_indices == 4);  // the call below passes exactly four indices (i, j, k, l)
     return access_func(ds, i, j, k, l);
   }
 
@@ -227,8 +237,8 @@ class SNode {
   void write_int(const std::vector<int> &I, int64);
   int64 read_int(const std::vector<int> &I);
 
-  TC_FORCE_INLINE AllocatorStat stat() {
-    TC_ASSERT(stat_func);
+  TI_FORCE_INLINE AllocatorStat stat() {
+    TI_ASSERT(stat_func);
     return stat_func();
   }
 
@@ -279,12 +289,12 @@ class SNode {
   }
 
   std::string get_ch_from_parent_func_name() const {
-    TC_ASSERT(parent != nullptr);
+    TI_ASSERT(parent != nullptr);
     return fmt::format("get_ch_{}_to_{}", parent->get_name(), get_name());
   }
 
   std::string refine_coordinates_func_name() const {
-    TC_ASSERT(type != SNodeType::place);
+    TI_ASSERT(type != SNodeType::place);
     return fmt::format("{}_refine_coordinates", get_name());
   }
 
diff --git a/taichi/statements.h b/taichi/statements.h
index 4779359a71e12..d9c84dcb87d33 100644
--- a/taichi/statements.h
+++ b/taichi/statements.h
@@ -69,7 +69,7 @@ class LinearizeStmt : public Stmt {
   LinearizeStmt(const std::vector<Stmt *> &inputs,
                 const std::vector<int> &strides)
       : inputs(inputs), strides(strides) {
-    TC_ASSERT(inputs.size() == strides.size());
+    TI_ASSERT(inputs.size() == strides.size());
     for (auto &op : this->inputs) {
       add_operand(op);
     }
diff --git a/taichi/struct.h b/taichi/struct.h
index 1b7557aa80f54..14528805b2348 100644
--- a/taichi/struct.h
+++ b/taichi/struct.h
@@ -2,7 +2,7 @@
 #pragma once
 
 #include "common.h"
-#if !defined(TC_PLATFORM_WINDOWS)
+#if !defined(TI_PLATFORM_WINDOWS)
 #include "arithmetics.h"
 #endif
 #if defined(TLANG_GPU)
@@ -15,19 +15,19 @@
 // bits come from.
 
 #if defined(TLANG_KERNEL)
-#define TC_EXPORT
+#define TI_EXPORT
 #if defined(TLANG_GPU)
-#define TC_DEVICE __device__ __host__
-#define TLANG_ACCESSOR __device__ __host__ TC_FORCE_INLINE
+#define TI_DEVICE __device__ __host__
+#define TLANG_ACCESSOR __device__ __host__ TI_FORCE_INLINE
 #else
-#define TC_DEVICE
-#define TLANG_ACCESSOR TC_FORCE_INLINE
+#define TI_DEVICE
+#define TLANG_ACCESSOR TI_FORCE_INLINE
 #endif
 #else
 #define TLANG_ACCESSOR
-#undef TC_EXPORT
-#define TC_EXPORT extern "C"
-#define TC_DEVICE
+#undef TI_EXPORT
+#define TI_EXPORT extern "C"
+#define TI_DEVICE
 #endif
 
 TLANG_NAMESPACE_BEGIN
@@ -93,16 +93,16 @@ struct SNodeAllocator {
 
   __host__ __device__ SNodeMeta *allocate_node(
       const PhysicalIndexGroup &index) {
-    TC_ASSERT(this != nullptr);
-    TC_ASSERT(data_pool != nullptr);
-    TC_ASSERT(resident_pool != nullptr);
+    TI_ASSERT(this != nullptr);
+    TI_ASSERT(data_pool != nullptr);
+    TI_ASSERT(resident_pool != nullptr);
     auto id = atomic_add(&resident_tail, 1UL);
 #if defined(TL_DEBUG)
     if (id >= pool_size) {
       printf("pool size %lld\n", pool_size);
     }
 #endif
-    TC_ASSERT(id < pool_size);
+    TI_ASSERT(id < pool_size);
     SNodeMeta &meta = resident_pool[id];
     meta.active = true;
     meta.ptr = data_pool + id;
@@ -168,7 +168,7 @@ struct Managers {
 
   __host__ __device__ static void initialize() {
     auto addr = create_unified<Managers>();
-    TC_ASSERT(addr == get_instance());
+    TI_ASSERT(addr == get_instance());
   }
 
   template <typename T>
@@ -328,27 +328,27 @@ template <typename child_type_>
 struct layout_root {
   using child_type = child_type_;
   child_type children;
-  TC_DEVICE TC_FORCE_INLINE child_type *look_up(
+  TI_DEVICE TI_FORCE_INLINE child_type *look_up(
       int i) {  // i is flattened index
     return &children;
   }
 
-  TC_DEVICE TC_FORCE_INLINE int get_n() const {
+  TI_DEVICE TI_FORCE_INLINE int get_n() const {
     return 1;
   }
 
-  TC_DEVICE TC_FORCE_INLINE static int constexpr get_max_n() {
+  TI_DEVICE TI_FORCE_INLINE static int constexpr get_max_n() {
     return 1;
   }
 
-  TC_DEVICE TC_FORCE_INLINE void activate(int i,
+  TI_DEVICE TI_FORCE_INLINE void activate(int i,
                                           const PhysicalIndexGroup &index) {
   }
 
   static constexpr bool has_null = true;
 };
 
-TC_FORCE_INLINE constexpr uint32 log2int(uint64 value) {
+TI_FORCE_INLINE constexpr uint32 log2int(uint64 value) {
   int ret = 0;
   value >>= 1;
   while (value) {
@@ -358,7 +358,7 @@ TC_FORCE_INLINE constexpr uint32 log2int(uint64 value) {
   return ret;
 }
 
-TC_DEVICE TC_FORCE_INLINE uint32 extract_bits(uint32 n, int begin, int end) {
+TI_DEVICE TI_FORCE_INLINE uint32 extract_bits(uint32 n, int begin, int end) {
   return (n >> begin) & ((1 << (end - begin)) - 1);
 }
 
@@ -377,10 +377,10 @@ struct dense {
   // TODO: fix potential alignment issues
   uint64 bitmask[bitmasked ? (n + 63) / 64 : 1];
 
-  TC_DEVICE TC_FORCE_INLINE dense() {
+  TI_DEVICE TI_FORCE_INLINE dense() {
   }
 
-  TC_DEVICE TC_FORCE_INLINE int32 translate(int i) {  // i is flattened index
+  TI_DEVICE TI_FORCE_INLINE int32 translate(int i) {  // i is flattened index
     int i_translated;
     constexpr int dim = morton_dim;
 #if defined(TLANG_GPU)
@@ -400,26 +400,26 @@ struct dense {
           _pdep_u32(extract_bits(i, n_bit_axis * 2, n_bit_axis * 3),
                     0x24924924);
     } else if (dim == 4) {
-      TC_ASSERT(false);
+      TI_ASSERT(false);
       i_translated = 0;
     }
 #endif
     return i_translated;
   }
 
-  TC_DEVICE TC_FORCE_INLINE child_type *look_up(int i) {
+  TI_DEVICE TI_FORCE_INLINE child_type *look_up(int i) {
     return &children[translate(i)];
   }
 
-  TC_DEVICE TC_FORCE_INLINE int get_n() const {
+  TI_DEVICE TI_FORCE_INLINE int get_n() const {
     return n;
   }
 
-  TC_DEVICE TC_FORCE_INLINE static int constexpr get_max_n() {
+  TI_DEVICE TI_FORCE_INLINE static int constexpr get_max_n() {
     return n;
   }
 
-  TC_DEVICE TC_FORCE_INLINE bool is_active(int i_) {
+  TI_DEVICE TI_FORCE_INLINE bool is_active(int i_) {
     if (bitmasked) {
       // int i = translate(i_);
       int i = i_;
@@ -429,7 +429,7 @@ struct dense {
     }
   }
 
-  TC_DEVICE TC_FORCE_INLINE void deactivate(int i_) {
+  TI_DEVICE TI_FORCE_INLINE void deactivate(int i_) {
     if (bitmasked) {
       int i = translate(i_);
 #if __CUDA_ARCH__
@@ -441,7 +441,7 @@ struct dense {
     }
   }
 
-  TC_DEVICE TC_FORCE_INLINE void activate(int i_,
+  TI_DEVICE TI_FORCE_INLINE void activate(int i_,
                                           const PhysicalIndexGroup &index) {
     if (bitmasked) {
       // if (is_active(i_)) {
@@ -504,11 +504,11 @@ struct hash {
     return i * 129 % table_size;
   }
 
-  TC_DEVICE TC_FORCE_INLINE bool is_active(int i) {
+  TI_DEVICE TI_FORCE_INLINE bool is_active(int i) {
     return look_up(i) != nullptr;
   }
 
-  TC_DEVICE TC_FORCE_INLINE child_type *look_up(int i) {
+  TI_DEVICE TI_FORCE_INLINE child_type *look_up(int i) {
     int k = h(i);
     while (1) {
       if (key[k] == i + 1) {
@@ -521,7 +521,7 @@ struct hash {
     }
   }
 
-  TC_DEVICE TC_FORCE_INLINE void activate(int i,
+  TI_DEVICE TI_FORCE_INLINE void activate(int i,
                                           const PhysicalIndexGroup &index) {
     // TODO: speed up
     // serialize...
@@ -578,7 +578,7 @@ struct hash {
     }
   }
 
-  TC_DEVICE TC_FORCE_INLINE int get_n() const {
+  TI_DEVICE TI_FORCE_INLINE int get_n() const {
     return n;
   }
 
@@ -596,7 +596,7 @@ struct hash {
       // std::cout << "initializing hashed" << std::endl;
   };
 
-  TC_DEVICE TC_FORCE_INLINE child_type *look_up(
+  TI_DEVICE TI_FORCE_INLINE child_type *look_up(
       int i) {  // i is flattened index
     if (data.find(i) == data.end()) {
       return nullptr;
@@ -604,11 +604,11 @@ struct hash {
     return data[i];
   }
 
-  TC_DEVICE TC_FORCE_INLINE bool is_active(int i) {
+  TI_DEVICE TI_FORCE_INLINE bool is_active(int i) {
     return data != nullptr;
   }
 
-  TC_DEVICE TC_FORCE_INLINE void activate(int i,
+  TI_DEVICE TI_FORCE_INLINE void activate(int i,
                                           const PhysicalIndexGroup &index) {
     if (data.find(i) == data.end()) {
       auto ptr = (child_type *)Managers::get<hash>()
@@ -619,11 +619,11 @@ struct hash {
     }
   }
 
-  TC_DEVICE TC_FORCE_INLINE bool is_active(int i) {
+  TI_DEVICE TI_FORCE_INLINE bool is_active(int i) {
     return data.find(i) != data.end();
   }
 
-  TC_DEVICE TC_FORCE_INLINE int get_n() const {
+  TI_DEVICE TI_FORCE_INLINE int get_n() const {
     return data.size();
   }
 
@@ -638,27 +638,27 @@ struct pointer {
   int lock;
   // std::mutex mut;
 
-  TC_DEVICE TC_FORCE_INLINE child_type *look_up(
+  TI_DEVICE TI_FORCE_INLINE child_type *look_up(
       int i) {  // i is flattened index
-    // TC_ASSERT(i == 0);
-    // TC_ASSERT(data != nullptr);
+    // TI_ASSERT(i == 0);
+    // TI_ASSERT(data != nullptr);
     // Returning nullptr is allowed.
     return data;
   }
 
-  TC_DEVICE TC_FORCE_INLINE int get_n() const {
+  TI_DEVICE TI_FORCE_INLINE int get_n() const {
     return 1;
   }
 
-  TC_DEVICE TC_FORCE_INLINE static constexpr int get_max_n() {
+  TI_DEVICE TI_FORCE_INLINE static constexpr int get_max_n() {
     return 1;
   }
 
-  TC_DEVICE TC_FORCE_INLINE bool is_active(int i) {
+  TI_DEVICE TI_FORCE_INLINE bool is_active(int i) {
     return data != nullptr;
   }
 
-  TC_DEVICE TC_FORCE_INLINE void activate(int i,
+  TI_DEVICE TI_FORCE_INLINE void activate(int i,
                                           const PhysicalIndexGroup &index) {
     if (data == nullptr) {
 #if defined(__CUDA_ARCH__)
@@ -705,27 +705,27 @@ struct dynamic {
   child_type data[max_n];
   int n;
 
-  TC_DEVICE dynamic() : n(0) {
+  TI_DEVICE dynamic() : n(0) {
   }
 
-  TC_DEVICE TC_FORCE_INLINE child_type *look_up(
+  TI_DEVICE TI_FORCE_INLINE child_type *look_up(
       int i) {  // i is flattened index
 #if defined(TL_HOST)
     // assuming serial
     n = std::max(n, i + 1);
 #else
-    TC_ASSERT(i < n);
+    TI_ASSERT(i < n);
 #endif
     return &data[i];
   }
 
-  __device__ TC_FORCE_INLINE void clear() {
+  __device__ TI_FORCE_INLINE void clear() {
     n = 0;
   }
 
-  __device__ __host__ TC_FORCE_INLINE void append(child_type t) {
+  __device__ __host__ TI_FORCE_INLINE void append(child_type t) {
     auto tail = atomic_add(&n, 1);
-    TC_ASSERT(tail < max_n);
+    TI_ASSERT(tail < max_n);
     atomic_min(&n, max_n);
 #if __CUDA_ARCH__
     tail = min(tail, (int)(max_n - 1));
@@ -735,25 +735,25 @@ struct dynamic {
     data[tail] = t;
   }
 
-  TC_DEVICE TC_FORCE_INLINE bool is_active(int i) {
+  TI_DEVICE TI_FORCE_INLINE bool is_active(int i) {
     return true;
   }
 
-  TC_DEVICE TC_FORCE_INLINE void deactivate(int i_) {
+  TI_DEVICE TI_FORCE_INLINE void deactivate(int i_) {
     n = 0;  // TODO: fix this
   }
 
-  TC_DEVICE TC_FORCE_INLINE void activate(int i,
+  TI_DEVICE TI_FORCE_INLINE void activate(int i,
                                           const PhysicalIndexGroup &index) {
-    // TC_ASSERT();
+    // TI_ASSERT();
     // Do nothing
   }
 
-  TC_DEVICE TC_FORCE_INLINE int get_n() const {
+  TI_DEVICE TI_FORCE_INLINE int get_n() const {
     return n;
   }
 
-  TC_DEVICE TC_FORCE_INLINE static constexpr int get_max_n() {
+  TI_DEVICE TI_FORCE_INLINE static constexpr int get_max_n() {
     return max_n;
   }
 
diff --git a/taichi/system/benchmark.h b/taichi/system/benchmark.h
index e1901b10edf74..ea0f776c88494 100644
--- a/taichi/system/benchmark.h
+++ b/taichi/system/benchmark.h
@@ -8,7 +8,7 @@
 #include <taichi/common/interface.h>
 #include <taichi/system/timer.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class Benchmark : public Unit {
  protected:
@@ -59,6 +59,6 @@ class Benchmark : public Unit {
   }
 };
 
-TC_INTERFACE(Benchmark)
+TI_INTERFACE(Benchmark)
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/system/demangling.cpp b/taichi/system/demangling.cpp
index cf5d72a7c05d1..644f6a45a14b3 100644
--- a/taichi/system/demangling.cpp
+++ b/taichi/system/demangling.cpp
@@ -9,12 +9,12 @@
 #include <cxxabi.h>
 #endif
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 // From https://en.wikipedia.org/wiki/Name_mangling
 
 std::string cpp_demangle(const std::string &mangled_name) {
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
   char *demangled_name;
   int status = -1;
   demangled_name =
@@ -23,7 +23,7 @@ std::string cpp_demangle(const std::string &mangled_name) {
   free(demangled_name);
   return ret;
 #else
-  TC_NOT_IMPLEMENTED
+  TI_NOT_IMPLEMENTED
 #endif
 }
 
@@ -36,13 +36,13 @@ class Demangling : public Task {
 #if !defined(_WIN64)
       printf("Demangled C++ Identifier: %s\n", cpp_demangle(p).c_str());
 #else
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
 #endif
     }
     return "";
   }
 };
 
-TC_IMPLEMENTATION(Task, Demangling, "demangle")
+TI_IMPLEMENTATION(Task, Demangling, "demangle")
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/system/memory.cpp b/taichi/system/memory.cpp
index 77879d087b878..956b8e4d79a95 100644
--- a/taichi/system/memory.cpp
+++ b/taichi/system/memory.cpp
@@ -6,7 +6,7 @@
 #include <pybind11/pybind11.h>
 #include <pybind11/embed.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 namespace py = pybind11;
 using namespace py::literals;
@@ -74,7 +74,7 @@ void start_memory_monitoring(std::string output_fn, int pid, real interval) {
   if (pid == -1) {
     pid = PID::get_pid();
   }
-  TC_P(pid);
+  TI_P(pid);
   std::thread th([=]() {
     MemoryMonitor monitor(pid, output_fn);
     while (true) {
@@ -88,10 +88,10 @@ void start_memory_monitoring(std::string output_fn, int pid, real interval) {
 class MemoryTest : public Task {
  public:
   std::string run(const std::vector<std::string> &parameters) override {
-    TC_P(get_memory_usage());
+    TI_P(get_memory_usage());
     Time::sleep(3);
     std::vector<uint8> a(1024ul * 1024 * 1024 * 10, 3);
-    TC_P(get_memory_usage());
+    TI_P(get_memory_usage());
     Time::sleep(3);
     return "";
   }
@@ -111,7 +111,7 @@ class MemoryTest2 : public Task {
   }
 };
 
-TC_IMPLEMENTATION(Task, MemoryTest, "mem_test");
-TC_IMPLEMENTATION(Task, MemoryTest2, "mem_test2");
+TI_IMPLEMENTATION(Task, MemoryTest, "mem_test");
+TI_IMPLEMENTATION(Task, MemoryTest2, "mem_test2");
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/system/memory.h b/taichi/system/memory.h
index 1d16896d976cd..7ef8888c6fe03 100644
--- a/taichi/system/memory.h
+++ b/taichi/system/memory.h
@@ -1,6 +1,6 @@
 #include "virtual_memory.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class MemoryMonitor {
   // avoid including py::dict
@@ -19,4 +19,4 @@ void start_memory_monitoring(std::string output_fn,
                              int pid = -1,
                              real interval = 1);
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/system/profiler.cpp b/taichi/system/profiler.cpp
index 7114b46eee88b..62af4fac2e64c 100644
--- a/taichi/system/profiler.cpp
+++ b/taichi/system/profiler.cpp
@@ -1,6 +1,6 @@
 #include <taichi/system/profiler.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 void ProfilerRecords::print(ProfilerRecords::Node *node, int depth) {
   auto make_indent = [depth](int additional) {
@@ -96,4 +96,4 @@ void ProfilerRecords::print(ProfilerRecords::Node *node, int depth) {
   }
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/system/profiler.h b/taichi/system/profiler.h
index 5fdca6066c41b..8f73caa106247 100644
--- a/taichi/system/profiler.h
+++ b/taichi/system/profiler.h
@@ -10,11 +10,11 @@
 #include <vector>
 #include <map>
 #include <memory>
-#if defined(TC_PLATFORM_WINDOWS)
+#if defined(TI_PLATFORM_WINDOWS)
 #undef max
 #endif
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class ProfilerRecords {
  public:
@@ -54,7 +54,7 @@ class ProfilerRecords {
     }
 
     float64 get_averaged_tpe() const {
-      TC_ASSERT(account_tpe);
+      TI_ASSERT(account_tpe);
       return total_time / (float64)total_elements;
     }
 
@@ -162,24 +162,35 @@ class Profiler {
   }
 };
 
-#define TC_PROFILE(name, statements) \
+// Runs `statements` in a block that also holds a taichi::Profiler for `name`.
+#define TI_PROFILE(name, statements) \
   {                                  \
     taichi::Profiler _(name);        \
     statements;                      \
   }
 
-#define TC_PROFILER(name) taichi::Profiler _profiler_##__LINE__(name);
+// Pasting through two macro levels lets __LINE__ expand to its number first.
+// A direct `_profiler_##__LINE__` always produces the one identifier
+// `_profiler___LINE__`, so two uses in the same scope would collide.
+#define TI_PROFILER_CONCAT_IMPL(a, b) a##b
+#define TI_PROFILER_CONCAT(a, b) TI_PROFILER_CONCAT_IMPL(a, b)
+
+// Declares a taichi::Profiler for `name` in a variable named after the line.
+#define TI_PROFILER(name) \
+  taichi::Profiler TI_PROFILER_CONCAT(_profiler_, __LINE__)(name);
 
-#define TC_PROFILE_TPE(name, statements, elements) \
+// Like TI_PROFILE, but also passes `elements` to the taichi::Profiler.
+#define TI_PROFILE_TPE(name, statements, elements) \
   {                                                \
     taichi::Profiler _(name, elements);            \
     statements;                                    \
   }
 
-#define TI_AUTO_PROF TC_PROFILER(__FUNCTION__)
+// TI_PROFILER keyed by the enclosing function's name.
+#define TI_AUTO_PROF TI_PROFILER(__FUNCTION__)
 
 inline void print_profile_info() {
   ProfilerRecords::get_instance().print();
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/system/run_tests.cpp b/taichi/system/run_tests.cpp
index bdb0aeb0aaeeb..d6c94ef3cd00f 100644
--- a/taichi/system/run_tests.cpp
+++ b/taichi/system/run_tests.cpp
@@ -7,7 +7,7 @@
 #include <taichi/common/task.h>
 #include <taichi/testing.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class RunTests : public Task {
   virtual std::string run(const std::vector<std::string> &parameters) {
@@ -15,6 +15,6 @@ class RunTests : public Task {
   }
 };
 
-TC_IMPLEMENTATION(Task, RunTests, "test");
+TI_IMPLEMENTATION(Task, RunTests, "test");
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/system/threading.cpp b/taichi/system/threading.cpp
index 72d6026699ad0..28d54e70c5522 100644
--- a/taichi/system/threading.cpp
+++ b/taichi/system/threading.cpp
@@ -8,7 +8,7 @@
 #include <taichi/system/threading.h>
 #include <thread>
 #include <vector>
-#if defined(TC_PLATFORM_WINDOWS)
+#if defined(TI_PLATFORM_WINDOWS)
 #include <windows.h>
 #else
 // Mac and Linux
@@ -17,7 +17,7 @@
 
 #endif
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 #if defined(min)
 #undef min
@@ -31,14 +31,14 @@ bool test_threading() {
       for (int t = 0; t < 10000000; t++) {
         ret += t * 1e-20;
       }
-      TC_P(int(i + ret + 10 * *(int *)j));
+      TI_P(int(i + ret + 10 * *(int *)j));
     });
   }
   return true;
 }
 
 int PID::get_pid() {
-#if defined(TC_PLATFORM_WINDOWS)
+#if defined(TI_PLATFORM_WINDOWS)
   return (int)GetCurrentProcessId();
 #else
   return (int)getpid();
@@ -46,8 +46,8 @@ int PID::get_pid() {
 }
 
 int PID::get_parent_pid() {
-#if defined(TC_PLATFORM_WINDOWS)
-  TC_NOT_IMPLEMENTED
+#if defined(TI_PLATFORM_WINDOWS)
+  TI_NOT_IMPLEMENTED
   return -1;
 #else
   return (int)getppid();
@@ -79,13 +79,13 @@ void ThreadPool::run(int splits,
     this->context = context;
     this->func = func;
     this->desired_num_threads = std::min(desired_num_threads, max_num_threads);
-    TC_ASSERT(this->desired_num_threads > 0);
-    // TC_P(this->desired_num_threads);
+    TI_ASSERT(this->desired_num_threads > 0);
+    // TI_P(this->desired_num_threads);
     started = false;
     task_head = 0;
     task_tail = splits;
     timestamp++;
-    TC_ASSERT(timestamp < (1LL << 62)); // avoid overflowing here
+    TI_ASSERT(timestamp < (1LL << 62)); // avoid overflowing here
   }
 
   // wake up all slaves
@@ -97,7 +97,7 @@ void ThreadPool::run(int splits,
       return started && running_threads == 0;
     });
   }
-  TC_ASSERT(task_head >= task_tail);
+  TI_ASSERT(task_head >= task_tail);
 }
 
 void ThreadPool::target() {
@@ -165,4 +165,4 @@ ThreadPool::~ThreadPool() {
     th.join();
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/system/threading.h b/taichi/system/threading.h
index fb1cda7f5f066..046fcbeb0f77e 100644
--- a/taichi/system/threading.h
+++ b/taichi/system/threading.h
@@ -11,7 +11,7 @@
 #include <taichi/common/util.h>
 #include <thread>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 using RangeForTaskFunc = void(void *, int i);
 using ParallelFor = void(int n, int num_threads, void *, RangeForTaskFunc func);
@@ -61,4 +61,4 @@ class ThreadPool {
   ~ThreadPool();
 };
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/system/timer.cpp b/taichi/system/timer.cpp
index 25bb4ff34a8b8..91b66a7d91769 100644
--- a/taichi/system/timer.cpp
+++ b/taichi/system/timer.cpp
@@ -11,7 +11,7 @@
 
 #endif
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 using namespace std;
 
@@ -20,7 +20,7 @@ std::map<std::string, std::pair<double, int>> Time::Timer::memo;
 std::map<std::string, double> Time::FPSCounter::last_refresh;
 std::map<std::string, int> Time::FPSCounter::counter;
 
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
 
 double Time::get_time() {
   struct timeval tv;
@@ -160,11 +160,11 @@ uint64 Time::get_cycles() {
   __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
   return ((uint64)hi << 32) | lo;
 #else
-  TC_WARN("get_cycles is not implemented in this platform. Returning 0.");
+  TI_WARN("get_cycles is not implemented in this platform. Returning 0.");
   return 0;
 #endif
 }
 
 #endif
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/system/timer.h b/taichi/system/timer.h
index 569ec5e597562..bd973fc7a66dc 100644
--- a/taichi/system/timer.h
+++ b/taichi/system/timer.h
@@ -9,7 +9,7 @@
 #include <cstdio>
 #include <map>
 #include <taichi/common/util.h>
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
 #include <sys/time.h>
 #else
 #pragma warning(push)
@@ -18,7 +18,7 @@
 #pragma warning(pop)
 #endif
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 #define TIME(x)                                                      \
   {                                                                  \
@@ -27,7 +27,7 @@ TC_NAMESPACE_BEGIN
     taichi::Time::Timer _(timer_name);                               \
     x;                                                               \
   }
-#define TC_TIME(x) TIME(x)
+#define TI_TIME(x) TIME(x)
 
 #include <stdint.h>
 
@@ -102,4 +102,4 @@ class Time {
   };
 };
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/system/traceback.cpp b/taichi/system/traceback.cpp
index 22c73ef0382ae..5dc138e5e4a97 100644
--- a/taichi/system/traceback.cpp
+++ b/taichi/system/traceback.cpp
@@ -24,9 +24,9 @@
 #include <memory>
 #include <mutex>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
-TC_EXPORT void print_traceback() {
+TI_EXPORT void print_traceback() {
 #ifdef __APPLE__
   static std::mutex traceback_printer_mutex;
   // Modified based on
@@ -171,4 +171,4 @@ TC_EXPORT void print_traceback() {
 #endif
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/system/unit_dll.h b/taichi/system/unit_dll.h
index c78c7d92eab47..083b05c46a0c5 100644
--- a/taichi/system/unit_dll.h
+++ b/taichi/system/unit_dll.h
@@ -11,7 +11,7 @@
 
 #include <taichi/common/interface.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 class UnitDLL {
  protected:
@@ -26,13 +26,13 @@ class UnitDLL {
  public:
   void load_dll(const std::string dll_path) {
 #ifdef WIN32
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
     // dll = LoadLibrary(dll_path.c_str());
 #else
     dll = dlopen(dll_path.c_str(), RTLD_LAZY);
 #endif
     if (!dll) {
-      TC_ERROR(std::string("Cannot load library: " + dll_path));
+      TI_ERROR(std::string("Cannot load library: " + dll_path));
     }
   }
 
@@ -43,7 +43,7 @@ class UnitDLL {
     auto func = (Func)dlsym(dll, func_name.c_str());
     const char *dlsym_error = dlerror();
     if (dlsym_error) {
-      TC_ERROR(std::string("Cannot load function: ") + dlsym_error);
+      TI_ERROR(std::string("Cannot load function: ") + dlsym_error);
     }
 #endif
     assert_info(func != nullptr, "Function " + func_name + " not found");
@@ -71,7 +71,7 @@ class UnitDLL {
     assert_info(loaded(), "Dll not opened.");
     on_unload();
 #ifdef WIN32
-    TC_P("Not implemented");
+    TI_P("Not implemented");
 #else
     dlclose(dll);
 #endif
@@ -92,4 +92,4 @@ class UnitDLL {
   std::function<void(void)> on_unload;
 };
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/system/virtual_memory.h b/taichi/system/virtual_memory.h
index 11bfeb9f652f5..014cfd5475720 100644
--- a/taichi/system/virtual_memory.h
+++ b/taichi/system/virtual_memory.h
@@ -2,13 +2,13 @@
 
 #include <taichi/common/util.h>
 
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
 #include <sys/mman.h>
 #else
 #include <windows.h>
 #endif
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 // Cross-platform virtual memory allocator
 class VirtualMemoryAllocator {
@@ -18,45 +18,45 @@ class VirtualMemoryAllocator {
   size_t size;
   explicit VirtualMemoryAllocator(size_t size) : size(size) {
 // http://pages.cs.wisc.edu/~sifakis/papers/SPGrid.pdf Sec 3.1
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
     ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
-    TC_ERROR_IF(ptr == MAP_FAILED, "Virtual memory allocation ({} B) failed.",
+    TI_ERROR_IF(ptr == MAP_FAILED, "Virtual memory allocation ({} B) failed.",
                 size);
 #else
     MEMORYSTATUSEX stat;
     stat.dwLength = sizeof(stat);
     GlobalMemoryStatusEx(&stat);
     if (stat.ullAvailVirtual < size) {
-      TC_P(stat.ullAvailVirtual);
-      TC_P(size);
-      TC_ERROR("Insufficient virtual memory space");
+      TI_P(stat.ullAvailVirtual);
+      TI_P(size);
+      TI_ERROR("Insufficient virtual memory space");
     }
     ptr = VirtualAlloc(nullptr, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
-    TC_ERROR_IF(ptr == nullptr, "Virtual memory allocation ({} B) failed.",
+    TI_ERROR_IF(ptr == nullptr, "Virtual memory allocation ({} B) failed.",
                 size);
 #endif
-    TC_ERROR_IF(((uint64_t)ptr) % page_size != 0,
+    TI_ERROR_IF(((uint64_t)ptr) % page_size != 0,
                 "Allocated address ({:}) is not aligned by page size {}", ptr,
                 page_size);
   }
 
   ~VirtualMemoryAllocator() {
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
     if (munmap(ptr, size) != 0)
 #else
     // https://docs.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualfree
-    // According to MS Doc: size must be when using MEM_RELEASE
+    // According to MS Doc: size must be 0 when using MEM_RELEASE
     if (!VirtualFree(ptr, 0, MEM_RELEASE))
 #endif
-      TC_ERROR("Failed to free virtual memory ({} B)", size);
+      TI_ERROR("Failed to free virtual memory ({} B)", size);
   }
 };
 
 float64 get_memory_usage_gb(int pid = -1);
 uint64 get_memory_usage(int pid = -1);
 
-#define TC_MEMORY_USAGE(name) \
-  TC_DEBUG("Memory Usage [{}] = {:.2f} GB", name, get_memory_usage_gb());
+#define TI_MEMORY_USAGE(name) \
+  TI_DEBUG("Memory Usage [{}] = {:.2f} GB", name, get_memory_usage_gb());
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/taichi_llvm_context.cpp b/taichi/taichi_llvm_context.cpp
index 632ad19324354..c57dc09cb2325 100644
--- a/taichi/taichi_llvm_context.cpp
+++ b/taichi/taichi_llvm_context.cpp
@@ -35,7 +35,7 @@ TaichiLLVMContext::TaichiLLVMContext(Arch arch) : arch(arch) {
   llvm::remove_fatal_error_handler();
   llvm::install_fatal_error_handler(
       [](void *user_data, const std::string &reason, bool gen_crash_diag) {
-        TC_ERROR("LLVM Fatal Error: {}", reason);
+        TI_ERROR("LLVM Fatal Error: {}", reason);
       },
       nullptr);
 
@@ -50,11 +50,11 @@ TaichiLLVMContext::TaichiLLVMContext(Arch arch) : arch(arch) {
     LLVMInitializeNVPTXTargetInfo();
     LLVMInitializeNVPTXAsmPrinter();
 #else
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
 #endif
   }
   ctx = std::make_unique<llvm::LLVMContext>();
-  TC_TRACE("Creating llvm context for arch: {}", arch_name(arch));
+  TI_TRACE("Creating llvm context for arch: {}", arch_name(arch));
   llvm::ExitOnError exit_on_err;
   jit = exit_on_err(TaichiLLVMJIT::create(arch));
 }
@@ -76,8 +76,8 @@ llvm::Type *TaichiLLVMContext::get_data_type(DataType dt) {
   } else if (dt == DataType::f64) {
     return llvm::Type::getDoubleTy(*ctx);
   } else {
-    TC_INFO(data_type_name(dt));
-    TC_NOT_IMPLEMENTED
+    TI_INFO(data_type_name(dt));
+    TI_NOT_IMPLEMENTED
   }
 }
 
@@ -87,8 +87,8 @@ std::string find_existing_command(const std::vector<std::string> &commands) {
       return cmd;
     }
   }
-  TC_P(commands);
-  TC_ERROR("No command found.");
+  TI_P(commands);
+  TI_ERROR("No command found.");
 }
 
 std::string get_runtime_fn(Arch arch) {
@@ -106,8 +106,8 @@ void compile_runtime_bitcode(Arch arch) {
   static std::set<int> runtime_compiled;
   if (runtime_compiled.find((int)arch) == runtime_compiled.end()) {
     auto clang = find_existing_command({"clang-7", "clang"});
-    TC_ASSERT(command_exist("llvm-as"));
-    TC_TRACE("Compiling runtime module bitcode...");
+    TI_ASSERT(command_exist("llvm-as"));
+    TI_TRACE("Compiling runtime module bitcode...");
     auto runtime_folder = get_runtime_dir();
     std::string macro = fmt::format(" -D ARCH_{} ", arch_name(arch));
 #if defined(TI_ARCH_ARM)
@@ -119,7 +119,7 @@ void compile_runtime_bitcode(Arch arch) {
             clang, runtime_folder, runtime_folder, macro)
             .c_str());
     if (ret) {
-      TC_ERROR("Runtime compilation failed.");
+      TI_ERROR("Runtime compilation failed.");
     }
     std::system(fmt::format("llvm-as {}runtime.ll -o {}{}", runtime_folder,
                             runtime_folder, get_runtime_fn(arch))
@@ -146,7 +146,7 @@ std::string libdevice_path() {
   auto cuda_version_major = int(std::atof(cuda_version_string.c_str()));
   return fmt::format("{}/libdevice.{}.bc", folder, cuda_version_major);
 #else
-  TC_NOT_IMPLEMENTED;
+  TI_NOT_IMPLEMENTED;
   return "";
 #endif
 }
@@ -165,16 +165,16 @@ std::unique_ptr<llvm::Module> module_from_bitcode_file(std::string bitcode_path,
       parseBitcodeFile(MemoryBufferRef(bitcode, "runtime_bitcode"), *ctx);
   if (!runtime) {
     auto error = runtime.takeError();
-    TC_WARN("Bitcode loading error message:");
+    TI_WARN("Bitcode loading error message:");
     llvm::errs() << error << "\n";
-    TC_ERROR("Bitcode {} load failure.", bitcode_path);
+    TI_ERROR("Bitcode {} load failure.", bitcode_path);
   }
 
   for (auto &f : *(runtime.get()))
     TaichiLLVMContext::force_inline(&f);
 
   bool module_broken = llvm::verifyModule(*runtime.get(), &llvm::errs());
-  TC_ERROR_IF(module_broken, "Module broken");
+  TI_ERROR_IF(module_broken, "Module broken");
   return std::move(runtime.get());
 }
 
@@ -307,7 +307,7 @@ std::unique_ptr<llvm::Module> TaichiLLVMContext::clone_runtime_module() {
 
   std::unique_ptr<llvm::Module> cloned;
   {
-    TC_PROFILER("clone module");
+    TI_PROFILER("clone module");
     cloned = llvm::CloneModule(*runtime_module);
   }
 
@@ -333,29 +333,29 @@ void TaichiLLVMContext::link_module_with_libdevice(
 
   bool failed = llvm::Linker::linkModules(*module, std::move(libdevice_module));
   if (failed) {
-    TC_ERROR("CUDA libdevice linking failure.");
+    TI_ERROR("CUDA libdevice linking failure.");
   }
 
   for (auto func_name : libdevice_function_names) {
     auto func = module->getFunction(func_name);
     if (!func) {
-      TC_P(func_name);
+      TI_P(func_name);
     } else
       func->setLinkage(Function::InternalLinkage);
   }
 }
 
 std::unique_ptr<llvm::Module> TaichiLLVMContext::clone_struct_module() {
-  TC_ASSERT(struct_module);
+  TI_ASSERT(struct_module);
   return llvm::CloneModule(*struct_module);
 }
 
 void TaichiLLVMContext::set_struct_module(
     const std::unique_ptr<llvm::Module> &module) {
-  TC_ASSERT(module);
+  TI_ASSERT(module);
   if (llvm::verifyModule(*module, &llvm::errs())) {
     module->print(llvm::errs(), nullptr);
-    TC_ERROR("module broken");
+    TI_ERROR("module broken");
   }
   struct_module = llvm::CloneModule(*module);
 }
@@ -375,7 +375,7 @@ llvm::Value *TaichiLLVMContext::get_constant(DataType dt, T t) {
   } else if (dt == DataType::u64) {
     return llvm::ConstantInt::get(*ctx, llvm::APInt(64, t, false));
   } else {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
   }
 }
 
@@ -399,7 +399,7 @@ llvm::Value *TaichiLLVMContext::get_constant(T t) {
              std::is_same_v<TargetType, uint64>) {
     return llvm::ConstantInt::get(*ctx, llvm::APInt(64, (uint64)t, true));
   } else {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
   }
 }
 
@@ -436,13 +436,13 @@ void TaichiLLVMContext::print_huge_functions() {
     int c = num_instructions(&f);
     if (c > 100) {
       total_big_inst += c;
-      TC_INFO("Loaded runtime function: {} (inst. count= {})",
+      TI_INFO("Loaded runtime function: {} (inst. count= {})",
               std::string(f.getName()), c);
     }
     total_inst += c;
   }
-  TC_P(total_inst);
-  TC_P(total_big_inst);
+  TI_P(total_inst);
+  TI_P(total_big_inst);
 }
 
 template llvm::Value *TaichiLLVMContext::get_constant(float32 t);
diff --git a/taichi/taichi_llvm_context.h b/taichi/taichi_llvm_context.h
index 6db87ddf92e0c..b89030a7d6ed0 100644
--- a/taichi/taichi_llvm_context.h
+++ b/taichi/taichi_llvm_context.h
@@ -36,7 +36,7 @@ class TaichiLLVMContext {
     using FuncT = typename std::function<T>;
     auto ret =
         FuncT((function_pointer_type<FuncT>)jit_lookup_name(jit.get(), name));
-    TC_ASSERT(ret != nullptr);
+    TI_ASSERT(ret != nullptr);
     return ret;
   }
 
diff --git a/taichi/tlang.cpp b/taichi/tlang.cpp
index 9bf2cdbe9e22b..c8d17e73c4121 100644
--- a/taichi/tlang.cpp
+++ b/taichi/tlang.cpp
@@ -10,7 +10,7 @@ void layout(const std::function<void()> &body) {
 }
 
 Expr global_new(Expr id_expr, DataType dt) {
-  TC_ASSERT(id_expr.is<IdExpression>());
+  TI_ASSERT(id_expr.is<IdExpression>());
   auto ret = Expr(std::make_shared<GlobalVariableExpression>(
       dt, id_expr.cast<IdExpression>()->id));
   return ret;
diff --git a/taichi/tlang.h b/taichi/tlang.h
index 6b2c09dc82333..dd7566a8cf4f2 100644
--- a/taichi/tlang.h
+++ b/taichi/tlang.h
@@ -2,7 +2,7 @@
 
 #pragma once
 
-#if defined(CUDA_FOUND)
+#if defined(TI_WITH_CUDA)
 #include <cuda_runtime.h>
 #endif
 #include <taichi/common/util.h>
@@ -11,7 +11,7 @@
 namespace taichi {
 static_assert(
     sizeof(real) == sizeof(float32),
-    "Please build the taichi compiler with single precision (TC_USE_DOUBLE=0)");
+    "Please build the taichi compiler with single precision (TI_USE_DOUBLE=0)");
 namespace math {
 inline int maximum(int a) {
   return a;
@@ -19,7 +19,7 @@ inline int maximum(int a) {
 }  // namespace math
 }  // namespace taichi
 #include <set>
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
 #include <dlfcn.h>
 #endif
 
@@ -206,21 +206,21 @@ std::tuple<Matrix, Matrix, Matrix> sifakis_svd(const Matrix &a);
 
 TLANG_NAMESPACE_END
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 inline Dict parse_param(std::vector<std::string> cli_param) {
   Dict dict;
   for (auto &s : cli_param) {
     auto div = s.find('=');
     if (div == std::string::npos) {
-      TC_INFO("CLI parameter format: key=value, e.g. file_name=test.bin.");
+      TI_INFO("CLI parameter format: key=value, e.g. file_name=test.bin.");
       exit(-1);
     }
     dict.set(s.substr(0, div), s.substr(div + 1));
   }
-  TC_P(dict);
+  TI_P(dict);
   return dict;
 }
 
 void write_partio(std::vector<Vector3> positions, const std::string &file_name);
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/tlang_util.cpp b/taichi/tlang_util.cpp
index 5b8eef3e97e3f..52275009747d2 100644
--- a/taichi/tlang_util.cpp
+++ b/taichi/tlang_util.cpp
@@ -4,7 +4,7 @@
 #include <taichi/system/timer.h>
 #include <taichi/math/linalg.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 namespace Tlang {
 
@@ -17,7 +17,7 @@ real get_cpu_frequency() {
     Time::sleep(1);
     uint64 elapsed_cycles = Time::get_cycles() - cycles;
     auto frequency = real(std::round(elapsed_cycles / 1e8_f64) / 10.0_f64);
-    TC_INFO("CPU frequency = {:.2f} GHz ({} cycles per second)", frequency,
+    TI_INFO("CPU frequency = {:.2f} GHz ({} cycles per second)", frequency,
             elapsed_cycles);
     cpu_frequency = frequency;
   }
@@ -67,7 +67,7 @@ int default_simd_width(Arch arch) {
   } else if (arch == Arch::cuda) {
     return 32;
   } else {
-    TC_NOT_IMPLEMENTED;
+    TI_NOT_IMPLEMENTED;
     return -1;
   }
 }
@@ -233,6 +233,7 @@ std::string snode_op_type_name(SNodeOpType type) {
     REGISTER_TYPE(deactivate);
     REGISTER_TYPE(append);
     REGISTER_TYPE(clear);
+    REGISTER_TYPE(undefined);
 #undef REGISTER_TYPE
   }
   return type_names[type];
@@ -246,7 +247,7 @@ std::string CompileConfig::compiler_config() {
   std::string omp_flag = "";
 #endif
 
-#if defined(TC_PLATFORM_OSX)
+#if defined(TI_PLATFORM_OSX)
   std::string linking = "-undefined dynamic_lookup";
 #else
   std::string linking = "-ltaichi_core";
@@ -315,7 +316,7 @@ std::string CompileConfig::compile_cmd(const std::string &input,
 }
 
 bool command_exist(const std::string &command) {
-#if defined(TC_PLATFORM_UNIX)
+#if defined(TI_PLATFORM_UNIX)
   if (std::system(fmt::format("which {} > /dev/null 2>&1", command).c_str())) {
     return false;
   } else {
@@ -346,7 +347,7 @@ CompileConfig::CompileConfig() {
   force_vectorized_global_load = false;
   force_vectorized_global_store = false;
   debug = false;
-#if defined(TC_PLATFORM_OSX)
+#if defined(TI_PLATFORM_OSX)
   gcc_version = -1;
 #else
   gcc_version = -2;  // not 7 for faster compilation
@@ -354,11 +355,11 @@ CompileConfig::CompileConfig() {
 #endif
   if (!use_llvm) {
     if (gcc_version == -2 && !command_exist("clang-7")) {
-      TC_WARN("Command clang-7 not found. Attempting clang");
+      TI_WARN("Command clang-7 not found. Attempting clang");
       gcc_version = -1;
     }
     if (gcc_version == -1 && !command_exist("clang")) {
-      TC_WARN("Command clang not found. Attempting gcc-6");
+      TI_WARN("Command clang not found. Attempting gcc-6");
       gcc_version = 6;
     }
   }
@@ -389,7 +390,7 @@ std::string CompileConfig::compiler_name() {
 }
 
 std::string CompileConfig::gcc_opt_flag() {
-  TC_ASSERT(0 <= external_optimization_level &&
+  TI_ASSERT(0 <= external_optimization_level &&
             external_optimization_level < 5);
   if (external_optimization_level < 4) {
     return fmt::format("-O{}", external_optimization_level);
@@ -438,13 +439,13 @@ void initialize_benchmark() {
     return;
   }
   initialized = true;
-#if defined(TC_PLATFORM_LINUX)
+#if defined(TI_PLATFORM_LINUX)
   std::ifstream noturbo("/sys/devices/system/cpu/intel_pstate/no_turbo");
   char c;
   noturbo >> c;
-  TC_WARN_IF(c != '1',
+  TI_WARN_IF(c != '1',
              "You seem to be running the benchmark with Intel Turboboost.");
 #endif
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/tlang_util.h b/taichi/tlang_util.h
index dd745a57a788f..238b34ca0a096 100644
--- a/taichi/tlang_util.h
+++ b/taichi/tlang_util.h
@@ -71,7 +71,7 @@ inline DataType get_data_type() {
   } else if (std::is_same<T, uint64>()) {
     return DataType::u64;
   } else {
-    TC_NOT_IMPLEMENTED;
+    TI_NOT_IMPLEMENTED;
   }
   return DataType::unknown;
 }
@@ -164,7 +164,8 @@ enum class SNodeOpType : int {
   activate,
   deactivate,
   append,
-  clear
+  clear,
+  undefined
 };
 
 std::string snode_op_type_name(SNodeOpType type);
@@ -212,8 +213,8 @@ class TypedConstant {
     } else if (dt == DataType::f64) {
       return fmt::format("{}", val_f64);
     } else {
-      TC_P(data_type_name(dt));
-      TC_NOT_IMPLEMENTED
+      TI_P(data_type_name(dt));
+      TI_NOT_IMPLEMENTED
       return "";
     }
   }
@@ -230,28 +231,28 @@ class TypedConstant {
     } else if (dt == DataType::f64) {
       return val_f64 == o.val_f64;
     } else {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
       return false;
     }
   }
 
   int32 &val_int32() {
-    TC_ASSERT(get_data_type<int32>() == dt);
+    TI_ASSERT(get_data_type<int32>() == dt);
     return val_i32;
   }
 
   float32 &val_float32() {
-    TC_ASSERT(get_data_type<float32>() == dt);
+    TI_ASSERT(get_data_type<float32>() == dt);
     return val_f32;
   }
 
   int64 &val_int64() {
-    TC_ASSERT(get_data_type<int64>() == dt);
+    TI_ASSERT(get_data_type<int64>() == dt);
     return val_i64;
   }
 
   float64 &val_float64() {
-    TC_ASSERT(get_data_type<float64>() == dt);
+    TI_ASSERT(get_data_type<float64>() == dt);
     return val_f64;
   }
 };
@@ -274,8 +275,8 @@ inline std::string make_list(const std::vector<std::string> &data,
   } else if (bracket == "(") {
     ret += ")";
   } else if (bracket != "") {
-    TC_P(bracket);
-    TC_NOT_IMPLEMENTED
+    TI_P(bracket);
+    TI_NOT_IMPLEMENTED
   }
   return ret;
 }
@@ -353,7 +354,7 @@ bool command_exist(const std::string &command);
 
 TLANG_NAMESPACE_END
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 void initialize_benchmark();
 
 template <typename T, typename... Args, typename FP = T (*)(Args...)>
@@ -370,4 +371,4 @@ template <typename T>
 using function_pointer_type =
     decltype(function_pointer_helper(std::declval<T>()));
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/transforms/alg_simp.cpp b/taichi/transforms/alg_simp.cpp
index d407e0a937ad4..688dee393c28c 100644
--- a/taichi/transforms/alg_simp.cpp
+++ b/taichi/transforms/alg_simp.cpp
@@ -49,7 +49,7 @@ class AlgSimp : public BasicStmtVisitor {
     else if (data_type == DataType::f64)
       return val.val_float64() == 0;
     else {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
       return false;
     }
   }
@@ -70,7 +70,7 @@ class AlgSimp : public BasicStmtVisitor {
     else if (data_type == DataType::f64)
       return val.val_float64() == 1;
     else {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
       return false;
     }
   }
diff --git a/taichi/transforms/demote_atomics.cpp b/taichi/transforms/demote_atomics.cpp
index cdc027ce5233c..172bae7b9a354 100644
--- a/taichi/transforms/demote_atomics.cpp
+++ b/taichi/transforms/demote_atomics.cpp
@@ -34,7 +34,7 @@ class DemoteAtomics : public BasicStmtVisitor {
         auto new_stmts = VecStatement();
         Stmt *load;
         if (is_local) {
-          TC_ASSERT(stmt->width() == 1);
+          TI_ASSERT(stmt->width() == 1);
           load = new_stmts.push_back<LocalLoadStmt>(LocalAddress(ptr, 0));
           auto add =
               new_stmts.push_back<BinaryOpStmt>(BinaryOpType::add, load, val);
diff --git a/taichi/transforms/demote_dense_struct_fors.cpp b/taichi/transforms/demote_dense_struct_fors.cpp
index 8abd35b54d1e5..0978b84d8e43a 100644
--- a/taichi/transforms/demote_dense_struct_fors.cpp
+++ b/taichi/transforms/demote_dense_struct_fors.cpp
@@ -7,7 +7,7 @@ VecStatement convert_to_range_for(StructForStmt *struct_for) {
   auto loop_var = ret.push_back<AllocaStmt>(DataType::i32);
   auto lower = ret.push_back<ConstStmt>(TypedConstant(0));
   std::vector<SNode *> snodes;
-  auto snode = struct_for->snode->parent;
+  auto snode = struct_for->snode;
   int total_bits = 0;
   while (snode->type != SNodeType::root) {
     snodes.push_back(snode);
@@ -15,7 +15,7 @@ VecStatement convert_to_range_for(StructForStmt *struct_for) {
     snode = snode->parent;
   }
   std::reverse(snodes.begin(), snodes.end());
-  TC_ASSERT(total_bits <= 31);
+  TI_ASSERT(total_bits <= 31);
 
   auto upper_bound = 1 << total_bits;
   auto upper = ret.push_back<ConstStmt>(TypedConstant(upper_bound));
@@ -28,7 +28,7 @@ VecStatement convert_to_range_for(StructForStmt *struct_for) {
 
   std::vector<int> physical_indices;
 
-  TC_ASSERT(snodes.back()->num_active_indices == (int)old_loop_vars.size());
+  TI_ASSERT(snodes.back()->num_active_indices == (int)old_loop_vars.size());
   for (int i = 0; i < (int)old_loop_vars.size(); i++) {
     new_loop_vars.push_back(body_header.push_back<ConstStmt>(TypedConstant(0)));
     physical_indices.push_back(snodes.back()->physical_index_position[i]);
@@ -111,9 +111,10 @@ void demote_dense_struct_fors(IRNode *root) {
   for (int i = 0; i < (int)block_body.size(); i++) {
     auto s_ = block_body[i];
     if (auto s = s_->cast<StructForStmt>()) {
-      auto snode = s->snode->parent;
+      auto snode = s->snode;
       bool all_dense = true;
-      while (snode->type != SNodeType::root) {
+      // Checking all_dense here ends the scan at the first non-dense SNode.
+      while (all_dense && snode->type != SNodeType::root) {
         if (snode->type != SNodeType::dense) {
           all_dense = false;
         }
diff --git a/taichi/transforms/die.cpp b/taichi/transforms/die.cpp
index d95aa03d5885d..41028a3078192 100644
--- a/taichi/transforms/die.cpp
+++ b/taichi/transforms/die.cpp
@@ -45,7 +45,7 @@ class DIE : public IRVisitor {
   }
 
   void visit(Stmt *stmt) {
-    TC_ASSERT(!stmt->erased);
+    TI_ASSERT(!stmt->erased);
     if (phase == 0) {
       register_usage(stmt);
     } else {
diff --git a/taichi/transforms/insert_scratch_pad.cpp b/taichi/transforms/insert_scratch_pad.cpp
index 2ec831e1ce301..f934a7c83c0ec 100644
--- a/taichi/transforms/insert_scratch_pad.cpp
+++ b/taichi/transforms/insert_scratch_pad.cpp
@@ -16,7 +16,7 @@ class AccessAnalysis : public IRVisitor {
     allow_undefined_visitor = true;
     invoke_default_visitor = false;
 
-    TC_WARN(
+    TI_WARN(
         "Using the size of scratch_opt[0].second as the snode size to cache");
     generate_block_indices(for_stmt->scratch_opt[0].second->parent, {}, 0);
 
@@ -58,9 +58,9 @@ class AccessAnalysis : public IRVisitor {
       /*
       for (auto it: pads->pads) {
         //snodes.push_back(it.first);
-        TC_P(it.first->node_type_name);
+        TI_P(it.first->node_type_name);
       }
-      TC_P(snode->node_type_name);
+      TI_P(snode->node_type_name);
       */
       if (!pads->has(snode)) {
         continue;
@@ -76,24 +76,24 @@ class AccessAnalysis : public IRVisitor {
           offsets[i].first = diff.low;
           offsets[i].second = diff.high;
           /*
-          TC_P(ptr->name());
-          TC_P(diff.low);
-          TC_P(diff.high);
+          TI_P(ptr->name());
+          TI_P(diff.low);
+          TI_P(diff.high);
           */
         } else {
           /*
-          TC_P(i);
-          TC_P(for_stmt->loop_vars[i]->raw_name());
-          TC_P(ptr->indices[i]->raw_name());
+          TI_P(i);
+          TI_P(for_stmt->loop_vars[i]->raw_name());
+          TI_P(ptr->indices[i]->raw_name());
           */
           matching_indices = false;
         }
       }
       if (matching_indices) {
         /*
-        TC_INFO("Detected regular access");
+        TI_INFO("Detected regular access");
         for (int i = 0; i < num_indices; i++) {
-          TC_P(offsets[i]);
+          TI_P(offsets[i]);
         }
         */
         for (const auto &bind : block_indices) {
@@ -117,12 +117,12 @@ class AccessAnalysis : public IRVisitor {
 
   // Do not eliminate global data access
   void visit(GlobalLoadStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);  // TODO: support vectorization
+    TI_ASSERT(stmt->width() == 1);  // TODO: support vectorization
     access(stmt->ptr, AccessFlag::read);
   }
 
   void visit(GlobalStoreStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);  // TODO: support vectorization
+    TI_ASSERT(stmt->width() == 1);  // TODO: support vectorization
     access(stmt->ptr, AccessFlag::write);
   }
 
@@ -133,7 +133,7 @@ class AccessAnalysis : public IRVisitor {
   }
 
   void visit(Stmt *stmt) override {
-    TC_ASSERT(!stmt->is_container_statement());
+    TI_ASSERT(!stmt->is_container_statement());
   }
 };
 
diff --git a/taichi/transforms/ir_printer.cpp b/taichi/transforms/ir_printer.cpp
index 42d5708213fd6..9a0affa92e635 100644
--- a/taichi/transforms/ir_printer.cpp
+++ b/taichi/transforms/ir_printer.cpp
@@ -86,7 +86,7 @@ class IRPrinter : public IRVisitor {
     if (stmt->val) {
       extras += ", val = " + stmt->val->name();
     }
-    if (!stmt->indices.empty()){
+    if (!stmt->indices.empty()) {
       extras += " index [";
       for (int i = 0; i < (int)stmt->indices.size(); i++) {
         extras += fmt::format("{}", stmt->indices[i]->name());
@@ -343,13 +343,9 @@ class IRPrinter : public IRVisitor {
   }
 
   void visit(SNodeLookupStmt *stmt) override {
-    print(
-        "{} = [{}][{}]::lookup({}, {}) coord = {} activate = {}", stmt->name(),
-        stmt->snode->get_node_type_name_hinted(), stmt->snode->type_name(),
-        stmt->input_snode->name(), stmt->input_index->name(),
-        make_list<Stmt *>(stmt->global_indices,
-                          [&](Stmt *const &stmt) { return stmt->name(); }, "{"),
-        stmt->activate);
+    print("{} = [{}][{}]::lookup({}, {}) activate = {}", stmt->name(),
+          stmt->snode->get_node_type_name_hinted(), stmt->snode->type_name(),
+          stmt->input_snode->name(), stmt->input_index->name(), stmt->activate);
   }
 
   void visit(GetChStmt *stmt) override {
@@ -418,7 +414,7 @@ class IRPrinter : public IRVisitor {
             stmt->snode->get_node_type_name_hinted());
     } else {
       print("{} = offloaded {} {{", stmt->name(), details);
-      TC_ASSERT(stmt->body);
+      TI_ASSERT(stmt->body);
       stmt->body->accept(this);
       print("}}");
     }
diff --git a/taichi/transforms/loop_vectorize.cpp b/taichi/transforms/loop_vectorize.cpp
index 4ecd68b3e4a37..b260403ea33c3 100644
--- a/taichi/transforms/loop_vectorize.cpp
+++ b/taichi/transforms/loop_vectorize.cpp
@@ -48,7 +48,7 @@ class LoopVectorize : public IRVisitor {
   void visit(SNodeOpStmt *stmt) override {
     if (vectorize == 1)
       return;
-    // TC_NOT_IMPLEMENTED;
+    // TI_NOT_IMPLEMENTED;
     /*
     stmt->snodes.repeat(vectorize);
     stmt->ret_type.width *= vectorize;
diff --git a/taichi/transforms/lower_access.cpp b/taichi/transforms/lower_access.cpp
index cf4b89e116948..e352e565cc4f7 100644
--- a/taichi/transforms/lower_access.cpp
+++ b/taichi/transforms/lower_access.cpp
@@ -51,10 +51,10 @@ class LowerAccess : public IRVisitor {
                         SNode *snode,
                         std::vector<Stmt *> indices,
                         bool activate,
-                        bool return_is_active) {
-    if (return_is_active) {
+                        SNodeOpType snode_op = SNodeOpType::undefined) {
+    if (snode_op == SNodeOpType::is_active) {
       // For ti.is_active
-      TC_ASSERT(!activate);
+      TI_ASSERT(!activate);
     }
     // emit a sequence of micro access ops
     std::set<SNode *> nodes_on_loop;
@@ -69,7 +69,8 @@ class LowerAccess : public IRVisitor {
       snodes.push_front(snode);
 
     Stmt *last = lowered.push_back<GetRootStmt>();
-    for (int i = 0; i < (int)snodes.size() - 1 + int(return_is_active); i++) {
+    int path_inc = int(snode_op != SNodeOpType::undefined);
+    for (int i = 0; i < (int)snodes.size() - 1 + path_inc; i++) {
       auto snode = snodes[i];
       std::vector<Stmt *> lowered_indices;
       std::vector<int> strides;
@@ -113,10 +114,9 @@ class LowerAccess : public IRVisitor {
       auto linearized =
           lowered.push_back<LinearizeStmt>(lowered_indices, strides);
 
-      if (return_is_active && i == (int)snodes.size() - 1) {
+      if (snode_op != SNodeOpType::undefined && i == (int)snodes.size() - 1) {
         // Create a SNodeOp querying if element i(linearized) of node is active
-        lowered.push_back<SNodeOpStmt>(SNodeOpType::is_active, snodes[i], last,
-                                       linearized);
+        lowered.push_back<SNodeOpStmt>(snode_op, snodes[i], last, linearized);
       } else {
         auto lookup = lowered.push_back<SNodeLookupStmt>(
             snode, last, linearized,
@@ -132,7 +132,7 @@ class LowerAccess : public IRVisitor {
 
   VecStatement lower_vector_ptr(GlobalPtrStmt *ptr,
                                 bool activate,
-                                bool return_is_active = false) {
+                                SNodeOpType snode_op = SNodeOpType::undefined) {
     VecStatement lowered;
     std::vector<Stmt *> lowered_pointers;
     for (int i = 0; i < ptr->width(); i++) {
@@ -143,9 +143,8 @@ class LowerAccess : public IRVisitor {
         indices.push_back(extractor.get());
         lowered.push_back(std::move(extractor));
       }
-      lower_scalar_ptr(lowered, ptr->snodes[i], indices, activate,
-                       return_is_active);
-      TC_ASSERT(lowered.size());
+      lower_scalar_ptr(lowered, ptr->snodes[i], indices, activate, snode_op);
+      TI_ASSERT(lowered.size());
       lowered_pointers.push_back(lowered.back().get());
     }
     // create shuffle
@@ -178,11 +177,12 @@ class LowerAccess : public IRVisitor {
   }
 
   void visit(SNodeOpStmt *stmt) override {
-    if (stmt->op_type == SNodeOpType::is_active) {
+    if (SNodeOpStmt::activation_related(stmt->op_type) &&
+        stmt->snode->type != SNodeType::dynamic) {
       if (stmt->val == nullptr) {
         std::vector<SNode *> snodes(stmt->width(), stmt->snode);
         auto proxy_ptr = Stmt::make_typed<GlobalPtrStmt>(snodes, stmt->indices);
-        auto lowered = lower_vector_ptr(proxy_ptr.get(), false, true);
+        auto lowered = lower_vector_ptr(proxy_ptr.get(), false, stmt->op_type);
         stmt->replace_with(std::move(lowered), true);
         throw IRModified();
       } else {
diff --git a/taichi/transforms/lower_ast.cpp b/taichi/transforms/lower_ast.cpp
index 68c9a01fe7cbc..6d3a892f92e90 100644
--- a/taichi/transforms/lower_ast.cpp
+++ b/taichi/transforms/lower_ast.cpp
@@ -45,7 +45,7 @@ class LowerAST : public IRVisitor {
   void visit(FrontendAllocaStmt *stmt) override {
     auto block = stmt->parent;
     auto ident = stmt->ident;
-    TC_ASSERT(block->local_var_alloca.find(ident) ==
+    TI_ASSERT(block->local_var_alloca.find(ident) ==
               block->local_var_alloca.end());
     auto lowered = std::make_unique<AllocaStmt>(stmt->ret_type.data_type);
     block->local_var_alloca.insert(std::make_pair(ident, lowered.get()));
@@ -101,7 +101,7 @@ class LowerAST : public IRVisitor {
   }
 
   void visit(FrontendBreakStmt *stmt) override {
-    TC_ASSERT_INFO(
+    TI_ASSERT_INFO(
         capturing_loop->is<WhileStmt>(),
         "The loop capturing 'break' must be a while loop instead of for loop.");
     auto while_stmt = capturing_loop->as<WhileStmt>();
@@ -162,7 +162,7 @@ class LowerAST : public IRVisitor {
     }
 
     if (stmt->is_ranged()) {
-      TC_ASSERT(stmt->loop_var_id.size() == 1);
+      TI_ASSERT(stmt->loop_var_id.size() == 1);
       auto begin = stmt->begin;
       auto end = stmt->end;
       begin->flatten(flattened);
@@ -177,10 +177,21 @@ class LowerAST : public IRVisitor {
       for (int i = 0; i < (int)stmt->loop_var_id.size(); i++) {
         vars[i] = stmt->parent->lookup_var(stmt->loop_var_id[i]);
       }
+      auto snode = stmt->global_var.cast<GlobalVariableExpression>()->snode;
+      if (snode->type == SNodeType::place) {
+        /* Note:
+         * for i in x:
+         *   x[i] = 0
+         *
+         * has the same effect as
+         *
+         * for i in x.parent():
+         *   x[i] = 0 */
+        snode = snode->parent;
+      }
       auto &&new_for = std::make_unique<StructForStmt>(
-          vars, stmt->global_var.cast<GlobalVariableExpression>()->snode,
-          std::move(stmt->body), stmt->vectorize, stmt->parallelize,
-          stmt->block_dim);
+          vars, snode, std::move(stmt->body), stmt->vectorize,
+          stmt->parallelize, stmt->block_dim);
       new_for->scratch_opt = stmt->scratch_opt;
       flattened.push_back(std::move(new_for));
     }
@@ -223,7 +234,7 @@ class LowerAST : public IRVisitor {
           assign->parent->lookup_var(assign->lhs.cast<IdExpression>()->id),
           expr->stmt);
     } else {  // global variable
-      TC_ASSERT(assign->lhs.is<GlobalPtrExpression>());
+      TI_ASSERT(assign->lhs.is<GlobalPtrExpression>());
       auto global_ptr = assign->lhs.cast<GlobalPtrExpression>();
       global_ptr->flatten(flattened);
       flattened.push_back<GlobalStoreStmt>(flattened.back().get(), expr->stmt);
@@ -249,7 +260,7 @@ class LowerAST : public IRVisitor {
           stmt->parent->lookup_var(stmt->dest.cast<IdExpression>()->id);
       flattened.push_back<AtomicOpStmt>(stmt->op_type, alloca, expr->stmt);
     } else {  // global variable
-      TC_ASSERT(stmt->dest.is<GlobalPtrExpression>());
+      TI_ASSERT(stmt->dest.is<GlobalPtrExpression>());
       auto global_ptr = stmt->dest.cast<GlobalPtrExpression>();
       global_ptr->flatten(flattened);
       flattened.push_back<AtomicOpStmt>(stmt->op_type, flattened.back().get(),
@@ -275,8 +286,19 @@ class LowerAST : public IRVisitor {
       indices_stmt[i] = stmt->indices[i]->stmt;
     }
 
-    auto ptr = flattened.push_back<GlobalPtrStmt>(stmt->snode, indices_stmt);
-    flattened.push_back<SNodeOpStmt>(stmt->op_type, stmt->snode, ptr, val_stmt);
+    if (stmt->snode->type == SNodeType::dynamic) {
+      auto ptr = flattened.push_back<GlobalPtrStmt>(stmt->snode, indices_stmt);
+      flattened.push_back<SNodeOpStmt>(stmt->op_type, stmt->snode, ptr,
+                                       val_stmt);
+    } else if (stmt->snode->type == SNodeType::pointer ||
+               stmt->snode->type == SNodeType::hash ||
+               stmt->snode->type == SNodeType::dynamic) {
+      TI_ASSERT(SNodeOpStmt::activation_related(stmt->op_type));
+      flattened.push_back<SNodeOpStmt>(stmt->op_type, stmt->snode,
+                                       indices_stmt);
+    } else {
+      TI_NOT_IMPLEMENTED
+    }
 
     stmt->parent->replace_with(stmt, std::move(flattened));
     throw IRModified();
@@ -329,4 +351,4 @@ void lower(IRNode *root) {
 
 }  // namespace irpass
 
-TLANG_NAMESPACE_END
\ No newline at end of file
+TLANG_NAMESPACE_END
diff --git a/taichi/transforms/make_adjoint.cpp b/taichi/transforms/make_adjoint.cpp
index 076e9383e792a..7d40402ba99a1 100644
--- a/taichi/transforms/make_adjoint.cpp
+++ b/taichi/transforms/make_adjoint.cpp
@@ -105,9 +105,9 @@ class MakeAdjoint : public IRVisitor {
     auto alloca_ = adjoint(primal);
     if (!alloca_ || alloca_->is<ConstStmt>())
       return;  // primal may be int variable
-    TC_ASSERT(alloca_->is<AllocaStmt>());
+    TI_ASSERT(alloca_->is<AllocaStmt>());
     auto alloca = alloca_->as<AllocaStmt>();
-    TC_ASSERT(alloca->width() == 1);
+    TI_ASSERT(alloca->width() == 1);
     auto local_load = insert<LocalLoadStmt>(LocalAddress(alloca, 0));
     insert<LocalStoreStmt>(alloca, add(local_load, value));
   }
@@ -148,7 +148,7 @@ class MakeAdjoint : public IRVisitor {
     } else if (stmt->op_type == UnaryOpType::cos) {
       accumulate(stmt->operand, negate(mul(adjoint(stmt), sin(stmt->operand))));
     } else if (stmt->op_type == UnaryOpType::tan) {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
     } else if (stmt->op_type == UnaryOpType::tanh) {
       accumulate(stmt->operand,
                  mul(adjoint(stmt), sub(constant(1), sqr(stmt))));
@@ -176,8 +176,8 @@ class MakeAdjoint : public IRVisitor {
     } else if (stmt->op_type == UnaryOpType::logic_not) {
       // do nothing
     } else {
-      TC_P(unary_op_type_name(stmt->op_type));
-      TC_NOT_IMPLEMENTED
+      TI_P(unary_op_type_name(stmt->op_type));
+      TI_NOT_IMPLEMENTED
     }
   }
 
@@ -213,13 +213,13 @@ class MakeAdjoint : public IRVisitor {
     } else if (is_comparison(bin->op_type) || is_bit_op(bin->op_type)) {
       // do nothing
     } else {
-      TC_WARN("gradient of binary op {}", binary_op_type_name(bin->op_type));
-      TC_NOT_IMPLEMENTED
+      TI_WARN("gradient of binary op {}", binary_op_type_name(bin->op_type));
+      TI_NOT_IMPLEMENTED
     }
   }
 
   void visit(TernaryOpStmt *stmt) override {
-    TC_ASSERT(stmt->op_type == TernaryOpType::select);
+    TI_ASSERT(stmt->op_type == TernaryOpType::select);
     auto zero = insert<ConstStmt>(TypedConstant(stmt->ret_type.data_type));
     accumulate(stmt->op2,
                insert<TernaryOpStmt>(TernaryOpType::select, stmt->op1,
@@ -238,7 +238,7 @@ class MakeAdjoint : public IRVisitor {
 
       current_block = new_if->true_statements.get();
       for (int i = 0; i < if_stmt->true_statements->statements.size(); i++) {
-        // TC_ASSERT(if_stmt->true_statements[i])
+        // TI_ASSERT(if_stmt->true_statements[i])
         if_stmt->true_statements->statements[i]->accept(this);
       }
 
@@ -257,11 +257,11 @@ class MakeAdjoint : public IRVisitor {
   }
 
   void visit(WhileControlStmt *stmt) override {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
   }
 
   void visit(WhileStmt *stmt) override {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
   }
 
   void visit(RangeForStmt *for_stmt) override {
@@ -284,13 +284,13 @@ class MakeAdjoint : public IRVisitor {
 
   void visit(LocalLoadStmt *stmt) override {
     // do nothing
-    // TC_WARN("needs impl when loading something other than loop var");
+    // TI_WARN("needs impl when loading something other than loop var");
   }
 
-  void visit(LocalStoreStmt *stmt) override{TC_NOT_IMPLEMENTED}
+  void visit(LocalStoreStmt *stmt) override{TI_NOT_IMPLEMENTED}
 
   Stmt *load(Stmt *alloc) {
-    TC_ASSERT(alloc != nullptr);
+    TI_ASSERT(alloc != nullptr);
     if (alloc->is<AllocaStmt>()) {
       return insert<LocalLoadStmt>(LocalAddress(alloc, 0));
     } else {
@@ -313,7 +313,7 @@ class MakeAdjoint : public IRVisitor {
   void visit(GlobalLoadStmt *stmt) override {
     // issue global store to adjoint
     GlobalPtrStmt *ptr = stmt->ptr->as<GlobalPtrStmt>();
-    TC_ASSERT(ptr->width() == 1);
+    TI_ASSERT(ptr->width() == 1);
     auto snodes = ptr->snodes;
     if (!snodes[0]->has_grad()) {
       // No adjoint SNode. Do nothing
@@ -323,7 +323,7 @@ class MakeAdjoint : public IRVisitor {
       // gradients stopped, do nothing.
       return;
     }
-    TC_ASSERT(snodes[0]->get_grad() != nullptr);
+    TI_ASSERT(snodes[0]->get_grad() != nullptr);
     snodes[0] = snodes[0]->get_grad();
     auto adj_ptr = insert<GlobalPtrStmt>(snodes, ptr->indices);
     insert<AtomicOpStmt>(AtomicOpType::add, adj_ptr, load(adjoint(stmt)));
@@ -332,13 +332,13 @@ class MakeAdjoint : public IRVisitor {
   void visit(GlobalStoreStmt *stmt) override {
     // erase and replace with global load adjoint
     GlobalPtrStmt *ptr = stmt->ptr->as<GlobalPtrStmt>();
-    TC_ASSERT(ptr->width() == 1);
+    TI_ASSERT(ptr->width() == 1);
     auto snodes = ptr->snodes;
     if (!snodes[0]->has_grad()) {
       // no gradient (likely integer types)
       return;
     }
-    TC_ASSERT(snodes[0]->get_grad() != nullptr);
+    TI_ASSERT(snodes[0]->get_grad() != nullptr);
     snodes[0] = snodes[0]->get_grad();
     auto adjoint_ptr = insert<GlobalPtrStmt>(snodes, ptr->indices);
     accumulate(stmt->data, insert<GlobalLoadStmt>(adjoint_ptr));
@@ -348,10 +348,10 @@ class MakeAdjoint : public IRVisitor {
   void visit(AtomicOpStmt *stmt) override {
     // erase and replace with global load adjoint
     GlobalPtrStmt *ptr = stmt->dest->as<GlobalPtrStmt>();
-    TC_ASSERT(ptr->width() == 1);
+    TI_ASSERT(ptr->width() == 1);
     auto snodes = ptr->snodes;
     if (snodes[0]->has_grad()) {
-      TC_ASSERT(snodes[0]->get_grad() != nullptr);
+      TI_ASSERT(snodes[0]->get_grad() != nullptr);
       snodes[0] = snodes[0]->get_grad();
       auto adjoint_ptr = insert<GlobalPtrStmt>(snodes, ptr->indices);
       accumulate(stmt->val, insert<GlobalLoadStmt>(adjoint_ptr));
@@ -362,7 +362,7 @@ class MakeAdjoint : public IRVisitor {
   }
 
   void visit(ElementShuffleStmt *stmt) override {
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
   }
 
   void visit(AssertStmt *stmt) override {
diff --git a/taichi/transforms/offload.cpp b/taichi/transforms/offload.cpp
index c99833778c0d7..4869b23551b8d 100644
--- a/taichi/transforms/offload.cpp
+++ b/taichi/transforms/offload.cpp
@@ -90,8 +90,8 @@ class Offloader {
     std::vector<SNode *> path;
     // leaf is the place (scalar)
     // leaf->parent is the leaf block
-    // so listgen should be invoked from the root to leaf->parent->parent
-    for (auto p = leaf->parent; p; p = p->parent) {
+    // so listgen should be invoked from the root to leaf->parent
+    for (auto p = leaf; p; p = p->parent) {
       path.push_back(p);
     }
     std::reverse(path.begin(), path.end());
@@ -154,10 +154,10 @@ class IdentifyLocalVars : public BasicStmtVisitor {
   std::size_t global_offset;
 
   std::size_t allocate_global(VectorType type) {
-    TC_ASSERT(type.width == 1);
+    TI_ASSERT(type.width == 1);
     auto ret = global_offset;
     global_offset += data_type_size(type.data_type);
-    TC_ASSERT(global_offset < taichi_global_tmp_buffer_size);
+    TI_ASSERT(global_offset < taichi_global_tmp_buffer_size);
     return ret;
   }
 
@@ -181,7 +181,7 @@ class IdentifyLocalVars : public BasicStmtVisitor {
   }
 
   void visit(AllocaStmt *stmt) override {
-    TC_ASSERT(current_offloaded);
+    TI_ASSERT(current_offloaded);
     inst_to_offloaded[stmt] = current_offloaded;
   }
 
@@ -195,20 +195,20 @@ class IdentifyLocalVars : public BasicStmtVisitor {
   }
 
   void visit(LocalLoadStmt *stmt) override {
-    TC_ASSERT(current_offloaded);
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(current_offloaded);
+    TI_ASSERT(stmt->width() == 1);
     test_and_allocate(stmt->ptr[0].var);
   }
 
   void visit(LocalStoreStmt *stmt) override {
-    TC_ASSERT(current_offloaded);
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(current_offloaded);
+    TI_ASSERT(stmt->width() == 1);
     test_and_allocate(stmt->ptr);
   }
 
   void visit(AtomicOpStmt *stmt) override {
-    TC_ASSERT(current_offloaded);
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(current_offloaded);
+    TI_ASSERT(stmt->width() == 1);
     if (stmt->dest->is<AllocaStmt>()) {
       test_and_allocate(stmt->dest);
     }
@@ -321,7 +321,7 @@ class PromoteLocals : public BasicStmtVisitor {
   }
 
   void visit(LocalLoadStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     auto alloca = stmt->ptr[0].var;
     if (local_to_global_offset.find(alloca) == local_to_global_offset.end())
       return;
@@ -338,7 +338,7 @@ class PromoteLocals : public BasicStmtVisitor {
   }
 
   void visit(LocalStoreStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     auto alloca = stmt->ptr;
     if (local_to_global_offset.find(alloca) == local_to_global_offset.end())
       return;
@@ -355,7 +355,7 @@ class PromoteLocals : public BasicStmtVisitor {
   }
 
   void visit(AtomicOpStmt *stmt) override {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     auto alloca = stmt->dest;
     if (local_to_global_offset.find(alloca) == local_to_global_offset.end())
       return;
@@ -386,7 +386,7 @@ class PromoteLocals : public BasicStmtVisitor {
 
 void insert_gc(IRNode *root) {
   auto *b = dynamic_cast<Block *>(root);
-  TC_ASSERT(b);
+  TI_ASSERT(b);
   std::vector<std::pair<int, std::vector<SNode *>>> gc_statements;
   for (int i = 0; i < (int)b->statements.size(); i++) {
     auto snodes = irpass::gather_deactivations(b->statements[i].get());
diff --git a/taichi/transforms/reverse_segments.cpp b/taichi/transforms/reverse_segments.cpp
index a903227cd0ce4..a98db824a775f 100644
--- a/taichi/transforms/reverse_segments.cpp
+++ b/taichi/transforms/reverse_segments.cpp
@@ -59,7 +59,7 @@ void reverse_segments(IRNode *root) {
   }
     */
   if (has_for && has_non_for)
-    TC_ERROR(
+    TI_ERROR(
         "Invalid program input for autodiff. Please check the documentation "
         "for the \"Kernel Simplicity Rule\".");
   for (auto &sblock : statement_blocks) {
diff --git a/taichi/transforms/simplify.cpp b/taichi/transforms/simplify.cpp
index a12111475b8a2..a6d3b6a5db639 100644
--- a/taichi/transforms/simplify.cpp
+++ b/taichi/transforms/simplify.cpp
@@ -43,7 +43,7 @@ class BasicBlockSimplify : public IRVisitor {
     if (stmt->is_container_statement())
       return;
     else {
-      TC_ERROR("Visitor for non-container stmt undefined.");
+      TI_ERROR("Visitor for non-container stmt undefined.");
     }
   }
 
@@ -287,7 +287,7 @@ class BasicBlockSimplify : public IRVisitor {
       auto block = stmt->parent;
       Stmt *containing_statement = stmt;
       auto stmt_id = block->locate(containing_statement);
-      TC_ASSERT(stmt_id != -1);
+      TI_ASSERT(stmt_id != -1);
       for (int i = stmt_id - 1; i >= 0; i--) {
         auto &bstmt = block->statements[i];
         // Find a previous store
@@ -705,8 +705,8 @@ class BasicBlockSimplify : public IRVisitor {
       auto snode = stmt->snode;
       // compute offset...
       for (int i = 0; i < (int)snode->ch.size(); i++) {
-        TC_ASSERT(snode->ch[i]->type == SNodeType::place);
-        TC_ASSERT(snode->ch[i]->dt == DataType::i32 ||
+        TI_ASSERT(snode->ch[i]->type == SNodeType::place);
+        TI_ASSERT(snode->ch[i]->dt == DataType::i32 ||
                   snode->ch[i]->dt == DataType::f32);
       }
 
@@ -973,7 +973,7 @@ class Simplify : public IRVisitor {
   }
 
   void visit(StructForStmt *for_stmt) override {
-    TC_ASSERT(current_struct_for == nullptr);
+    TI_ASSERT(current_struct_for == nullptr);
     current_struct_for = for_stmt;
     for_stmt->body->accept(this);
     current_struct_for = nullptr;
diff --git a/taichi/transforms/slp_vectorize.cpp b/taichi/transforms/slp_vectorize.cpp
index 5f994ab1a28aa..702ee58cc1583 100644
--- a/taichi/transforms/slp_vectorize.cpp
+++ b/taichi/transforms/slp_vectorize.cpp
@@ -124,7 +124,7 @@ class BasicBlockSLP : public IRVisitor {
         return rec->find(pack[i])->second.first;
       }
     }
-    TC_ASSERT((int)pack.size() == slp_width);
+    TI_ASSERT((int)pack.size() == slp_width);
     for (int i = 0; i < (int)existing_stmts.size(); i++) {
       bool match = true;
       for (int j = 0; j < slp_width; j++) {
@@ -163,7 +163,7 @@ class BasicBlockSLP : public IRVisitor {
         return pack[i];
       }
       // fmt::print(" {} ", pack[i]->id);
-      TC_ASSERT(visited.find(pack[i]) == visited.end());
+      TI_ASSERT(visited.find(pack[i]) == visited.end());
       visited.insert(pack[i]);
     }
     // fmt::print("\n");
@@ -177,7 +177,7 @@ class BasicBlockSLP : public IRVisitor {
         operands.push_back(build(operand_pack));
       }
     } else {
-      TC_ASSERT(pack[0]->width() == 1);
+      TI_ASSERT(pack[0]->width() == 1);
       // Pack previous store or alloca.
       for (int i = 0; i < (int)pack[0]->as<LocalLoadStmt>()->ptr.size(); i++) {
         Pack operand_pack;
@@ -193,19 +193,19 @@ class BasicBlockSLP : public IRVisitor {
             operand_pack.push_back(previous);
         }
         if (operand_pack.size() != 0) {
-          TC_ASSERT((int)operand_pack.size() == slp_width);
+          TI_ASSERT((int)operand_pack.size() == slp_width);
           operands.push_back(build(operand_pack));
         }
       }
     }
     tmp_operands = operands;
     building_pack = pack;
-    TC_ASSERT(tmp_stmt == nullptr);
+    TI_ASSERT(tmp_stmt == nullptr);
     pack[0]->accept(this);
-    TC_ASSERT(tmp_stmt != nullptr);
+    TI_ASSERT(tmp_stmt != nullptr);
     tmp_operands.clear();
     for (int i = 0; i < (int)building_pack.size(); i++) {
-      TC_ASSERT(rec->find(building_pack[i]) == rec->end());
+      TI_ASSERT(rec->find(building_pack[i]) == rec->end());
       (*rec)[building_pack[i]] = std::make_pair(tmp_stmt.get(), i);
     }
     auto ret = new_stmts.push_back(std::move(tmp_stmt));
@@ -217,7 +217,7 @@ class BasicBlockSLP : public IRVisitor {
         break;
       }
     }
-    TC_ASSERT(pos != -1);
+    TI_ASSERT(pos != -1);
     position[ret] = pos;
     existing_stmts.push_back(std::make_pair(pack, ret));
     /*
@@ -231,7 +231,7 @@ class BasicBlockSLP : public IRVisitor {
   }
 
   void replace(Stmt *old_stmt, Stmt *new_stmt, int offset){
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
       /*
       for (int i = 0; i < (int)block->statements.size(); i++) {
         auto stmt = block->statements[i].get();
@@ -239,7 +239,7 @@ class BasicBlockSLP : public IRVisitor {
           continue;  // this is a statement being SLP vectorized..
         for (auto ope : stmt->operands) {
           if (*ope == old_stmt) {
-            TC_ASSERT(old_stmt->width() == 1);
+            TI_ASSERT(old_stmt->width() == 1);
             auto shuffle =
                 Stmt::make<ElementShuffleStmt>(VectorElement(new_stmt, 0));
             *ope = shuffle.get();
@@ -264,7 +264,7 @@ class BasicBlockSLP : public IRVisitor {
     auto &stmts = input_statements;
     Stmt *last_last_stmt = nullptr;
     while (1) {
-      // TC_INFO("Seeding...");
+      // TI_INFO("Seeding...");
       // Find the last statement
       Stmt *last_stmt = nullptr;
       for (int i = stmts.size() - 1; i >= 0; i--) {
@@ -278,7 +278,7 @@ class BasicBlockSLP : public IRVisitor {
         break;
       }
       if (last_stmt == last_last_stmt) {
-        TC_ERROR("Last stmt duplicated. Loop detected.");
+        TI_ERROR("Last stmt duplicated. Loop detected.");
       }
       last_last_stmt = last_stmt;
 
@@ -298,11 +298,11 @@ class BasicBlockSLP : public IRVisitor {
       }
 
       if ((int)seed_statements.size() != width) {
-        TC_ERROR("Cannot find enough {} seed statements to start SLP search.",
+        TI_ERROR("Cannot find enough {} seed statements to start SLP search.",
                  width);
       }
       std::reverse(seed_statements.begin(), seed_statements.end());
-      // TC_P(last_stmt->id);
+      // TI_P(last_stmt->id);
       build(seed_statements);
     }
     sort(new_stmts);
@@ -335,7 +335,7 @@ class BasicBlockSLP : public IRVisitor {
               if (replaced)
                 break;
             }
-            TC_ASSERT(replaced);
+            TI_ASSERT(replaced);
           }
         }
       } else if (stmt_->is<LocalStoreStmt>()) {
@@ -347,18 +347,18 @@ class BasicBlockSLP : public IRVisitor {
           for (auto &rec : existing_stmts) {
             for (int j = 0; j < slp_width; j++) {
               if (rec.first[j] == old_stmt) {
-                TC_ASSERT(j == 0);
+                TI_ASSERT(j == 0);
                 // replace alloca
                 stmt->ptr = rec.second;
                 replaced = true;
-                TC_WARN("Replacing alloca in store");
+                TI_WARN("Replacing alloca in store");
                 break;
               }
             }
             if (replaced)
               break;
           }
-          TC_ASSERT(replaced);
+          TI_ASSERT(replaced);
         }
       }
     }
@@ -422,8 +422,8 @@ class SLPVectorize : public IRVisitor {
       // until the end...
       second_pragma_slp_location = (int)block->statements.size();
     }
-    // TC_P(block->statements[first_pragma_slp_location]->id);
-    TC_ASSERT(
+    // TI_P(block->statements[first_pragma_slp_location]->id);
+    TI_ASSERT(
         block->statements[first_pragma_slp_location]->is<PragmaSLPStmt>());
 
     std::vector<pStmt> shuffles;
@@ -443,7 +443,7 @@ class SLPVectorize : public IRVisitor {
         auto s = stmt->as<ElementShuffleStmt>();
         for (int l = 0; l < stmt->width(); l++) {
           auto old_stmt = s->elements[l].stmt;
-          TC_ASSERT(s->elements[l].index == 0);
+          TI_ASSERT(s->elements[l].index == 0);
           if (rec.find(old_stmt) != rec.end()) {
             s->elements[l].stmt = rec[old_stmt].first;
             s->elements[l].index = rec[old_stmt].second;
@@ -456,7 +456,7 @@ class SLPVectorize : public IRVisitor {
             auto shuffle = Stmt::make<ElementShuffleStmt>(
                 VectorElement(rec[ope].first, rec[ope].second));
             /*
-            TC_INFO("Shuffle {}: replaced {} with {}", shuffle->id, ope->id,
+            TI_INFO("Shuffle {}: replaced {} with {}", shuffle->id, ope->id,
                     rec[ope].first->id);
                     */
             stmt->set_operand(i, shuffle.get());
@@ -467,16 +467,16 @@ class SLPVectorize : public IRVisitor {
     }
 
     for (int i = 0; i < (int)shuffles.size(); i++) {
-      // TC_P(shuffles[i]->id);
+      // TI_P(shuffles[i]->id);
       block->insert(std::move(shuffles[i]), first_pragma_slp_location + i + 1);
     }
     second_pragma_slp_location += (int)shuffles.size();
 
-    // TC_P(block->statements[first_pragma_slp_location]->id);
-    TC_ASSERT(
+    // TI_P(block->statements[first_pragma_slp_location]->id);
+    TI_ASSERT(
         block->statements[first_pragma_slp_location]->is<PragmaSLPStmt>());
     // irpass::print(context->root());
-    // TC_P(block->statements[first_pragma_slp_location]->id);
+    // TI_P(block->statements[first_pragma_slp_location]->id);
     int current_slp_width = block->statements[first_pragma_slp_location]
                                 ->as<PragmaSLPStmt>()
                                 ->slp_width;
@@ -486,15 +486,15 @@ class SLPVectorize : public IRVisitor {
          i++) {
       vec.push_back(block->statements[i].get());
     }
-    // TC_INFO("Before SLP");
+    // TI_INFO("Before SLP");
     auto slp = BasicBlockSLP();
     block->replace_statements_in_range(
         first_pragma_slp_location, second_pragma_slp_location,
         slp.run(block, current_slp_width, vec, &rec));
     /*
-    TC_P(first_pragma_slp_location);
-    TC_P(second_pragma_slp_location);
-    TC_INFO("SLPed...");
+    TI_P(first_pragma_slp_location);
+    TI_P(second_pragma_slp_location);
+    TI_INFO("SLPed...");
     */
     throw IRModified();
   }
@@ -530,8 +530,8 @@ class SLPVectorize : public IRVisitor {
           if (trivial) {
             stmt->ptr = ptr->elements[0].stmt;
           } else {
-            TC_P(stmt->id);
-            TC_ERROR(
+            TI_P(stmt->id);
+            TI_ERROR(
                 "Local store with non trivial shuffling is not yet handled.");
           }
         }
diff --git a/taichi/transforms/type_check.cpp b/taichi/transforms/type_check.cpp
index 4b579dfd82bf1..57c5cc566ba69 100644
--- a/taichi/transforms/type_check.cpp
+++ b/taichi/transforms/type_check.cpp
@@ -44,9 +44,9 @@ class TypeCheck : public IRVisitor {
   }
 
   void visit(AtomicOpStmt *stmt) {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     if (stmt->val->ret_type.data_type != stmt->dest->ret_type.data_type) {
-      TC_WARN("Atomic add ({} to {}) may lose precision.",
+      TI_WARN("Atomic add ({} to {}) may lose precision.",
               data_type_name(stmt->val->ret_type.data_type),
               data_type_name(stmt->dest->ret_type.data_type));
       stmt->val = insert_type_cast_before(stmt, stmt->val,
@@ -58,7 +58,7 @@ class TypeCheck : public IRVisitor {
   }
 
   void visit(LocalLoadStmt *stmt) {
-    TC_ASSERT(stmt->width() == 1);
+    TI_ASSERT(stmt->width() == 1);
     auto lookup = stmt->ptr[0].var->ret_type;
     stmt->ret_type = lookup;
   }
@@ -75,7 +75,7 @@ class TypeCheck : public IRVisitor {
                                            stmt->ptr->ret_type.data_type);
     }
     if (stmt->ptr->ret_type.data_type != ret_type) {
-      TC_WARN(
+      TI_WARN(
           "Local store may lose precision (target = {}, value = {}, "
           "stmt_id = {}) at",
           stmt->ptr->ret_data_type_name(), stmt->data->ret_data_type_name(),
@@ -97,24 +97,24 @@ class TypeCheck : public IRVisitor {
     if (stmt->snodes)
       stmt->ret_type.data_type = stmt->snodes[0]->dt;
     else
-      TC_WARN("Type inference failed: snode is nullptr.");
+      TI_WARN("Type inference failed: snode is nullptr.");
     for (int l = 0; l < stmt->snodes.size(); l++) {
       if (stmt->snodes[l]->parent->num_active_indices != 0 &&
           stmt->snodes[l]->parent->num_active_indices != stmt->indices.size()) {
-        TC_ERROR("{} has {} indices. Indexed with {}.",
+        TI_ERROR("{} has {} indices. Indexed with {}.",
                  stmt->snodes[l]->parent->node_type_name,
                  stmt->snodes[l]->parent->num_active_indices,
                  stmt->indices.size());
       }
     }
     for (int i = 0; i < stmt->indices.size(); i++) {
-      TC_ASSERT_INFO(
+      TI_ASSERT_INFO(
           is_integral(stmt->indices[i]->ret_type.data_type),
           "Taichi tensors must be accessed with integral indices (e.g., "
           "i32/i64). It seems that you have used a float point number as "
           "an index. You can cast that to an integer using int(). Also note "
           "that ti.floor(ti.f32) returns f32.");
-      TC_ASSERT(stmt->indices[i]->ret_type.width == stmt->snodes.size());
+      TI_ASSERT(stmt->indices[i]->ret_type.width == stmt->snodes.size());
     }
   }
 
@@ -127,7 +127,7 @@ class TypeCheck : public IRVisitor {
                                            stmt->ptr->ret_type.data_type);
     }
     if (stmt->ptr->ret_type.data_type != promoted) {
-      TC_WARN("Global store may lose precision: {} <- {}, at",
+      TI_WARN("Global store may lose precision: {} <- {}, at",
               stmt->ptr->ret_data_type_name(), input_type, stmt->tb);
     }
     stmt->ret_type = stmt->ptr->ret_type;
@@ -135,7 +135,7 @@ class TypeCheck : public IRVisitor {
 
   void visit(RangeForStmt *stmt) {
     /*
-    TC_ASSERT(block->local_variables.find(stmt->loop_var) ==
+    TI_ASSERT(block->local_variables.find(stmt->loop_var) ==
               block->local_variables.end());
               */
     mark_as_if_const(stmt->begin, VectorType(1, DataType::i32));
@@ -162,13 +162,13 @@ class TypeCheck : public IRVisitor {
     }
     if (is_trigonometric(stmt->op_type) &&
         !is_real(stmt->operand->ret_type.data_type)) {
-      TC_ERROR("Trigonometric operator takes real inputs only. At {}",
+      TI_ERROR("Trigonometric operator takes real inputs only. At {}",
                stmt->tb);
     }
     if ((stmt->op_type == UnaryOpType::floor ||
          stmt->op_type == UnaryOpType::ceil) &&
         !is_real(stmt->operand->ret_type.data_type)) {
-      TC_ERROR("floor/ceil takes real inputs only. At {}", stmt->tb);
+      TI_ERROR("floor/ceil takes real inputs only. At {}", stmt->tb);
     }
   }
 
@@ -204,14 +204,14 @@ class TypeCheck : public IRVisitor {
   void visit(BinaryOpStmt *stmt) {
     auto error = [&](std::string comment = "") {
       if (comment == "") {
-        TC_WARN("Error: type mismatch (left = {}, right = {}, stmt_id = {}) at",
+        TI_WARN("Error: type mismatch (left = {}, right = {}, stmt_id = {}) at",
                 stmt->lhs->ret_data_type_name(),
                 stmt->rhs->ret_data_type_name(), stmt->id);
       } else {
-        TC_WARN(comment + " at");
+        TI_WARN(comment + " at");
       }
       fmt::print(stmt->tb);
-      TC_WARN("Compilation stopped due to type mismatch.");
+      TI_WARN("Compilation stopped due to type mismatch.");
       exit(-1);
     };
     if (stmt->lhs->ret_type.data_type == DataType::unknown &&
@@ -270,9 +270,9 @@ class TypeCheck : public IRVisitor {
     if (stmt->op_type == TernaryOpType::select) {
       auto ret_type = promoted_type(stmt->op2->ret_type.data_type,
                                     stmt->op3->ret_type.data_type);
-      TC_ASSERT(stmt->op1->ret_type.data_type == DataType::i32)
-      TC_ASSERT(stmt->op1->ret_type.width == stmt->op2->ret_type.width);
-      TC_ASSERT(stmt->op2->ret_type.width == stmt->op3->ret_type.width);
+      TI_ASSERT(stmt->op1->ret_type.data_type == DataType::i32)
+      TI_ASSERT(stmt->op1->ret_type.width == stmt->op2->ret_type.width);
+      TI_ASSERT(stmt->op2->ret_type.width == stmt->op3->ret_type.width);
       if (ret_type != stmt->op2->ret_type.data_type) {
         auto cast_stmt = insert_type_cast_before(stmt, stmt->op2, ret_type);
         stmt->op2 = cast_stmt;
@@ -283,34 +283,34 @@ class TypeCheck : public IRVisitor {
       }
       stmt->ret_type = VectorType(stmt->op1->width(), ret_type);
     } else {
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
     }
   }
 
   void visit(ElementShuffleStmt *stmt) {
-    TC_ASSERT(stmt->elements.size() != 0);
+    TI_ASSERT(stmt->elements.size() != 0);
     stmt->element_type() = stmt->elements[0].stmt->element_type();
   }
 
   void visit(RangeAssumptionStmt *stmt) {
-    TC_ASSERT(stmt->input->ret_type == stmt->base->ret_type);
+    TI_ASSERT(stmt->input->ret_type == stmt->base->ret_type);
     stmt->ret_type = stmt->input->ret_type;
   }
 
   void visit(ArgLoadStmt *stmt) {
     auto &args = get_current_program().get_current_kernel().args;
-    TC_ASSERT(0 <= stmt->arg_id && stmt->arg_id < args.size());
-    TC_ASSERT(!args[stmt->arg_id].is_return_value);
+    TI_ASSERT(0 <= stmt->arg_id && stmt->arg_id < args.size());
+    TI_ASSERT(!args[stmt->arg_id].is_return_value);
     stmt->ret_type = VectorType(1, args[stmt->arg_id].dt);
   }
 
   void visit(ArgStoreStmt *stmt) {
     auto &args = get_current_program().get_current_kernel().args;
-    TC_ASSERT(0 <= stmt->arg_id && stmt->arg_id < args.size());
+    TI_ASSERT(0 <= stmt->arg_id && stmt->arg_id < args.size());
     auto arg = args[stmt->arg_id];
     auto arg_type = arg.dt;
-    TC_ASSERT(arg.is_return_value);
-    TC_ASSERT(stmt->val->ret_type.data_type == arg_type);
+    TI_ASSERT(arg.is_return_value);
+    TI_ASSERT(stmt->val->ret_type.data_type == arg_type);
     stmt->ret_type = VectorType(1, arg_type);
   }
 
diff --git a/taichi/transforms/vector_split.cpp b/taichi/transforms/vector_split.cpp
index 0ff5cbbb3765e..a9de232fce518 100644
--- a/taichi/transforms/vector_split.cpp
+++ b/taichi/transforms/vector_split.cpp
@@ -34,11 +34,11 @@ class BasicBlockVectorSplit : public IRVisitor {
 
   Stmt *lookup(Stmt *old, int index) {
     if (origin2split.find(old) == origin2split.end()) {
-      TC_WARN("VectorSplitter looking for statement outside current block?");
+      TI_WARN("VectorSplitter looking for statement outside current block?");
       return old;
     } else {
-      TC_ASSERT(0 <= index);
-      TC_ASSERT(index < (int)origin2split[old].size());
+      TI_ASSERT(0 <= index);
+      TI_ASSERT(index < (int)origin2split[old].size());
       return origin2split[old][index];
     }
   }
@@ -48,7 +48,7 @@ class BasicBlockVectorSplit : public IRVisitor {
     for (int i = 0; i < (int)statements.size(); i++) {
       auto stmt = statements[i].get();
       if (stmt->width() > max_width) {
-        TC_ASSERT(stmt->width() % max_width == 0);
+        TI_ASSERT(stmt->width() % max_width == 0);
         current_split_factor = stmt->width() / max_width;
         current_split.resize(current_split_factor);
         need_split = true;
@@ -115,7 +115,7 @@ class BasicBlockVectorSplit : public IRVisitor {
                 origin2split[old_var][stmt->ptr[l].offset / max_width];
             stmt->ptr[l].var = new_var;
             stmt->ptr[l].offset %= max_width;
-            // TC_WARN("replaced...");
+            // TI_WARN("replaced...");
           }
         }
       }
@@ -253,7 +253,7 @@ class BasicBlockVectorSplit : public IRVisitor {
   }
 
   void visit(WhileControlStmt *stmt) override {
-    TC_ASSERT(need_split == false);
+    TI_ASSERT(need_split == false);
     for (int i = 0; i < current_split_factor; i++) {
       current_split[i] = Stmt::make<WhileControlStmt>(lookup(stmt->mask, i),
                                                       lookup(stmt->cond, i));
diff --git a/taichi/unified_allocator.cpp b/taichi/unified_allocator.cpp
index d3e7d9d7e3b84..cdfe6840044aa 100644
--- a/taichi/unified_allocator.cpp
+++ b/taichi/unified_allocator.cpp
@@ -1,6 +1,6 @@
 // Virtual memory allocator for CPU/GPU
 
-#if defined(CUDA_FOUND)
+#if defined(TI_WITH_CUDA)
 #include "cuda_utils.h"
 #endif
 #include "tlang_util.h"
@@ -23,15 +23,15 @@ UnifiedAllocator::UnifiedAllocator(std::size_t size, Arch arch)
     //  - kernel B is getting loaded via cuModuleLoadDataEx (and get stuck for
     //  some reason)
     // So we need a mutex here...
-    TC_TRACE("Allocating unified (CPU+GPU) address space of size {} MB",
+    TI_TRACE("Allocating unified (CPU+GPU) address space of size {} MB",
              size / 1024 / 1024);
-#if defined(CUDA_FOUND)
+#if defined(TI_WITH_CUDA)
     std::lock_guard<std::mutex> _(cuda_context->lock);
     check_cuda_error(cudaMallocManaged(&_cuda_data, size));
     if (_cuda_data == nullptr) {
-      TC_ERROR("GPU memory allocation failed.");
+      TI_ERROR("GPU memory allocation failed.");
     }
-#if !defined(TI_ARCH_ARM) && !defined(TC_PLATFORM_WINDOWS)
+#if !defined(TI_ARCH_ARM) && !defined(TI_PLATFORM_WINDOWS)
     // Assuming ARM devices have shared CPU/GPU memory and do no support
     // memAdvise; CUDA on Windows has limited support for unified memory
     check_cuda_error_as_warning(
@@ -46,20 +46,20 @@ UnifiedAllocator::UnifiedAllocator(std::size_t size, Arch arch)
                   */
     data = (uint8 *)_cuda_data;
 #else
-    TC_NOT_IMPLEMENTED
+    TI_NOT_IMPLEMENTED
 #endif
   } else {
-    TC_TRACE("Allocating virtual address space of size {} MB",
+    TI_TRACE("Allocating virtual address space of size {} MB",
              size / 1024 / 1024);
     cpu_vm = std::make_unique<VirtualMemoryAllocator>(size);
     data = (uint8 *)cpu_vm->ptr;
   }
-  TC_ASSERT(data != nullptr);
-  TC_ASSERT(uint64(data) % 4096 == 0);
+  TI_ASSERT(data != nullptr);
+  TI_ASSERT(uint64(data) % 4096 == 0);
 
   head = data;
   tail = head + size;
-  TC_TRACE("Memory allocated. Allocation time = {:.3} s", Time::get_time() - t);
+  TI_TRACE("Memory allocated. Allocation time = {:.3} s", Time::get_time() - t);
 }
 
 taichi::Tlang::UnifiedAllocator::~UnifiedAllocator() {
@@ -67,10 +67,10 @@ taichi::Tlang::UnifiedAllocator::~UnifiedAllocator() {
     return;
   }
   if (arch_ == Arch::cuda) {
-#if defined(CUDA_FOUND)
+#if defined(TI_WITH_CUDA)
     check_cuda_error(cudaFree(_cuda_data));
 #else
-    TC_ERROR("No CUDA support");
+    TI_ERROR("No CUDA support");
 #endif
   }
 }
diff --git a/taichi/unified_allocator.h b/taichi/unified_allocator.h
index b0beba3313d1f..21ec76cb8fd5b 100644
--- a/taichi/unified_allocator.h
+++ b/taichi/unified_allocator.h
@@ -42,7 +42,7 @@ class UnifiedAllocator {
       return nullptr;
     } else {
       // success
-      TC_ASSERT((std::size_t)ret % alignment == 0);
+      TI_ASSERT((std::size_t)ret % alignment == 0);
       return ret;
     }
   }
diff --git a/taichi/util/zip.cpp b/taichi/util/zip.cpp
index 2087aef6bfb8a..fce0adbb6f5b4 100644
--- a/taichi/util/zip.cpp
+++ b/taichi/util/zip.cpp
@@ -12,7 +12,7 @@
 
 #include "miniz.h"
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 namespace zip {
 
@@ -26,7 +26,7 @@ inline std::string get_file_name_from_whole_path(const std::string &fn) {
 
 void write(std::string fn, const uint8 *data, std::size_t len) {
   mz_bool status;
-  TC_ERROR_UNLESS(taichi::ends_with(fn, ".tcb.zip"),
+  TI_ERROR_UNLESS(taichi::ends_with(fn, ".tcb.zip"),
                   "Filename must end with .tcb.zip");
 
   std::string fn_uncompressed = get_file_name_from_whole_path(fn);
@@ -41,7 +41,7 @@ void write(std::string fn, const uint8 *data, std::size_t len) {
       reinterpret_cast<char *>(const_cast<uint8 *>(data)), len, s_pComment,
       (uint16)strlen(s_pComment), MZ_BEST_COMPRESSION);
   if (!status) {
-    TC_ERROR("mz_zip_add_mem_to_archive_file_in_place failed!\n");
+    TI_ERROR("mz_zip_add_mem_to_archive_file_in_place failed!\n");
   }
 }
 
@@ -50,7 +50,7 @@ void write(const std::string &fn, const std::string &data) {
 }
 
 std::vector<uint8> read(const std::string fn, bool verbose) {
-  TC_ERROR_UNLESS(taichi::ends_with(fn, ".tcb.zip"),
+  TI_ERROR_UNLESS(taichi::ends_with(fn, ".tcb.zip"),
                   "Filename must end with .tcb.zip");
 
   mz_zip_archive zip_archive;
@@ -60,15 +60,15 @@ std::vector<uint8> read(const std::string fn, bool verbose) {
   memset(&zip_archive, 0, sizeof(zip_archive));
   status = mz_zip_reader_init_file(&zip_archive, fn.c_str(), 0);
   if (!status) {
-    TC_ERROR("mz_zip_reader_init_file() failed!\n");
+    TI_ERROR("mz_zip_reader_init_file() failed!\n");
   }
   if (!mz_zip_reader_file_stat(&zip_archive, 0, &file_stat)) {
     mz_zip_reader_end(&zip_archive);
-    TC_ERROR("mz_zip_reader_file_stat() failed!\n");
+    TI_ERROR("mz_zip_reader_file_stat() failed!\n");
   }
 
   if (verbose) {
-    TC_TRACE(
+    TI_TRACE(
         "Filename: {}, Comment: {}, Uncompressed size: {}, Compressed size: "
         "{}, Is Dir: {}\n",
         file_stat.m_filename, file_stat.m_comment,
@@ -83,7 +83,7 @@ std::vector<uint8> read(const std::string fn, bool verbose) {
   memset(&zip_archive, 0, sizeof(zip_archive));
   status = mz_zip_reader_init_file(&zip_archive, fn.c_str(), 0);
   if (!status) {
-    TC_ERROR("mz_zip_reader_init_file() failed!\n");
+    TI_ERROR("mz_zip_reader_init_file() failed!\n");
   }
 
   std::string fn_uncompressed = get_file_name_from_whole_path(fn);
@@ -95,13 +95,13 @@ std::vector<uint8> read(const std::string fn, bool verbose) {
 
   if (!p) {
     mz_zip_reader_end(&zip_archive);
-    TC_ERROR("mz_zip_reader_extract_file_to_heap() failed!");
+    TI_ERROR("mz_zip_reader_extract_file_to_heap() failed!");
   }
 
   if (verbose) {
-    TC_TRACE("Successfully extracted file {}, size {}", archive_filename,
+    TI_TRACE("Successfully extracted file {}, size {}", archive_filename,
              (uint)uncomp_size);
-    TC_TRACE("File data: {}", (const char *)p);
+    TI_TRACE("File data: {}", (const char *)p);
   }
 
   std::vector<uint8> ret(p, p + file_stat.m_uncomp_size);
@@ -111,4 +111,4 @@ std::vector<uint8> read(const std::string fn, bool verbose) {
 
 }  // namespace zip
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/visual/gui.h b/taichi/visual/gui.h
index 6fcab366acb8c..0dadc29ba3d6a 100644
--- a/taichi/visual/gui.h
+++ b/taichi/visual/gui.h
@@ -5,23 +5,23 @@
 #include <ctime>
 #include <numeric>
 
-#if defined(TC_PLATFORM_LINUX)
-#define TC_GUI_X11
+#if defined(TI_PLATFORM_LINUX)
+#define TI_GUI_X11
 #endif
 
-#if defined(TC_PLATFORM_WINDOWS)
-#define TC_GUI_WIN32
+#if defined(TI_PLATFORM_WINDOWS)
+#define TI_GUI_WIN32
 #endif
 
-#if defined(TC_PLATFORM_OSX)
-#define TC_GUI_COCOA
+#if defined(TI_PLATFORM_OSX)
+#define TI_GUI_COCOA
 #include <objc/objc.h>
 #endif
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 // https://color.adobe.com/sea-waves-color-theme-11521801/edit/?copy=true&base=2&rule=Custom&selected=1&name=Copy%20of%20sea%20waves&mode=hsv&rgbvalues=0.009800000000000008,0.32993205279993043,0.49,0.006600000000000006,0.5184304355999791,0.66,0.007700000000000007,0.7445879672002016,0.77,0.6643,0.91,0.8608604914000435,0.8827,0.97,0.9059801164000182&swatchOrder=0,1,2,3,4
-TC_FORCE_INLINE Vector4 color_from_hex(uint32 c) {
+TI_FORCE_INLINE Vector4 color_from_hex(uint32 c) {
   return Vector4(c / 65536, c / 256 % 256, c % 256, 255) * (1 / 255.0_f);
 }
 
@@ -53,17 +53,17 @@ class Canvas {
     return *this;
   }
 
-  TC_FORCE_INLINE Canvas &color(real r, real g, real b, real a = 1) {
+  TI_FORCE_INLINE Canvas &color(real r, real g, real b, real a = 1) {
     context._color = Vector4(r, g, b, a);
     return *this;
   }
 
-  TC_FORCE_INLINE Canvas &color(int r, int g, int b, int a = 255) {
+  TI_FORCE_INLINE Canvas &color(int r, int g, int b, int a = 255) {
     context._color = (1.0_f / 255) * Vector4(r, g, b, a);
     return *this;
   }
 
-  TC_FORCE_INLINE Canvas &radius(real radius) {
+  TI_FORCE_INLINE Canvas &radius(real radius) {
     context._radius = radius;
     return *this;
   }
@@ -76,7 +76,7 @@ class Canvas {
     bool finished;
     static Vector2 vertices[128];  // TODO: ...
 
-    TC_FORCE_INLINE Line(Canvas &canvas)
+    TI_FORCE_INLINE Line(Canvas &canvas)
         : canvas(canvas),
           _color(canvas.context._color),
           _radius(canvas.context._radius) {
@@ -84,19 +84,19 @@ class Canvas {
       finished = false;
     }
 
-    TC_FORCE_INLINE Line(Canvas &canvas, Vector2 a, Vector2 b) : Line(canvas) {
+    TI_FORCE_INLINE Line(Canvas &canvas, Vector2 a, Vector2 b) : Line(canvas) {
       push(a);
       push(b);
     }
 
-    TC_FORCE_INLINE Line(Canvas &canvas, Vector2 a, Vector2 b, Vector2 c)
+    TI_FORCE_INLINE Line(Canvas &canvas, Vector2 a, Vector2 b, Vector2 c)
         : Line(canvas) {
       push(a);
       push(b);
       push(c);
     }
 
-    TC_FORCE_INLINE Line(Canvas &canvas,
+    TI_FORCE_INLINE Line(Canvas &canvas,
                          Vector2 a,
                          Vector2 b,
                          Vector2 c,
@@ -108,29 +108,29 @@ class Canvas {
       push(d);
     }
 
-    TC_FORCE_INLINE void push(Vector2 vec) {
+    TI_FORCE_INLINE void push(Vector2 vec) {
       vertices[n_vertices++] = vec;
     }
 
-    TC_FORCE_INLINE Line &path(Vector2 a) {
+    TI_FORCE_INLINE Line &path(Vector2 a) {
       push(a);
       return *this;
     }
 
-    TC_FORCE_INLINE Line &path(Vector2 a, Vector2 b) {
+    TI_FORCE_INLINE Line &path(Vector2 a, Vector2 b) {
       push(a);
       push(b);
       return *this;
     }
 
-    TC_FORCE_INLINE Line &path(Vector2 a, Vector2 b, Vector2 c) {
+    TI_FORCE_INLINE Line &path(Vector2 a, Vector2 b, Vector2 c) {
       push(a);
       push(b);
       push(c);
       return *this;
     }
 
-    TC_FORCE_INLINE Line &path(Vector2 a, Vector2 b, Vector2 c, Vector2 d) {
+    TI_FORCE_INLINE Line &path(Vector2 a, Vector2 b, Vector2 c, Vector2 d) {
       push(a);
       push(b);
       push(c);
@@ -138,37 +138,37 @@ class Canvas {
       return *this;
     }
 
-    TC_FORCE_INLINE Line &close() {
-      TC_ASSERT(n_vertices > 0);
+    TI_FORCE_INLINE Line &close() {
+      TI_ASSERT(n_vertices > 0);
       push(vertices[0]);
       return *this;
     }
 
-    TC_FORCE_INLINE Line &color(Vector4 color) {
+    TI_FORCE_INLINE Line &color(Vector4 color) {
       _color = color;
       return *this;
     }
 
-    TC_FORCE_INLINE Line &color(int c) {
+    TI_FORCE_INLINE Line &color(int c) {
       return color(c / 65536, c / 256 % 256, c % 256, 255);
     }
 
-    TC_FORCE_INLINE Line &color(real r, real g, real b, real a = 1) {
+    TI_FORCE_INLINE Line &color(real r, real g, real b, real a = 1) {
       _color = Vector4(r, g, b, a);
       return *this;
     }
 
-    TC_FORCE_INLINE Line &color(int r, int g, int b, int a = 255) {
+    TI_FORCE_INLINE Line &color(int r, int g, int b, int a = 255) {
       _color = (1.0_f / 255) * Vector4(r, g, b, a);
       return *this;
     }
 
-    TC_FORCE_INLINE Line &width(real width) {
+    TI_FORCE_INLINE Line &width(real width) {
       _radius = width * 0.5;
       return *this;
     }
 
-    TC_FORCE_INLINE Line &radius(real radius) {
+    TI_FORCE_INLINE Line &radius(real radius) {
       _radius = radius;
       return *this;
     }
@@ -208,7 +208,7 @@ class Canvas {
     }
 
     void finish() {
-      TC_ASSERT(!finished);
+      TI_ASSERT(!finished);
       finished = true;
       for (int i = 0; i + 1 < n_vertices; i++) {
         stroke(canvas.transform(vertices[i]),
@@ -224,7 +224,7 @@ class Canvas {
     real _radius;
     bool finished;
 
-    TC_FORCE_INLINE Circle(Canvas &canvas, Vector2 center)
+    TI_FORCE_INLINE Circle(Canvas &canvas, Vector2 center)
         : canvas(canvas),
           _center(center),
           _color(canvas.context._color),
@@ -232,32 +232,32 @@ class Canvas {
       finished = false;
     }
 
-    TC_FORCE_INLINE Circle &color(Vector4 color) {
+    TI_FORCE_INLINE Circle &color(Vector4 color) {
       _color = color;
       return *this;
     }
 
-    TC_FORCE_INLINE Circle &color(real r, real g, real b, real a = 1) {
+    TI_FORCE_INLINE Circle &color(real r, real g, real b, real a = 1) {
       _color = Vector4(r, g, b, a);
       return *this;
     }
 
-    TC_FORCE_INLINE Circle &color(int r, int g, int b, int a = 255) {
+    TI_FORCE_INLINE Circle &color(int r, int g, int b, int a = 255) {
       _color = (1.0_f / 255) * Vector4(r, g, b, a);
       return *this;
     }
 
-    TC_FORCE_INLINE Circle &color(int c) {
+    TI_FORCE_INLINE Circle &color(int c) {
       return color(c / 65536, c / 256 % 256, c % 256, 255);
     }
 
-    TC_FORCE_INLINE Circle &radius(real radius) {
+    TI_FORCE_INLINE Circle &radius(real radius) {
       _radius = radius;
       return *this;
     }
 
     void finish() {
-      TC_ASSERT(finished == false);
+      TI_ASSERT(finished == false);
       finished = true;
       auto center = canvas.transform(_center);
       auto const canvas_width = canvas.img.get_width();
@@ -279,7 +279,7 @@ class Canvas {
       }
     }
 
-    TC_FORCE_INLINE ~Circle() {
+    TI_FORCE_INLINE ~Circle() {
       if (!finished)
         finish();
     }
@@ -293,11 +293,11 @@ class Canvas {
     transform_matrix = Matrix3(Vector3(img.get_res().cast<real>(), 1.0_f));
   }
 
-  TC_FORCE_INLINE Vector2 transform(Vector2 x) const {
+  TI_FORCE_INLINE Vector2 transform(Vector2 x) const {
     return Vector2(transform_matrix * Vector3(x, 1.0_f));
   }
 
-  TC_FORCE_INLINE Vector2 untransform(Vector2 x) const {
+  TI_FORCE_INLINE Vector2 untransform(Vector2 x) const {
     return Vector2(inversed(transform_matrix) * Vector3(x, 1.0_f));
   }
 
@@ -402,7 +402,7 @@ class Canvas {
             real size,
             Vector4 color) {
     position = transform(position);
-#if defined(TC_AMALGAMATED)
+#if defined(TI_AMALGAMATED)
     auto ttf_path = std::string("");  // use amalgamated font
 #else
     std::string root_dir = get_repo_dir();
@@ -429,7 +429,7 @@ class Canvas {
   }
 };
 
-#if defined(TC_GUI_X11)
+#if defined(TI_GUI_X11)
 
 class CXImage;
 
@@ -445,7 +445,7 @@ using GUIBase = GUIBaseX11;
 
 #endif
 
-#if defined(TC_GUI_WIN32)
+#if defined(TI_GUI_WIN32)
 class GUIBaseWin32 {
  public:
   HWND hwnd;
@@ -458,7 +458,7 @@ class GUIBaseWin32 {
 using GUIBase = GUIBaseWin32;
 #endif
 
-#if defined(TC_GUI_COCOA)
+#if defined(TI_GUI_COCOA)
 class GUIBaseCocoa {
  public:
   id window, view;
@@ -511,7 +511,7 @@ class GUI : public GUIBase {
   struct Rect {
     Vector2i pos;
     Vector2i size;
-    TC_IO_DEF(pos, size);
+    TI_IO_DEF(pos, size);
     Rect() {
     }
     Rect(Vector2i pos, Vector2i size) : pos(pos), size(size) {
@@ -866,4 +866,4 @@ class GUI : public GUIBase {
   ~GUI();
 };
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/visualization/image_buffer.cpp b/taichi/visualization/image_buffer.cpp
index cfd90b1059775..e7cfe1d6f6df4 100644
--- a/taichi/visualization/image_buffer.cpp
+++ b/taichi/visualization/image_buffer.cpp
@@ -7,11 +7,11 @@
 #include <taichi/math/linalg.h>
 #include <taichi/io/base64.h>
 
-#if !defined(TC_AMALGAMATED)
-#define TC_IMAGE_IO
+#if !defined(TI_AMALGAMATED)
+#define TI_IMAGE_IO
 #endif
 
-#if defined(TC_IMAGE_IO)
+#if defined(TI_IMAGE_IO)
 #define STB_IMAGE_IMPLEMENTATION
 #define STBI_FAILURE_USERMSG
 #include <stb_image.h>
@@ -21,11 +21,11 @@
 #include <stb_truetype.h>
 #endif
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
 template <typename T>
 void Array2D<T>::load_image(const std::string &filename, bool linearize) {
-#if !defined(TC_AMALGAMATED)
+#if !defined(TI_AMALGAMATED)
   int channels;
   FILE *f = fopen(filename.c_str(), "rb");
   assert_info(f != nullptr, "Image file not found: " + filename);
@@ -62,13 +62,13 @@ void Array2D<T>::load_image(const std::string &filename, bool linearize) {
 
   stbi_image_free(data);
 #else
-  TC_NOT_IMPLEMENTED
+  TI_NOT_IMPLEMENTED
 #endif
 }
 
 template <typename T>
 void Array2D<T>::write_as_image(const std::string &filename) {
-#if defined(TC_IMAGE_IO)
+#if defined(TI_IMAGE_IO)
   int comp = 3;
   std::vector<unsigned char> data(this->res[0] * this->res[1] * comp);
   for (int i = 0; i < this->res[0]; i++) {
@@ -83,7 +83,7 @@ void Array2D<T>::write_as_image(const std::string &filename) {
       }
     }
   }
-  TC_ASSERT(filename.size() >= 5);
+  TI_ASSERT(filename.size() >= 5);
   int write_result = 0;
   std::string suffix = filename.substr(filename.size() - 4);
   if (suffix == ".png") {
@@ -98,18 +98,18 @@ void Array2D<T>::write_as_image(const std::string &filename) {
     write_result = stbi_write_jpg(filename.c_str(), this->res[0], this->res[1],
                                   comp, &data[0], 95);
   } else {
-    TC_ERROR("Unknown suffix {}", suffix);
+    TI_ERROR("Unknown suffix {}", suffix);
   }
 
-  TC_ASSERT_INFO((bool)write_result, "Cannot write image file");
+  TI_ASSERT_INFO((bool)write_result, "Cannot write image file");
 #else
-  TC_ERROR(
-      "'write_as_image' is not implemented. Append -DTC_IMAGE_IO to "
+  TI_ERROR(
+      "'write_as_image' is not implemented. Append -DTI_IMAGE_IO to "
       "compiler options if you are using taichi.h.");
 #endif
 }
 
-#if defined(TC_IMAGE_IO)
+#if defined(TI_IMAGE_IO)
 std::map<std::string, stbtt_fontinfo> fonts;
 std::map<std::string, std::vector<uint8>> font_buffers;
 #endif
@@ -121,7 +121,7 @@ void Array2D<T>::write_text(const std::string &font_fn,
                             int dx,
                             int dy,
                             T color) {
-#if defined(TC_IMAGE_IO)
+#if defined(TI_IMAGE_IO)
 
   std::vector<unsigned char> screen_buffer(
       (size_t)(this->res[0] * this->res[1]), (unsigned char)0);
@@ -136,18 +136,18 @@ void Array2D<T>::write_text(const std::string &font_fn,
         std::vector<unsigned char>(buffer_size, (unsigned char)0);
     if (font_fn != "") {
       FILE *font_file = fopen(font_fn.c_str(), "rb");
-      TC_ASSERT_INFO(font_file != nullptr,
+      TI_ASSERT_INFO(font_file != nullptr,
                      "Font file not found: " + std::string(font_fn));
       trash(fread(&font_buffers[font_fn][0], 1, buffer_size, font_file));
       fclose(font_file);
     } else {
-#if defined(TC_AMALGAMATED)
+#if defined(TI_AMALGAMATED)
       std::string decoded = base64_decode(go_font_str);
-      TC_ASSERT(decoded.size() < buffer_size);
+      TI_ASSERT(decoded.size() < buffer_size);
       std::memcpy(&font_buffers[font_fn][0], &decoded[0],
                   decoded.size() * sizeof(char));
 #else
-      TC_NOT_IMPLEMENTED
+      TI_NOT_IMPLEMENTED
 #endif
     }
     stbtt_InitFont(&font, &font_buffers[font_fn][0], 0);
@@ -191,11 +191,11 @@ void Array2D<T>::write_text(const std::string &font_fn,
     }
   }
 #else
-  TC_NOT_IMPLEMENTED
+  TI_NOT_IMPLEMENTED
 #endif
 }
 
-#if !defined(TC_AMALGAMATED)
+#if !defined(TI_AMALGAMATED)
 template void Array2D<Vector3>::write_text(const std::string &font_fn,
                                            const std::string &content_,
                                            real size,
@@ -245,4 +245,4 @@ void write_pgm(Array2D<real> img, const std::string &fn) {
   }
 }
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/tests/cpp/test_alg_simp.cpp b/tests/cpp/test_alg_simp.cpp
index e689c5c13b9f4..0e4dc5504fd51 100644
--- a/tests/cpp/test_alg_simp.cpp
+++ b/tests/cpp/test_alg_simp.cpp
@@ -5,7 +5,7 @@ TLANG_NAMESPACE_BEGIN
 
 // Basic tests within a basic block
 
-TC_TEST("simplify_add_zero") {
+TI_TEST("simplify_add_zero") {
   auto block = std::make_unique<Block>();
 
   auto global_load_addr =
@@ -19,16 +19,16 @@ TC_TEST("simplify_add_zero") {
   auto global_store = block->push_back<GlobalStoreStmt>(global_store_addr, add);
 
   irpass::typecheck(block.get());
-  TC_CHECK(block->size() == 6);  // two addresses, one load, one store
+  TI_CHECK(block->size() == 6);  // two addresses, load, zero, add, store
 
-  irpass::print(block.get());
+  // irpass::print(block.get());
 
   irpass::alg_simp(block.get());  // should eliminate add
   irpass::die(block.get());       // should eliminate zero
 
-  irpass::print(block.get());
-  TC_CHECK(block->size() == 4);  // two addresses, one load, one store
-  TC_CHECK((*block)[0]->is<GlobalTemporaryStmt>());
+  // irpass::print(block.get());
+  TI_CHECK(block->size() == 4);  // two addresses, one load, one store
+  TI_CHECK((*block)[0]->is<GlobalTemporaryStmt>());
   // .. more tests, assuming instruction order not shuffled
 }
 
diff --git a/tests/cpp/test_dict.cpp b/tests/cpp/test_dict.cpp
index 5901e4fedd65b..4975361ff81b7 100644
--- a/tests/cpp/test_dict.cpp
+++ b/tests/cpp/test_dict.cpp
@@ -6,28 +6,28 @@
 #include <taichi/common/dict.h>
 #include <taichi/testing.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
-TC_TEST("config") {
+TI_TEST("config") {
   Dict dict;
 
   dict.set("int_a", 123);
-  TC_CHECK(dict.get<int>("int_a") == 123);
+  TI_CHECK(dict.get<int>("int_a") == 123);
 
   dict.set("uint_a", 125);
-  TC_CHECK(dict.get<int>("uint_a") == 125);
+  TI_CHECK(dict.get<int>("uint_a") == 125);
 
   dict.set("float_a", 1.5_f32);
-  TC_CHECK_EQUAL(dict.get<float32>("float_a"), 1.5_f32, 1e-6_f);
+  TI_CHECK_EQUAL(dict.get<float32>("float_a"), 1.5_f32, 1e-6_f);
 
   dict.set("double_b", 0.125_f64);
-  TC_CHECK_EQUAL(dict.get<float64>("double_b"), 0.125_f64, 1e-6_f);
+  TI_CHECK_EQUAL(dict.get<float64>("double_b"), 0.125_f64, 1e-6_f);
 
   dict.set("vec_int", Vector3i(4, 6, 3));
-  TC_CHECK(dict.get<Vector3i>("vec_int") == Vector3i(4, 6, 3));
+  TI_CHECK(dict.get<Vector3i>("vec_int") == Vector3i(4, 6, 3));
 
   dict.set("str", "Hello");
-  TC_CHECK(dict.get<std::string>("str") == "Hello");
+  TI_CHECK(dict.get<std::string>("str") == "Hello");
 };
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/taichi/gui/test_bemo.cpp b/tests/cpp/test_gui.cpp
similarity index 64%
rename from taichi/gui/test_bemo.cpp
rename to tests/cpp/test_gui.cpp
index 598663a5a9b3c..716ca8d0aca24 100644
--- a/taichi/gui/test_bemo.cpp
+++ b/tests/cpp/test_gui.cpp
@@ -1,15 +1,17 @@
+// Note: not a real test case; this registers an interactive GUI demo task.
+
 #include <taichi/visual/gui.h>
 #include <taichi/common/task.h>
 
-TC_NAMESPACE_BEGIN
+TI_NAMESPACE_BEGIN
 
-auto test_bemo = []() {
-  GUI gui("Bemo Test", 1000, 400, false);
+auto test_gui = []() {
+  GUI gui("GUI Test", 1000, 400, false);
   auto canvas = *gui.canvas;
   real t = 0;
 
   int circle_count = 10;
-  gui.button("ABC", [] { TC_INFO("Triggered"); });
+  gui.button("ABC", [] { TI_INFO("Triggered"); });
   gui.slider("Circles", circle_count, 0, 60);
   real radius = 3;
   gui.slider("Radius", radius, 0.0_f, 10.0_f);
@@ -21,24 +23,28 @@ auto test_bemo = []() {
     for (int i = 0; i < 30; i++) {
       canvas.circle(i * 10 + 100, 250 + std::sin(t + i * 0.1_f) * 50_f)
           .color(0.7_f, 0.2_f, 0.0_f, 0.9_f)
-          .radius(5);
+          .radius(5)
+          .finish();
     }
-    canvas.color(0.0_f, 0.0_f, 1.0_f, 1.0_f).radius(5 + 2 * std::sin(t * 10_f));
+    canvas.color(0.0_f, 0.0_f, 1.0_f, 1.0_f)
+        .radius(5 + 2 * std::sin(t * 10_f));
     canvas.path()
         .path(Vector2(100, 100), Vector2(200, 75 + std::cos(t) * 50_f),
               Vector2(300, 75 + std::cos(t) * 50_f))
         .close()
         .color(0, 0, 0)
-        .width(5);
+        .width(5)
+        .finish();
 
     for (int i = 0; i < circle_count; i++) {
       canvas.circle(i * 10 + 100, 150 + std::sin(t + i * 0.1_f) * 50_f)
-          .radius(radius);
+          .radius(radius)
+          .finish();
     }
     gui.update();
   }
 };
 
-TC_REGISTER_TASK(test_bemo);
+TI_REGISTER_TASK(test_gui);
 
-TC_NAMESPACE_END
+TI_NAMESPACE_END
diff --git a/tests/python/test_new_allocator.py b/tests/python/test_new_allocator.py
index 3a97830c6cc8c..155e7a7429b0f 100644
--- a/tests/python/test_new_allocator.py
+++ b/tests/python/test_new_allocator.py
@@ -70,7 +70,7 @@ def test_alloc_in_kernel():
   return # build bots may not have this much memory to tests...
   x = ti.var(ti.f32)
   
-  ti.root.dense(ti.i, 8192).pointer().dense(ti.i, 1024 * 1024).place(x)
+  ti.root.pointer(ti.i, 8192).dense(ti.i, 1024 * 1024).place(x)
   
 
   @ti.kernel
diff --git a/tests/python/test_sparse_basics.py b/tests/python/test_sparse_basics.py
index 61b601f534f31..89b7d5445cfc7 100644
--- a/tests/python/test_sparse_basics.py
+++ b/tests/python/test_sparse_basics.py
@@ -36,7 +36,7 @@ def test_pointer():
 
   @ti.layout
   def place():
-    ti.root.dense(ti.i, n).pointer().dense(ti.i, n).place(x)
+    ti.root.pointer(ti.i, n).dense(ti.i, n).place(x)
     ti.root.place(s)
 
   @ti.kernel
@@ -58,7 +58,7 @@ def test_pointer_is_active():
 
   n = 128
   
-  ti.root.dense(ti.i, n).pointer().dense(ti.i, n).place(x)
+  ti.root.pointer(ti.i, n).dense(ti.i, n).place(x)
   ti.root.place(s)
   
   @ti.kernel
@@ -83,8 +83,7 @@ def test_pointer2():
 
   @ti.layout
   def place():
-    ti.root.dense(ti.i, n).pointer().dense(ti.i, n).pointer().dense(ti.i,
-                                                                    n).place(x)
+    ti.root.pointer(ti.i, n).pointer(ti.i, n).dense(ti.i, n).place(x)
     ti.root.place(s)
 
   @ti.kernel
diff --git a/tests/python/test_sparse_deactivate.py b/tests/python/test_sparse_deactivate.py
index da05f0452e779..385103162f718 100644
--- a/tests/python/test_sparse_deactivate.py
+++ b/tests/python/test_sparse_deactivate.py
@@ -8,10 +8,9 @@ def test_pointer():
 
   n = 16
 
-  @ti.layout
-  def place():
-    ti.root.dense(ti.i, n).pointer().dense(ti.i, n).place(x)
-    ti.root.place(s)
+  ptr = ti.root.pointer(ti.i, n)
+  ptr.dense(ti.i, n).place(x)
+  ti.root.place(s)
 
   s[None] = 0
 
@@ -20,7 +19,6 @@ def func():
     for i in x:
       s[None] += 1
 
-
   x[0] = 1
   x[19] = 1
   func()
@@ -28,26 +26,26 @@ def func():
 
   @ti.kernel
   def deactivate():
-    ti.deactivate(x.parent().parent(), 4)
+    ti.deactivate(ptr, 4)
 
   deactivate()
   s[None] = 0
   func()
   assert s[None] == 16
 
+
 @ti.archs_support_sparse
 def test_pointer2():
   x = ti.var(ti.f32)
 
   n = 16
 
-  @ti.layout
-  def place():
-    ti.root.dense(ti.i, n).pointer().dense(ti.i, n).place(x)
+  ptr = ti.root.pointer(ti.i, n)
+  ptr.dense(ti.i, n).place(x)
 
   @ti.kernel
   def func():
-    for i in range(n*n):
+    for i in range(n * n):
       x[i] = 1.0
 
   @ti.kernel
@@ -56,8 +54,8 @@ def set10():
 
   @ti.kernel
   def clear():
-    for i in x.parent().parent():
-      ti.deactivate(x.parent().parent(),i)
+    for i in ptr:
+      ti.deactivate(ptr, i)
 
   func()
   clear()
@@ -67,12 +65,13 @@ def clear():
 
   set10()
 
-  for i in range(n*n):
+  for i in range(n * n):
     if i != 10:
       assert x[i] == 0.0
     else:
       assert x[i] == 10.0
 
+
 @ti.archs_support_sparse
 def test_pointer3():
   x = ti.var(ti.f32)
@@ -80,42 +79,42 @@ def test_pointer3():
 
   n = 16
 
-  @ti.layout
-  def place():
-    ti.root.dense(ti.ij, n).pointer().dense(ti.ij, n).place(x)
-    ti.root.dense(ti.ij, n).pointer().dense(ti.ij, n).place(x_temp)
+  ptr1 = ti.root.pointer(ti.ij, n)
+  ptr1.dense(ti.ij, n).place(x)
+  ptr2 = ti.root.pointer(ti.ij, n)
+  ptr2.dense(ti.ij, n).place(x_temp)
 
   @ti.kernel
   def fill():
-    for j in range(n*n):
-      for i in range(n*n):
-        x[i,j] = i+j
+    for j in range(n * n):
+      for i in range(n * n):
+        x[i, j] = i + j
 
   @ti.kernel
   def fill2():
-    for i,j in x_temp:
-      if x_temp[i,j] < 100:
-        x[i,j] = x_temp[i,j]
+    for i, j in x_temp:
+      if x_temp[i, j] < 100:
+        x[i, j] = x_temp[i, j]
 
   @ti.kernel
   def copy_to_temp():
-    for i,j in x:
-      x_temp[i,j] = x[i,j]
+    for i, j in x:
+      x_temp[i, j] = x[i, j]
 
   @ti.kernel
   def copy_from_temp():
-    for i,j in x_temp:
-      x[i,j] = x_temp[i,j]
+    for i, j in x_temp:
+      x[i, j] = x_temp[i, j]
 
   @ti.kernel
   def clear():
-    for i,j in x.parent().parent():
-      ti.deactivate(x.parent().parent(),[i,j])
+    for i, j in ptr1:
+      ti.deactivate(ptr1, [i, j])
 
   @ti.kernel
   def clear_temp():
-    for i,j in x_temp.parent().parent():
-      ti.deactivate(x_temp.parent().parent(),[i,j])
+    for i, j in ptr2:
+      ti.deactivate(ptr2, [i, j])
 
   fill()
   copy_to_temp()
@@ -123,9 +122,7 @@ def clear_temp():
   fill2()
   clear_temp()
 
-
   for iter in range(100):
-    print(iter)
     copy_to_temp()
     clear()
     copy_from_temp()
@@ -133,9 +130,8 @@ def clear_temp():
 
     for j in range(n * n):
       for i in range(n * n):
-        if i+j < 100:
-          assert x[i,j] == i+j
-
+        if i + j < 100:
+          assert x[i, j] == i + j
 
 
 @ti.archs_support_sparse
@@ -145,36 +141,42 @@ def test_dynamic():
 
   n = 16
 
-  @ti.layout
-  def place():
-    ti.root.dense(ti.i, n).dynamic(ti.j, 4096).place(x)
-    ti.root.dense(ti.i, n).place(s)
+  lst = ti.root.dense(ti.i, n).dynamic(ti.j, 4096)
+  lst.place(x)
+  ti.root.dense(ti.i, n).place(s)
 
   @ti.kernel
   def func(mul: ti.i32):
     for i in range(n):
       for j in range(i * i * mul):
-        ti.append(x.parent(), i, j)
-      s[i] = ti.length(x.parent(), i)
+        ti.append(lst, i, j)
 
+  @ti.kernel
+  def fetch_length():
+    for i in range(n):
+      s[i] = ti.length(lst, i)
 
   func(1)
-
+  fetch_length()
   for i in range(n):
     assert s[i] == i * i
 
   @ti.kernel
   def clear():
     for i in range(n):
-      ti.deactivate(x.parent(), i)
+      ti.deactivate(lst, [i])
 
   func(2)
-
+  fetch_length()
   for i in range(n):
     assert s[i] == i * i * 3
 
   clear()
-  func(4)
+  fetch_length()
+  for i in range(n):
+    assert s[i] == 0
 
+  func(4)
+  fetch_length()
   for i in range(n):
     assert s[i] == i * i * 4
diff --git a/tests/python/test_sparse_parallel.py b/tests/python/test_sparse_parallel.py
index 3320df7179cc6..c40a2be3588a3 100644
--- a/tests/python/test_sparse_parallel.py
+++ b/tests/python/test_sparse_parallel.py
@@ -10,7 +10,7 @@ def test_pointer():
 
   @ti.layout
   def place():
-    ti.root.dense(ti.i, n).pointer().dense(ti.i, n).place(x)
+    ti.root.pointer(ti.i, n).dense(ti.i, n).place(x)
     ti.root.place(s)
 
   @ti.kernel
@@ -37,7 +37,7 @@ def test_pointer2():
 
   @ti.layout
   def place():
-    ti.root.dense(ti.i, n).pointer().dense(ti.i, n).place(x)
+    ti.root.pointer(ti.i, n).dense(ti.i, n).place(x)
     ti.root.place(s)
 
   @ti.kernel
@@ -62,7 +62,7 @@ def test_nested_struct_fill_and_clear():
 
   @ti.layout
   def place():
-    ti.root.dense(ti.ij, [N, N]).pointer().dense(ti.ij, [8, 8]).place(a)
+    ti.root.pointer(ti.ij, [N, N]).dense(ti.ij, [8, 8]).place(a)
 
   @ti.kernel
   def fill():
diff --git a/tests/python/test_struct_for.py b/tests/python/test_struct_for.py
index 706e5ed2dff61..13fdaf3c4e092 100644
--- a/tests/python/test_struct_for.py
+++ b/tests/python/test_struct_for.py
@@ -215,6 +215,3 @@ def fill():
 
   for i in range(n):
     assert x[i] == i
-
-
-