From a21254adadc7205d8609d22b42f6139f0ce864ef Mon Sep 17 00:00:00 2001 From: archibate <1931127624@qq.com> Date: Tue, 14 Jul 2020 14:05:31 +0800 Subject: [PATCH 01/15] base --- misc/load_time.py | 7 ++++ python/taichi/__init__.py | 11 ++++++ python/taichi/core/util.py | 2 +- python/taichi/misc/timing.py | 77 ++++++++++++++++++++++++++++++++++++ 4 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 misc/load_time.py create mode 100644 python/taichi/misc/timing.py diff --git a/misc/load_time.py b/misc/load_time.py new file mode 100644 index 0000000000000..51319dd3bae6e --- /dev/null +++ b/misc/load_time.py @@ -0,0 +1,7 @@ +from time import time +from colorama import Fore +t = time() +import taichi as ti +print('\nTotal load time:', Fore.LIGHTRED_EX, (time() - t) * 1000, Fore.RESET, 'ms\n') + +ti.profiler.print() diff --git a/python/taichi/__init__.py b/python/taichi/__init__.py index ad71b618da303..0531b2dab8dea 100644 --- a/python/taichi/__init__.py +++ b/python/taichi/__init__.py @@ -1,9 +1,16 @@ +from taichi.misc.timing import profiler + +profiler('main') from taichi.main import main +profiler('core') from taichi.core import ti_core from taichi.core import start_memory_monitoring, is_release, package_root +profiler('misc.util') from taichi.misc.util import vec, veci, set_gdb_trigger, print_profile_info, set_logging_level, info, warn, error, debug, trace, INFO, WARN, ERROR, DEBUG, TRACE from taichi.core.util import require_version +profiler('tools') from taichi.tools import * +profiler('misc.*') from taichi.misc import * from taichi.misc.gui import GUI from taichi.misc.np2ply import PLYWriter @@ -14,9 +21,12 @@ from taichi.misc.gui import rgb_to_hex from taichi.misc.error import * from taichi.misc.settings import * +profiler('tools.{video,file}') from taichi.tools.video import VideoManager from taichi.tools.file import * +profiler('lang') from taichi.lang import * +profiler('torch_io') from .torch_io import from_torch, to_torch @@ -29,3 +39,4 @@ def test(): __version__ = (core.get_version_major(), core.get_version_minor(), core.get_version_patch()) +profiler() diff --git a/python/taichi/core/util.py b/python/taichi/core/util.py index 61f097a972a48..e78ad054d9333 100644 --- a/python/taichi/core/util.py +++ b/python/taichi/core/util.py @@ -312,7 +312,7 @@ def prepare_sandbox(src): sys.path.append(tmp_dir) try: import_ti_core(tmp_dir) - except Exception as e: + except ImportError as e: from colorama import Fore, Back, Style print_red_bold("Taichi core import failed: ", end='') print(e) diff --git a/python/taichi/misc/timing.py b/python/taichi/misc/timing.py new file mode 100644 index 0000000000000..c2afcfb5973d1 --- /dev/null +++ b/python/taichi/misc/timing.py @@ -0,0 +1,77 @@ +import time +from colorama import Fore, Back, Style + + +def _m(sec): + ms = sec * 1000 + if sec > 0.099: + return f'{Fore.LIGHTRED_EX}{sec:6.3f}{Fore.RESET}s' + else: + return f'{Fore.LIGHTMAGENTA_EX}{ms:5.2f}{Fore.RESET}ms' + + +def _c(cnt): + return f'{Fore.MAGENTA}{cnt:3d}{Fore.RESET}x' + + +def _n(name): + return f'{Fore.MAGENTA}{name}{Fore.RESET}' + + +class PythonProfiler: + def __init__(self, timer=time.time): + self.timer = timer + self.records = {} + self.started = {} + self.last_started = None + + def start(self, name): + t = self.timer() + self.started[name] = t + self.last_started = name + + def stop(self, name=None): + name = name or self.last_started + t = self.timer() + diff = t - self.started[name] + if name not in self.records: + self.records[name] = [] + self.records[name].append(diff) + + def __call__(self, name=None): + if self.last_started: + self.stop() + if name: + self.start(name) + + def print(self, name=None): + print(' min | avg | max | nr | name') + for name in self.records.keys() if name is None else [name]: + rec = self.records[name] + maximum = max(rec) + minimum = min(rec) + avg = sum(rec) / len(rec) + cnt = len(rec) + print(f'{_m(minimum)} | {_m(avg)} | {_m(maximum)} | {_c(cnt)} | {_n(name)}') + + def timed(self, name): + if callable(name): + foo = name + name = foo.__name__ + return decorator(name) + + def decorator(foo): + def wrapped(*args, **kwargs): + self.start(name) + ret = foo(*args, **kwargs) + self.stop(name) + self.print(name) + return ret + + return wrapped + + return decorator + + + +profiler = PythonProfiler() From 3babc4ea765d9e6fa9af72cf6eeb208bec4ea452 Mon Sep 17 00:00:00 2001 From: archibate <1931127624@qq.com> Date: Tue, 14 Jul 2020 14:17:39 +0800 Subject: [PATCH 02/15] clean --- misc/load_time.py | 9 ++++----- python/taichi/__init__.py | 11 +---------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/misc/load_time.py b/misc/load_time.py index 51319dd3bae6e..1b5956faf01c0 100644 --- a/misc/load_time.py +++ b/misc/load_time.py @@ -1,7 +1,6 @@ from time import time -from colorama import Fore t = time() -import taichi as ti -print('\nTotal load time:', Fore.LIGHTRED_EX, (time() - t) * 1000, Fore.RESET, 'ms\n') - -ti.profiler.print() +import taichi +t = time() - t +from colorama import Fore +print('\nTotal load time:', Fore.LIGHTRED_EX, t * 1000, Fore.RESET, 'ms\n') diff --git a/python/taichi/__init__.py b/python/taichi/__init__.py index 0531b2dab8dea..2173bc210384a 100644 --- a/python/taichi/__init__.py +++ b/python/taichi/__init__.py @@ -1,16 +1,9 @@ -from taichi.misc.timing import profiler - -profiler('main') from taichi.main import main -profiler('core') from taichi.core import ti_core from taichi.core import start_memory_monitoring, is_release, package_root -profiler('misc.util') from taichi.misc.util import vec, veci, set_gdb_trigger, print_profile_info, set_logging_level, info, warn, error, debug, trace, INFO, WARN, ERROR, DEBUG, TRACE from taichi.core.util import require_version -profiler('tools') from taichi.tools import * -profiler('misc.*') from taichi.misc import * from taichi.misc.gui import GUI from taichi.misc.np2ply import PLYWriter @@ -21,12 +14,10 @@ from taichi.misc.gui import rgb_to_hex from taichi.misc.error import * from taichi.misc.settings import * -profiler('tools.{video,file}') +from taichi.misc.timing import profiler from taichi.tools.video import VideoManager from taichi.tools.file import * -profiler('lang') from taichi.lang import * -profiler('torch_io') from .torch_io import from_torch, to_torch From 9569fe302f8410f90a3501484d0aeda96405178c Mon Sep 17 00:00:00 2001 From: archibate <1931127624@qq.com> Date: Tue, 14 Jul 2020 14:41:32 +0800 Subject: [PATCH 03/15] misc/test_python_profiler.py --- misc/load_time.py | 6 --- misc/test_python_profiler.py | 39 +++++++++++++++++++ python/taichi/misc/timing.py | 72 ++++++++++++++++++++++++++++-------- 3 files changed, 96 insertions(+), 21 deletions(-) delete mode 100644 misc/load_time.py create mode 100644 misc/test_python_profiler.py diff --git a/misc/load_time.py b/misc/load_time.py deleted file mode 100644 index 1b5956faf01c0..0000000000000 --- a/misc/load_time.py +++ /dev/null @@ -1,6 +0,0 @@ -from time import time -t = time() -import taichi -t = time() - t -from colorama import Fore -print('\nTotal load time:', Fore.LIGHTRED_EX, t * 1000, Fore.RESET, 'ms\n') diff --git a/misc/test_python_profiler.py b/misc/test_python_profiler.py new file mode 100644 index 0000000000000..6f50f8b6e741c --- /dev/null +++ b/misc/test_python_profiler.py @@ -0,0 +1,39 @@ +import taichi as ti + + +@ti.profiler.timed('foo', warmup=5) +@ti.kernel +def func0(): + print(233) + + +@ti.profiler.timed(warmup=5) +@ti.kernel +def func1(): + print(233) + + +@ti.profiler.timed('Func2') +@ti.kernel +def func2(): + print(666) + + +@ti.profiler.timed +@ti.kernel +def func3(): + pass + + +for k in range(5): + ti.profiler.start('step') + for i in range(10): + func0() + func1() + func2() + func3() + + ti.profiler.stop('step') + + +ti.profiler.print() diff --git a/python/taichi/misc/timing.py b/python/taichi/misc/timing.py index c2afcfb5973d1..575fe1310d04e 100644 --- a/python/taichi/misc/timing.py +++ b/python/taichi/misc/timing.py @@ -1,4 +1,4 @@ -import time +import time, functools from colorama import Fore, Back, Style @@ -18,6 +18,31 @@ def _n(name): return f'{Fore.MAGENTA}{name}{Fore.RESET}' +class RecordStatistics: + def __init__(self, rec): + self.rec = rec + + @property + def min(self): + return min(self.rec) + + @property + def max(self): + return max(self.rec) + + @property + def total(self): + return sum(rec) + + @property + def avg(self): + return sum(self.rec) / len(self.rec) + + @property + def count(self): + return len(self.rec) + + class PythonProfiler: def __init__(self, timer=time.time): self.timer = timer @@ -46,31 +71,48 @@ def __call__(self, name=None): def print(self, name=None): print(' min | avg | max | nr | name') - for name in self.records.keys() if name is None else [name]: - rec = self.records[name] - maximum = max(rec) - minimum = min(rec) - avg = sum(rec) / len(rec) - cnt = len(rec) - print(f'{_m(minimum)} | {_m(avg)} | {_m(maximum)} | {_c(cnt)} | {_n(name)}') - - def timed(self, name): + if name is not None: + names = [name] + else: + def keyfunc(name): + rec = RecordStatistics(self.records[name]) + return -rec.avg + names = sorted(self.records.keys(), key=keyfunc) + for name in names: + rec = RecordStatistics(self.records[name]) + print(f'{_m(rec.min)} | {_m(rec.avg)} | {_m(rec.max)} | {_c(rec.count)} ' + f'| {_n(name)}') + + def timed(self, name=None, warmup=0): if callable(name): foo = name - name = foo.__name__ - return decorator(name) + name = None + else: + foo = None + + def deco(foo): + if deco.name is None: + name = foo.__name__ + else: + name = deco.name - def decorator(foo): + @functools.wraps(foo) def wrapped(*args, **kwargs): + if deco.warmup > 0: + deco.warmup -= 1 + return foo(*args, **kwargs) + self.start(name) ret = foo(*args, **kwargs) self.stop(name) - self.print(name) return ret return wrapped - return decorator + deco.name = name + deco.warmup = warmup + + return deco(foo) if foo is not None else deco From ef1b4bf7574f180ae24f755b9b460b07a496ebe0 Mon Sep 17 00:00:00 2001 From: archibate <1931127624@qq.com> Date: Tue, 14 Jul 2020 15:04:21 +0800 Subject: [PATCH 04/15] mpm99_timed --- misc/mpm99_timed.py | 106 +++++++++++++++++++++++++++++++++++ python/taichi/misc/timing.py | 71 +++++++++++++---------- 2 files changed, 148 insertions(+), 29 deletions(-) create mode 100644 misc/mpm99_timed.py diff --git a/misc/mpm99_timed.py b/misc/mpm99_timed.py new file mode 100644 index 0000000000000..e34db46e613b3 --- /dev/null +++ b/misc/mpm99_timed.py @@ -0,0 +1,106 @@ +import taichi as ti +import numpy as np +ti.init(arch=ti.gpu) # Try to run on GPU +quality = 1 # Use a larger value for higher-res simulations +n_particles, n_grid = 9000 * quality ** 2, 128 * quality +dx, inv_dx = 1 / n_grid, float(n_grid) +dt = 1e-4 / quality +p_vol, p_rho = (dx * 0.5)**2, 1 +p_mass = p_vol * p_rho +E, nu = 0.1e4, 0.2 # Young's modulus and Poisson's ratio +mu_0, lambda_0 = E / (2 * (1 + nu)), E * nu / ((1+nu) * (1 - 2 * nu)) # Lame parameters +x = ti.Vector(2, dt=ti.f32, shape=n_particles) # position +v = ti.Vector(2, dt=ti.f32, shape=n_particles) # velocity +C = ti.Matrix(2, 2, dt=ti.f32, shape=n_particles) # affine velocity field +F = ti.Matrix(2, 2, dt=ti.f32, shape=n_particles) # deformation gradient +material = ti.var(dt=ti.i32, shape=n_particles) # material id +Jp = ti.var(dt=ti.f32, shape=n_particles) # plastic deformation +grid_v = ti.Vector(2, dt=ti.f32, shape=(n_grid, n_grid)) # grid node momentum/velocity +grid_m = ti.var(dt=ti.f32, shape=(n_grid, n_grid)) # grid node mass + +@ti.profiler.timed(warmup=5) +@ti.kernel +def substep(): + for i, j in grid_m: + grid_v[i, j] = [0, 0] + grid_m[i, j] = 0 + for p in x: # Particle state update and scatter to grid (P2G) + base = (x[p] * inv_dx - 0.5).cast(int) + fx = x[p] * inv_dx - base.cast(float) + # Quadratic kernels [http://mpm.graphics Eqn. 123, with x=fx, fx-1,fx-2] + w = [0.5 * (1.5 - fx) ** 2, 0.75 - (fx - 1) ** 2, 0.5 * (fx - 0.5) ** 2] + F[p] = (ti.Matrix.identity(ti.f32, 2) + dt * C[p]) @ F[p] # deformation gradient update + h = ti.exp(10 * (1.0 - Jp[p])) # Hardening coefficient: snow gets harder when compressed + if material[p] == 1: # jelly, make it softer + h = 0.3 + mu, la = mu_0 * h, lambda_0 * h + if material[p] == 0: # liquid + mu = 0.0 + U, sig, V = ti.svd(F[p]) + J = 1.0 + for d in ti.static(range(2)): + new_sig = sig[d, d] + if material[p] == 2: # Snow + new_sig = min(max(sig[d, d], 1 - 2.5e-2), 1 + 4.5e-3) # Plasticity + Jp[p] *= sig[d, d] / new_sig + sig[d, d] = new_sig + J *= new_sig + if material[p] == 0: # Reset deformation gradient to avoid numerical instability + F[p] = ti.Matrix.identity(ti.f32, 2) * ti.sqrt(J) + elif material[p] == 2: + F[p] = U @ sig @ V.T() # Reconstruct elastic deformation gradient after plasticity + stress = 2 * mu * (F[p] - U @ V.T()) @ F[p].T() + ti.Matrix.identity(ti.f32, 2) * la * J * (J - 1) + stress = (-dt * p_vol * 4 * inv_dx * inv_dx) * stress + affine = stress + p_mass * C[p] + for i, j in ti.static(ti.ndrange(3, 3)): # Loop over 3x3 grid node neighborhood + offset = ti.Vector([i, j]) + dpos = (offset.cast(float) - fx) * dx + weight = w[i][0] * w[j][1] + grid_v[base + offset] += weight * (p_mass * v[p] + affine @ dpos) + grid_m[base + offset] += weight * p_mass + for i, j in grid_m: + if grid_m[i, j] > 0: # No need for epsilon here + grid_v[i, j] = (1 / grid_m[i, j]) * grid_v[i, j] # Momentum to velocity + grid_v[i, j][1] -= dt * 50 # gravity + if i < 3 and grid_v[i, j][0] < 0: grid_v[i, j][0] = 0 # Boundary conditions + if i > n_grid - 3 and grid_v[i, j][0] > 0: grid_v[i, j][0] = 0 + if j < 3 and grid_v[i, j][1] < 0: grid_v[i, j][1] = 0 + if j > n_grid - 3 and grid_v[i, j][1] > 0: grid_v[i, j][1] = 0 + for p in x: # grid to particle (G2P) + base = (x[p] * inv_dx - 0.5).cast(int) + fx = x[p] * inv_dx - base.cast(float) + w = [0.5 * (1.5 - fx) ** 2, 0.75 - (fx - 1.0) ** 2, 0.5 * (fx - 0.5) ** 2] + new_v = ti.Vector.zero(ti.f32, 2) + new_C = ti.Matrix.zero(ti.f32, 2, 2) + for i, j in ti.static(ti.ndrange(3, 3)): # loop over 3x3 grid node neighborhood + dpos = ti.Vector([i, j]).cast(float) - fx + g_v = grid_v[base + ti.Vector([i, j])] + weight = w[i][0] * w[j][1] + new_v += weight * g_v + new_C += 4 * inv_dx * weight * g_v.outer_product(dpos) + v[p], C[p] = new_v, new_C + x[p] += dt * v[p] # advection + +group_size = n_particles // 3 +@ti.kernel +def initialize(): + for i in range(n_particles): + x[i] = [ti.random() * 0.2 + 0.3 + 0.10 * (i // group_size), ti.random() * 0.2 + 0.05 + 0.32 * (i // group_size)] + material[i] = i // group_size # 0: fluid 1: jelly 2: snow + v[i] = ti.Matrix([0, 0]) + F[i] = ti.Matrix([[1, 0], [0, 1]]) + Jp[i] = 1 +initialize() +gui = ti.GUI("Taichi MLS-MPM-99", res=512, background_color=0x112F41) +for frame in range(100): + ti.profiler('total_step') + for s in range(int(2e-3 // dt)): + substep() + colors = np.array([0x068587, 0xED553B, 0xEEEEF0], dtype=np.uint32) + ti.profiler('circles', warmup=5) + gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()]) + ti.profiler('show', warmup=5) + gui.show() + ti.profiler() + +ti.profiler.print() diff --git a/python/taichi/misc/timing.py b/python/taichi/misc/timing.py index 575fe1310d04e..b9ddf64a88d9f 100644 --- a/python/taichi/misc/timing.py +++ b/python/taichi/misc/timing.py @@ -3,15 +3,21 @@ def _m(sec): - ms = sec * 1000 - if sec > 0.099: + if sec > 9.9: + return f'{Fore.LIGHTRED_EX}{sec:6.1f}{Fore.RESET}s' + elif sec > 0.099: return f'{Fore.LIGHTRED_EX}{sec:6.3f}{Fore.RESET}s' else: - return f'{Fore.LIGHTMAGENTA_EX}{ms:5.2f}{Fore.RESET}ms' + ms = sec * 1000 + if ms > 0.099: + return f'{Fore.LIGHTMAGENTA_EX}{ms:5.2f}{Fore.RESET}ms' + else: + ns = ms * 1000 + return f'{Fore.BLUE}{ns:5.2f}{Fore.RESET}ns' def _c(cnt): - return f'{Fore.MAGENTA}{cnt:3d}{Fore.RESET}x' + return f'{Fore.MAGENTA}{cnt:4d}{Fore.RESET}x' def _n(name): @@ -19,8 +25,13 @@ def _n(name): class RecordStatistics: - def __init__(self, rec): - self.rec = rec + def __init__(self, profiler, name): + self.record = profiler.records[name] + self.options = profiler.options[name] + self.rec = self.record + + warmup = self.options.get('warmup', 0) + self.rec = self.rec[warmup:] @property def min(self): @@ -32,7 +43,7 @@ def max(self): @property def total(self): - return sum(rec) + return sum(self.rec) @property def avg(self): @@ -48,71 +59,73 @@ def __init__(self, timer=time.time): self.timer = timer self.records = {} self.started = {} + self.options = {} self.last_started = None - def start(self, name): + def start(self, name, **options): + self.last_started = name + self.options[name] = self.options.get(name, options) t = self.timer() self.started[name] = t - self.last_started = name def stop(self, name=None): - name = name or self.last_started t = self.timer() + if not name and self.last_started: + name = self.last_started + self.last_started = None diff = t - self.started[name] + del self.started[name] + self.last_started = None if name not in self.records: self.records[name] = [] self.records[name].append(diff) - def __call__(self, name=None): + def __call__(self, name=None, **options): if self.last_started: self.stop() - if name: - self.start(name) + if name is not None: + self.start(name, **options) def print(self, name=None): - print(' min | avg | max | nr | name') + print(' min | avg | max | num | total | name') if name is not None: names = [name] else: def keyfunc(name): - rec = RecordStatistics(self.records[name]) + rec = RecordStatistics(self, name) return -rec.avg names = sorted(self.records.keys(), key=keyfunc) for name in names: - rec = RecordStatistics(self.records[name]) + rec = RecordStatistics(self, name) print(f'{_m(rec.min)} | {_m(rec.avg)} | {_m(rec.max)} | {_c(rec.count)} ' - f'| {_n(name)}') + f'| {_m(rec.total)} | {_n(name)}') - def timed(self, name=None, warmup=0): + def timed(self, name=None, **options): if callable(name): foo = name name = None else: foo = None - def deco(foo): - if deco.name is None: + def decorator(foo): + if decorator.name is None: name = foo.__name__ else: - name = deco.name + name = decorator.name @functools.wraps(foo) def wrapped(*args, **kwargs): - if deco.warmup > 0: - deco.warmup -= 1 - return foo(*args, **kwargs) - - self.start(name) + self.start(name, **decorator.options) ret = foo(*args, **kwargs) self.stop(name) return ret return wrapped - deco.name = name - deco.warmup = warmup + decorator.name = name + decorator.options = options - return deco(foo) if foo is not None else deco + return decorator(foo) if foo is not None else decorator From b3e6dd1751294dea5b70ae730a664d93b1999aad Mon Sep 17 00:00:00 2001 From: archibate <1931127624@qq.com> Date: Tue, 14 Jul 2020 15:07:01 +0800 Subject: [PATCH 05/15] total sort --- misc/mpm99_timed.py | 5 ++--- python/taichi/misc/timing.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/misc/mpm99_timed.py b/misc/mpm99_timed.py index e34db46e613b3..69ec15edc72bd 100644 --- a/misc/mpm99_timed.py +++ b/misc/mpm99_timed.py @@ -93,13 +93,12 @@ def initialize(): initialize() gui = ti.GUI("Taichi MLS-MPM-99", res=512, background_color=0x112F41) for frame in range(100): - ti.profiler('total_step') for s in range(int(2e-3 // dt)): substep() colors = np.array([0x068587, 0xED553B, 0xEEEEF0], dtype=np.uint32) - ti.profiler('circles', warmup=5) + ti.profiler('gui.circles', warmup=5) gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()]) - ti.profiler('show', warmup=5) + ti.profiler('gui.show', warmup=5) gui.show() ti.profiler() diff --git a/python/taichi/misc/timing.py b/python/taichi/misc/timing.py index b9ddf64a88d9f..a3b34f20d91ab 100644 --- a/python/taichi/misc/timing.py +++ b/python/taichi/misc/timing.py @@ -93,7 +93,7 @@ def print(self, name=None): else: def keyfunc(name): rec = RecordStatistics(self, name) - return -rec.avg + return -rec.total names = sorted(self.records.keys(), key=keyfunc) for name in names: rec = RecordStatistics(self, name) From b581ceebf72635fe9d2c25270140d05f4d384148 Mon Sep 17 00:00:00 2001 From: archibate <1931127624@qq.com> Date: Tue, 14 Jul 2020 15:32:48 +0800 Subject: [PATCH 06/15] update doc --- a.py | 19 +++++ docs/profiler.rst | 181 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 183 insertions(+), 17 deletions(-) create mode 100644 a.py diff --git a/a.py b/a.py new file mode 100644 index 0000000000000..11cb38021ee2c --- /dev/null +++ b/a.py @@ -0,0 +1,19 @@ +from time import sleep +import taichi as ti +ti.init() + +def do_something_A(): + sleep(0.01) + +def do_something_B(): + sleep(0.1) + +ti.profiler.start('A') +do_something_A() +ti.profiler.stop('A') + +ti.profiler.start('B') +do_something_B() +ti.profiler.stop('B') + +ti.profiler.print() diff --git a/docs/profiler.rst b/docs/profiler.rst index f0295de1500b1..8248a8f39c69c 100644 --- a/docs/profiler.rst +++ b/docs/profiler.rst @@ -2,45 +2,157 @@ Profiler ======== -Taichi's profiler can help you analyze the run-time cost of your program. There are two profiling systems in Taichi: ``ScopedProfiler`` and ``KernelProfiler``. +Taichi's profiler can help you analyze the run-time cost of your program. -ScopedProfiler +Currently there are three profiling systems in Taichi: + +- ``PythonProfiler`` +- ``KernelProfiler`` +- ``ScopedProfiler`` (for developers) + + + +PythonProfiler ############## -1. ``ScopedProfiler`` measures time spent on the **host tasks** hierarchically. +``PythonProfiler`` basically measures time spent between ``start()`` and ``stop()`` using +the Python-builtin function ``time.time()``. + +There are 3 ways to use this profiler: -2. This profiler is automatically on. To show its results, call ``ti.print_profile_info()``. For example: +1. ``ti.profiler.start()`` and ``ti.profiler.stop()``, these are the most fundemental APIs: .. code-block:: python + from time import sleep import taichi as ti + ti.init() - ti.init(arch=ti.cpu) - var = ti.var(ti.f32, shape=1) + def do_something_A(): + sleep(0.01) + def do_something_B(): + sleep(0.1) - @ti.kernel - def compute(): - var[0] = 1.0 - print("Setting var[0] =", var[0]) + ti.profiler.start('A') + do_something_A() + ti.profiler.stop('A') + ti.profiler.start('B') + do_something_B() + ti.profiler.stop('B') - compute() - ti.print_profile_info() + ti.profiler.print() -``ti.print_profile_info()`` prints profiling results in a hierarchical format. +2. ``ti.profiler()``, this one makes our code cleaner: -.. Note:: +.. code-block:: python + + from time import sleep + import taichi as ti + ti.init() + + def do_something_A(): + sleep(0.01) + + def do_something_B(): + sleep(0.1) + + ti.profiler('A') # start('A') + do_something_A() + + ti.profiler('B') # stop('A') & start('B') + do_something_B() + ti.profiler() # stop('B') + + ti.profiler.print() + + +.. note:: + + Running 1 and 2 should obtain something like: + + .. code-block:: none + + min | avg | max | num | total | name + 0.100s | 0.100s | 0.100s | 1x | 0.100s | B + 10.10ms | 10.10ms | 10.10ms | 1x | 10.10ms | A + + +3. ``@ti.profiler.timed``, this one is very intuitive when profiling kernels: + +.. code-block:: python + + from time import sleep + import taichi as ti + ti.init() + + @ti.profiler.timed + def do_something_A(): + sleep(0.01) + + @ti.profiler.timed + def do_something_B(): + sleep(0.1) + + do_something_A() + do_something_B() + + ti.profiler.print() + + +When combining ``@ti.profiler.timed`` with other decorators like ``@ti.kernel``, +then ``@ti.profiler.timed`` should be put **above** it, e.g.: + +.. code-block:: python + + @ti.profiler.timed + @ti.kernel + def substep(): + ... + + +.. note:: + + Running 3 should obtain something like: + + .. code-block:: none + + min | avg | max | num | total | name + 0.100s | 0.100s | 0.100s | 1x | 0.100s | do_something_B + 10.10ms | 10.10ms | 10.10ms | 1x | 10.10ms | do_something_A + + +See `misc/mpm99_timed.py `_ for their usage example. + + +.. warning:: + + ``ti.profiler``, i.e. ``PythonProfiler``, **only works in Python-scope**, e.g.:: + + @ti.func + def substep(): + ti.profiler.start('hello') # won't work as you expected... + ... + ti.profiler.stop('hello') + + @ti.profiler.timed # won't work as you expected... + @ti.func + def hello(): + ... + + To do profiling **inside Taichi-scope**, please see the ``KernelProfiler`` section below. - ``ScopedProfiler`` is a C++ class in the core of Taichi. It is not exposed to Python users. KernelProfiler ############## -1. ``KernelProfiler`` records the costs of Taichi kernels on devices. To enable this profiler, set ``kernel_profiler=True`` in ``ti.init``. +``KernelProfiler`` records the costs of Taichi kernels on devices. -2. Call ``ti.kernel_profiler_print()`` to show the kernel profiling result. For example: +To enable this profiler, please initialize Taichi using ``ti.init(kernel_profiler=True)``. + +Call ``ti.kernel_profiler_print()`` to show the kernel profiling result. For example: .. code-block:: python :emphasize-lines: 3, 13 @@ -67,3 +179,38 @@ The outputs would be: [ 22.73%] jit_evaluator_0_kernel_0_serial min 0.001 ms avg 0.001 ms max 0.001 ms total 0.000 s [ 1x] [ 0.00%] jit_evaluator_1_kernel_1_serial min 0.000 ms avg 0.000 ms max 0.000 ms total 0.000 s [ 1x] [ 77.27%] compute_c4_0_kernel_2_serial min 0.004 ms avg 0.004 ms max 0.004 ms total 0.000 s [ 1x] + + +ScopedProfiler +############## + +``ScopedProfiler`` measures time spent on the **host tasks** hierarchically. + +This profiler is automatically on. + +To show its results, call ``ti.print_profile_info()``. For example: + +.. code-block:: python + + import taichi as ti + + ti.init(arch=ti.cpu) + var = ti.var(ti.f32, shape=1) + + + @ti.kernel + def compute(): + var[0] = 1.0 + print("Setting var[0] =", var[0]) + + + compute() + ti.print_profile_info() + + +``ti.print_profile_info()`` prints profiling results in a hierarchical format. + +.. Note:: + + ``ScopedProfiler`` is a C++ class in the core of Taichi. It is not exposed to Python users. + From 6be7061f22edbfb22abb469f58aed01f40bbbdba Mon Sep 17 00:00:00 2001 From: archibate <1931127624@qq.com> Date: Tue, 14 Jul 2020 15:55:44 +0800 Subject: [PATCH 07/15] doc warmup=5 --- a.py | 19 -------- docs/profiler.rst | 105 +++++++++++++++++++++++++++++++++++++++----- misc/mpm99_timed.py | 2 +- 3 files changed, 94 insertions(+), 32 deletions(-) delete mode 100644 a.py diff --git a/a.py b/a.py deleted file mode 100644 index 11cb38021ee2c..0000000000000 --- a/a.py +++ /dev/null @@ -1,19 +0,0 @@ -from time import sleep -import taichi as ti -ti.init() - -def do_something_A(): - sleep(0.01) - -def do_something_B(): - sleep(0.1) - -ti.profiler.start('A') -do_something_A() -ti.profiler.stop('A') - -ti.profiler.start('B') -do_something_B() -ti.profiler.stop('B') - -ti.profiler.print() diff --git a/docs/profiler.rst b/docs/profiler.rst index 8248a8f39c69c..45193fb0d0c48 100644 --- a/docs/profiler.rst +++ b/docs/profiler.rst @@ -2,7 +2,7 @@ Profiler ======== -Taichi's profiler can help you analyze the run-time cost of your program. +Taichi's profiler can help you analyze the run-time cost of your program, i.e. performance. Currently there are three profiling systems in Taichi: @@ -13,11 +13,14 @@ Currently there are three profiling systems in Taichi: PythonProfiler -############## +-------------- ``PythonProfiler`` basically measures time spent between ``start()`` and ``stop()`` using the Python-builtin function ``time.time()``. +Profiling APIs +************** + There are 3 ways to use this profiler: 1. ``ti.profiler.start()`` and ``ti.profiler.stop()``, these are the most fundemental APIs: @@ -102,15 +105,17 @@ There are 3 ways to use this profiler: ti.profiler.print() -When combining ``@ti.profiler.timed`` with other decorators like ``@ti.kernel``, -then ``@ti.profiler.timed`` should be put **above** it, e.g.: +.. warning:: -.. code-block:: python + When combining ``@ti.profiler.timed`` with other decorators like ``@ti.kernel``, + then ``@ti.profiler.timed`` should be put **above** it, e.g.: - @ti.profiler.timed - @ti.kernel - def substep(): - ... + .. code-block:: python + + @ti.profiler.timed + @ti.kernel + def substep(): + ... .. note:: @@ -124,7 +129,83 @@ then ``@ti.profiler.timed`` should be put **above** it, e.g.: 10.10ms | 10.10ms | 10.10ms | 1x | 10.10ms | do_something_A -See `misc/mpm99_timed.py `_ for their usage example. +Recording multiple entries +************************** + +When a same **name** is used for multiple times, then they will be merged into one, e.g.: + +.. code-block:: python + + from time import sleep + import taichi as ti + ti.init() + + def do_something_A(): + sleep(0.01) + + def do_something_B(): + sleep(0.1) + + ti.profiler.start('A') + do_something_A() + ti.profiler.stop('A') + + ti.profiler.start('A') + do_something_B() + ti.profiler.stop('A') + + ti.profiler.print() + +will obtain: + +.. code-block:: none + + min | avg | max | num | total | name + 10.10ms | 55.12ms | 0.100s | 2x | 0.110s | A + + +- ``min`` is the minimum time in records. +- ``avg`` is the average time of records. +- ``max`` is the maximum time in records. +- ``num`` is the number of record entries. +- ``total`` is the total costed time of records. + + +Profiler options +**************** + +Due to Taichi's JIT mechinism, a kernel will be **compiled** on its first invocation. +So the first record will be extremely long compared to the following records since it +**involves both compile time and execution time**, e.g.: + +.. code-block:: none + + min | avg | max | num | total | name + 2.37ms | 3.79ms | 1.615s | 1900x | 7.204s | substep + +.. code-block:: none + + min | avg | max | num | total | name + 2.37ms | 2.95ms | 12.70ms | 1895x | 5.592s | substep + + +As you see, this make our result inaccurate, especially the ``max`` column. + +To avoid this, you may specify a ``warmup`` option to ``ti.profiler``, e.g.: + +.. code-block:: python + + @ti.profiler.timed(warmup=5) + @ti.kernel + def substep(): + ... + + +Set ``warmup=5`` for example, will **discard** the first 5 record entries. +I.e. discard the kernel compile time and possible TLB and cache misses on start up. + + +Check out `misc/mpm99_timed.py `_ for sum up usage example. .. warning:: @@ -146,7 +227,7 @@ See `misc/mpm99_timed.py Date: Tue, 14 Jul 2020 22:01:54 +0800 Subject: [PATCH 08/15] [skip ci] rev --- python/taichi/__init__.py | 1 - python/taichi/core/util.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/taichi/__init__.py b/python/taichi/__init__.py index 2173bc210384a..e64206090cff4 100644 --- a/python/taichi/__init__.py +++ b/python/taichi/__init__.py @@ -30,4 +30,3 @@ def test(): __version__ = (core.get_version_major(), core.get_version_minor(), core.get_version_patch()) -profiler() diff --git a/python/taichi/core/util.py b/python/taichi/core/util.py index e78ad054d9333..61f097a972a48 100644 --- a/python/taichi/core/util.py +++ b/python/taichi/core/util.py @@ -312,7 +312,7 @@ def prepare_sandbox(src): sys.path.append(tmp_dir) try: import_ti_core(tmp_dir) - except ImportError as e: + except Exception as e: from colorama import Fore, Back, Style print_red_bold("Taichi core import failed: ", end='') print(e) From 234a70c501bca758fad71d6ddb7accf24cebb3b1 Mon Sep 17 00:00:00 2001 From: Taichi Gardener Date: Tue, 14 Jul 2020 10:02:21 -0400 Subject: [PATCH 09/15] [skip ci] enforce code format --- docs/profiler.rst | 1 - misc/mpm99_timed.py | 196 ++++++++++++++++++++--------------- misc/test_python_profiler.py | 1 - python/taichi/misc/timing.py | 8 +- 4 files changed, 115 insertions(+), 91 deletions(-) diff --git a/docs/profiler.rst b/docs/profiler.rst index 45193fb0d0c48..78743a89cd652 100644 --- a/docs/profiler.rst +++ b/docs/profiler.rst @@ -294,4 +294,3 @@ To show its results, call ``ti.print_profile_info()``. For example: .. Note:: ``ScopedProfiler`` is a C++ class in the core of Taichi. It is not exposed to Python users. - diff --git a/misc/mpm99_timed.py b/misc/mpm99_timed.py index 341fed792a24d..d92259521a750 100644 --- a/misc/mpm99_timed.py +++ b/misc/mpm99_timed.py @@ -1,105 +1,129 @@ import taichi as ti import numpy as np -ti.init(arch=ti.gpu) # Try to run on GPU -quality = 1 # Use a larger value for higher-res simulations -n_particles, n_grid = 9000 * quality ** 2, 128 * quality +ti.init(arch=ti.gpu) # Try to run on GPU +quality = 1 # Use a larger value for higher-res simulations +n_particles, n_grid = 9000 * quality**2, 128 * quality dx, inv_dx = 1 / n_grid, float(n_grid) dt = 1e-4 / quality p_vol, p_rho = (dx * 0.5)**2, 1 p_mass = p_vol * p_rho -E, nu = 0.1e4, 0.2 # Young's modulus and Poisson's ratio -mu_0, lambda_0 = E / (2 * (1 + nu)), E * nu / ((1+nu) * (1 - 2 * nu)) # Lame parameters -x = ti.Vector(2, dt=ti.f32, shape=n_particles) # position -v = ti.Vector(2, dt=ti.f32, shape=n_particles) # velocity -C = ti.Matrix(2, 2, dt=ti.f32, shape=n_particles) # affine velocity field -F = ti.Matrix(2, 2, dt=ti.f32, shape=n_particles) # deformation gradient -material = ti.var(dt=ti.i32, shape=n_particles) # material id -Jp = ti.var(dt=ti.f32, shape=n_particles) # plastic deformation -grid_v = ti.Vector(2, dt=ti.f32, shape=(n_grid, n_grid)) # grid node momentum/velocity -grid_m = ti.var(dt=ti.f32, shape=(n_grid, n_grid)) # grid node mass +E, nu = 0.1e4, 0.2 # Young's modulus and Poisson's ratio +mu_0, lambda_0 = E / (2 * (1 + nu)), E * nu / ( + (1 + nu) * (1 - 2 * nu)) # Lame parameters +x = ti.Vector(2, dt=ti.f32, shape=n_particles) # position +v = ti.Vector(2, dt=ti.f32, shape=n_particles) # velocity +C = ti.Matrix(2, 2, dt=ti.f32, shape=n_particles) # affine velocity field +F = ti.Matrix(2, 2, dt=ti.f32, shape=n_particles) # deformation gradient +material = ti.var(dt=ti.i32, shape=n_particles) # material id +Jp = ti.var(dt=ti.f32, shape=n_particles) # plastic deformation +grid_v = ti.Vector(2, dt=ti.f32, + shape=(n_grid, n_grid)) # grid node momentum/velocity +grid_m = ti.var(dt=ti.f32, shape=(n_grid, n_grid)) # grid node mass + @ti.profiler.timed(warmup=10) @ti.kernel def substep(): - for i, j in grid_m: - grid_v[i, j] = [0, 0] - grid_m[i, j] = 0 - for p in x: # Particle state update and scatter to grid (P2G) - base = (x[p] * inv_dx - 0.5).cast(int) - fx = x[p] * inv_dx - base.cast(float) - # Quadratic kernels [http://mpm.graphics Eqn. 123, with x=fx, fx-1,fx-2] - w = [0.5 * (1.5 - fx) ** 2, 0.75 - (fx - 1) ** 2, 0.5 * (fx - 0.5) ** 2] - F[p] = (ti.Matrix.identity(ti.f32, 2) + dt * C[p]) @ F[p] # deformation gradient update - h = ti.exp(10 * (1.0 - Jp[p])) # Hardening coefficient: snow gets harder when compressed - if material[p] == 1: # jelly, make it softer - h = 0.3 - mu, la = mu_0 * h, lambda_0 * h - if material[p] == 0: # liquid - mu = 0.0 - U, sig, V = ti.svd(F[p]) - J = 1.0 - for d in ti.static(range(2)): - new_sig = sig[d, d] - if material[p] == 2: # Snow - new_sig = min(max(sig[d, d], 1 - 2.5e-2), 1 + 4.5e-3) # Plasticity - Jp[p] *= sig[d, d] / new_sig - sig[d, d] = new_sig - J *= new_sig - if material[p] == 0: # Reset deformation gradient to avoid numerical instability - F[p] = ti.Matrix.identity(ti.f32, 2) * ti.sqrt(J) - elif material[p] == 2: - F[p] = U @ sig @ V.T() # Reconstruct elastic deformation gradient after plasticity - stress = 2 * mu * (F[p] - U @ V.T()) @ F[p].T() + ti.Matrix.identity(ti.f32, 2) * la * J * (J - 1) - stress = (-dt * p_vol * 4 * inv_dx * inv_dx) * stress - affine = stress + p_mass * C[p] - for i, j in ti.static(ti.ndrange(3, 3)): # Loop over 3x3 grid node neighborhood - offset = ti.Vector([i, j]) - dpos = (offset.cast(float) - fx) * dx - weight = w[i][0] * w[j][1] - grid_v[base + offset] += weight * (p_mass * v[p] + affine @ dpos) - grid_m[base + offset] += weight * p_mass - for i, j in grid_m: - if grid_m[i, j] > 0: # No need for epsilon here - grid_v[i, j] = (1 / grid_m[i, j]) * grid_v[i, j] # Momentum to velocity - grid_v[i, j][1] -= dt * 50 # gravity - if i < 3 and grid_v[i, j][0] < 0: grid_v[i, j][0] = 0 # Boundary conditions - if i > n_grid - 3 and grid_v[i, j][0] > 0: grid_v[i, j][0] = 0 - if j < 3 and grid_v[i, j][1] < 0: grid_v[i, j][1] = 0 - if j > n_grid - 3 and grid_v[i, j][1] > 0: grid_v[i, j][1] = 0 - for p in x: # grid to particle (G2P) - base = (x[p] * inv_dx - 0.5).cast(int) - fx = x[p] * inv_dx - base.cast(float) - w = [0.5 * (1.5 - fx) ** 2, 0.75 - (fx - 1.0) ** 2, 0.5 * (fx - 0.5) ** 2] - new_v = ti.Vector.zero(ti.f32, 2) - new_C = ti.Matrix.zero(ti.f32, 2, 2) - for i, j in ti.static(ti.ndrange(3, 3)): # loop over 3x3 grid node neighborhood - dpos = ti.Vector([i, j]).cast(float) - fx - g_v = grid_v[base + ti.Vector([i, j])] - weight = w[i][0] * w[j][1] - new_v += weight * g_v - new_C += 4 * inv_dx * weight * g_v.outer_product(dpos) - v[p], C[p] = new_v, new_C - x[p] += dt * v[p] # advection + for i, j in grid_m: + grid_v[i, j] = [0, 0] + grid_m[i, j] = 0 + for p in x: # Particle state update and scatter to grid (P2G) + base = (x[p] * inv_dx - 0.5).cast(int) + fx = x[p] * inv_dx - base.cast(float) + # Quadratic kernels [http://mpm.graphics Eqn. 123, with x=fx, fx-1,fx-2] + w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2] + F[p] = (ti.Matrix.identity(ti.f32, 2) + + dt * C[p]) @ F[p] # deformation gradient update + h = ti.exp( + 10 * + (1.0 - + Jp[p])) # Hardening coefficient: snow gets harder when compressed + if material[p] == 1: # jelly, make it softer + h = 0.3 + mu, la = mu_0 * h, lambda_0 * h + if material[p] == 0: # liquid + mu = 0.0 + U, sig, V = ti.svd(F[p]) + J = 1.0 + for d in ti.static(range(2)): + new_sig = sig[d, d] + if material[p] == 2: # Snow + new_sig = min(max(sig[d, d], 1 - 2.5e-2), + 1 + 4.5e-3) # Plasticity + Jp[p] *= sig[d, d] / new_sig + sig[d, d] = new_sig + J *= new_sig + if material[ + p] == 0: # Reset deformation gradient to avoid numerical instability + F[p] = ti.Matrix.identity(ti.f32, 2) * ti.sqrt(J) + elif material[p] == 2: + F[p] = U @ sig @ V.T( + ) # Reconstruct elastic deformation gradient after plasticity + stress = 2 * mu * (F[p] - U @ V.T()) @ F[p].T() + ti.Matrix.identity( + ti.f32, 2) * la * J * (J - 1) + stress = (-dt * p_vol * 4 * inv_dx * inv_dx) * stress + affine = stress + p_mass * C[p] + for i, j in ti.static(ti.ndrange( + 3, 3)): # Loop over 3x3 grid node neighborhood + offset = ti.Vector([i, j]) + dpos = (offset.cast(float) - fx) * dx + weight = w[i][0] * w[j][1] + grid_v[base + offset] += weight * (p_mass * v[p] + affine @ dpos) + grid_m[base + offset] += weight * p_mass + for i, j in grid_m: + if grid_m[i, j] > 0: # No need for epsilon here + grid_v[i, + j] = (1 / grid_m[i, j]) * grid_v[i, + j] # Momentum to velocity + grid_v[i, j][1] -= dt * 50 # gravity + if i < 3 and grid_v[i, j][0] < 0: + grid_v[i, j][0] = 0 # Boundary conditions + if i > n_grid - 3 and grid_v[i, j][0] > 0: grid_v[i, j][0] = 0 + if j < 3 and grid_v[i, j][1] < 0: grid_v[i, j][1] = 0 + if j > n_grid - 3 and grid_v[i, j][1] > 0: grid_v[i, j][1] = 0 + for p in x: # grid to particle (G2P) + base = (x[p] * inv_dx - 0.5).cast(int) + fx = x[p] * inv_dx - base.cast(float) + w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1.0)**2, 0.5 * (fx - 0.5)**2] + new_v = ti.Vector.zero(ti.f32, 2) + new_C = ti.Matrix.zero(ti.f32, 2, 2) + for i, j in ti.static(ti.ndrange( + 3, 3)): # loop over 3x3 grid node neighborhood + dpos = ti.Vector([i, j]).cast(float) - fx + g_v = grid_v[base + ti.Vector([i, j])] + weight = w[i][0] * w[j][1] + new_v += weight * g_v + new_C += 4 * inv_dx * weight * g_v.outer_product(dpos) + v[p], C[p] = new_v, new_C + x[p] += dt * v[p] # advection + group_size = n_particles // 3 + + @ti.kernel def initialize(): - for i in range(n_particles): - x[i] = [ti.random() * 0.2 + 0.3 + 0.10 * (i // group_size), ti.random() * 0.2 + 0.05 + 0.32 * (i // group_size)] - material[i] = i // group_size # 0: fluid 1: jelly 2: snow - v[i] = ti.Matrix([0, 0]) - F[i] = ti.Matrix([[1, 0], [0, 1]]) - Jp[i] = 1 + for i in range(n_particles): + x[i] = [ + ti.random() * 0.2 + 0.3 + 0.10 * (i // group_size), + ti.random() * 0.2 + 0.05 + 0.32 * (i // group_size) + ] + material[i] = i // group_size # 0: fluid 1: jelly 2: snow + v[i] = ti.Matrix([0, 0]) + F[i] = ti.Matrix([[1, 0], [0, 1]]) + Jp[i] = 1 + + initialize() gui = ti.GUI("Taichi MLS-MPM-99", res=512, background_color=0x112F41) for frame in range(100): - for s in range(int(2e-3 // dt)): - substep() - colors = np.array([0x068587, 0xED553B, 0xEEEEF0], dtype=np.uint32) - ti.profiler('gui.circles', warmup=5) - gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()]) - ti.profiler('gui.show', warmup=5) - gui.show() - ti.profiler() + for s in range(int(2e-3 // dt)): + substep() + colors = np.array([0x068587, 0xED553B, 0xEEEEF0], dtype=np.uint32) + ti.profiler('gui.circles', warmup=5) + gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()]) + ti.profiler('gui.show', warmup=5) + gui.show() + ti.profiler() ti.profiler.print() diff --git a/misc/test_python_profiler.py b/misc/test_python_profiler.py index 6f50f8b6e741c..dba2b673ca329 100644 --- a/misc/test_python_profiler.py +++ b/misc/test_python_profiler.py @@ -35,5 +35,4 @@ def func3(): ti.profiler.stop('step') - ti.profiler.print() diff --git a/python/taichi/misc/timing.py b/python/taichi/misc/timing.py index a3b34f20d91ab..b50905fd3c93d 100644 --- a/python/taichi/misc/timing.py +++ b/python/taichi/misc/timing.py @@ -91,14 +91,17 @@ def print(self, name=None): if name is not None: names = [name] else: + def keyfunc(name): rec = RecordStatistics(self, name) return -rec.total + names = sorted(self.records.keys(), key=keyfunc) for name in names: rec = RecordStatistics(self, name) - print(f'{_m(rec.min)} | {_m(rec.avg)} | {_m(rec.max)} | {_c(rec.count)} ' - f'| {_m(rec.total)} | {_n(name)}') + print( + f'{_m(rec.min)} | {_m(rec.avg)} | {_m(rec.max)} | {_c(rec.count)} ' + f'| {_m(rec.total)} | {_n(name)}') def timed(self, name=None, **options): if callable(name): @@ -128,5 +131,4 @@ def wrapped(*args, **kwargs): return decorator(foo) if foo is not None else decorator - profiler = PythonProfiler() From 5eedb7f453fad552204674c90e64daf2f4b41d68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E4=BA=8E=E6=96=8C?= <1931127624@qq.com> Date: Wed, 15 Jul 2020 12:35:29 +0800 Subject: [PATCH 10/15] [skip ci] Apply suggestions from code review Co-authored-by: Xudong Feng --- docs/profiler.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/profiler.rst b/docs/profiler.rst index 78743a89cd652..174bce02f11a6 100644 --- a/docs/profiler.rst +++ b/docs/profiler.rst @@ -23,7 +23,7 @@ Profiling APIs There are 3 ways to use this profiler: -1. ``ti.profiler.start()`` and ``ti.profiler.stop()``, these are the most fundemental APIs: +1. ``ti.profiler.start()`` and ``ti.profiler.stop()`` are the most fundemental APIs: .. code-block:: python @@ -174,7 +174,7 @@ will obtain: Profiler options **************** -Due to Taichi's JIT mechinism, a kernel will be **compiled** on its first invocation. +Due to Taichi's JIT mechanism, a kernel will be **compiled** on its first invocation. So the first record will be extremely long compared to the following records since it **involves both compile time and execution time**, e.g.: @@ -205,7 +205,7 @@ Set ``warmup=5`` for example, will **discard** the first 5 record entries. I.e. discard the kernel compile time and possible TLB and cache misses on start up. -Check out `misc/mpm99_timed.py `_ for sum up usage example. +Check out `misc/mpm99_timed.py `_ for a summary example. .. warning:: From 9a9057afc2a85c1f53ddb0c312981f037d15baee Mon Sep 17 00:00:00 2001 From: archibate <1931127624@qq.com> Date: Thu, 16 Jul 2020 12:57:18 +0800 Subject: [PATCH 11/15] apply confuse from @Rullec --- docs/profiler.rst | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/docs/profiler.rst b/docs/profiler.rst index 174bce02f11a6..dfcd96fc1f77a 100644 --- a/docs/profiler.rst +++ b/docs/profiler.rst @@ -24,6 +24,8 @@ Profiling APIs There are 3 ways to use this profiler: 1. ``ti.profiler.start()`` and ``ti.profiler.stop()`` are the most fundemental APIs: + It will measure the time difference between ``start`` and ``stop``. + Then save the result according to the given name. e.g.: .. code-block:: python @@ -48,7 +50,15 @@ There are 3 ways to use this profiler: ti.profiler.print() -2. ``ti.profiler()``, this one makes our code cleaner: +.. code-block:: none + + min | avg | max | num | total | name + 0.100s | 0.100s | 0.100s | 1x | 0.100s | B + 10.10ms | 10.10ms | 10.10ms | 1x | 10.10ms | A + + +2. ``ti.profiler()``, this one makes our code cleaner. + Basically, it will automatically invoke ``stop`` when a new record is started. e.g.: .. code-block:: python @@ -72,18 +82,16 @@ There are 3 ways to use this profiler: ti.profiler.print() -.. note:: - - Running 1 and 2 should obtain something like: - - .. code-block:: none +.. code-block:: none - min | avg | max | num | total | name - 0.100s | 0.100s | 0.100s | 1x | 0.100s | B - 10.10ms | 10.10ms | 10.10ms | 1x | 10.10ms | A + min | avg | max | num | total | name + 0.100s | 0.100s | 0.100s | 1x | 0.100s | B + 10.10ms | 10.10ms | 10.10ms | 1x | 10.10ms | A -3. ``@ti.profiler.timed``, this one is very intuitive when profiling kernels: +3. ``@ti.profiler.timed``, this one is very intuitive when profiling kernels. + It will measure the time spent in the function, i.e. ``start`` when entering the function, + ``stop`` when leaving the function, and the record name is the function name. .. code-block:: python @@ -105,6 +113,13 @@ There are 3 ways to use this profiler: ti.profiler.print() +.. code-block:: none + + min | avg | max | num | total | name + 0.100s | 0.100s | 0.100s | 1x | 0.100s | do_something_B + 10.10ms | 10.10ms | 10.10ms | 1x | 10.10ms | do_something_A + + .. warning:: When combining ``@ti.profiler.timed`` with other decorators like ``@ti.kernel``, @@ -118,17 +133,6 @@ There are 3 ways to use this profiler: ... -.. note:: - - Running 3 should obtain something like: - - .. code-block:: none - - min | avg | max | num | total | name - 0.100s | 0.100s | 0.100s | 1x | 0.100s | do_something_B - 10.10ms | 10.10ms | 10.10ms | 1x | 10.10ms | do_something_A - - Recording multiple entries ************************** From 7d7b8efe84e8872c73fe10cbe3753c55cead75e6 Mon Sep 17 00:00:00 2001 From: archibate <1931127624@qq.com> Date: Fri, 17 Jul 2020 22:41:14 +0800 Subject: [PATCH 12/15] with ti.profiler(name) for creating TimedScope --- docs/profiler.rst | 22 +++++++++++++++------- misc/mpm99_timed.py | 9 ++++----- python/taichi/misc/timing.py | 19 ++++++++++++++----- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/docs/profiler.rst b/docs/profiler.rst index dfcd96fc1f77a..ce8c65dc0cfea 100644 --- a/docs/profiler.rst +++ b/docs/profiler.rst @@ -57,8 +57,10 @@ There are 3 ways to use this profiler: 10.10ms | 10.10ms | 10.10ms | 1x | 10.10ms | A -2. ``ti.profiler()``, this one makes our code cleaner. - Basically, it will automatically invoke ``stop`` when a new record is started. e.g.: +2. ``with ti.profiler()``, this one makes our code cleaner and readable. + Basically, it will automatically invoke ``stop`` when the indented block exited. + For more details about the ``with`` syntax in Python, + see `this tutorial `_. .. code-block:: python @@ -72,12 +74,13 @@ There are 3 ways to use this profiler: def do_something_B(): sleep(0.1) - ti.profiler('A') # start('A') - do_something_A() + with ti.profiler('A'): + do_something_A() + # automatically invoke stop('A') - ti.profiler('B') # stop('A') & start('B') - do_something_B() - ti.profiler() # stop('B') + with ti.profiler('B'): + do_something_B() + # automatically invoke stop('B') ti.profiler.print() @@ -158,6 +161,10 @@ When a same **name** is used for multiple times, then they will be merged into o do_something_B() ti.profiler.stop('A') + ti.profiler.start('B') + do_something_B() + ti.profiler.stop('B') + ti.profiler.print() will obtain: @@ -166,6 +173,7 @@ will obtain: min | avg | max | num | total | name 10.10ms | 55.12ms | 0.100s | 2x | 0.110s | A + 0.100s | 0.100s | 0.100s | 1x | 0.100s | B - ``min`` is the minimum time in records. diff --git a/misc/mpm99_timed.py b/misc/mpm99_timed.py index d92259521a750..88ef381907b3b 100644 --- a/misc/mpm99_timed.py +++ b/misc/mpm99_timed.py @@ -120,10 +120,9 @@ def initialize(): for s in range(int(2e-3 // dt)): substep() colors = np.array([0x068587, 0xED553B, 0xEEEEF0], dtype=np.uint32) - ti.profiler('gui.circles', warmup=5) - gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()]) - ti.profiler('gui.show', warmup=5) - gui.show() - ti.profiler() + with ti.profiler('gui.circles', warmup=5): + gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()]) + with ti.profiler('gui.show', warmup=5): + gui.show() ti.profiler.print() diff --git a/python/taichi/misc/timing.py b/python/taichi/misc/timing.py index b50905fd3c93d..785ff517b70b7 100644 --- a/python/taichi/misc/timing.py +++ b/python/taichi/misc/timing.py @@ -80,11 +80,20 @@ def stop(self, name=None): self.records[name] = [] self.records[name].append(diff) - def __call__(self, name=None, **options): - if self.last_started: - self.stop() - if name is not None: - self.start(name, **options) + class TimedScope: + def __init__(self, profiler, name, **options): + self.profiler = profiler + self.name = name + self.profiler.start(name, **options) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.profiler.stop(self.name) + + def __call__(self, name, **options): + return self.TimedScope(self, name, **options) def print(self, name=None): print(' min | avg | max | num | total | name') From baf3817b31424ea99a30719c7116c8148973f68d Mon Sep 17 00:00:00 2001 From: Taichi Gardener Date: Fri, 17 Jul 2020 10:41:29 -0400 Subject: [PATCH 13/15] [skip ci] enforce code format --- misc/mpm99_timed.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/misc/mpm99_timed.py b/misc/mpm99_timed.py index 88ef381907b3b..b28c331d34f24 100644 --- a/misc/mpm99_timed.py +++ b/misc/mpm99_timed.py @@ -121,7 +121,9 @@ def initialize(): substep() colors = np.array([0x068587, 0xED553B, 0xEEEEF0], dtype=np.uint32) with ti.profiler('gui.circles', warmup=5): - gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()]) + gui.circles(x.to_numpy(), + radius=1.5, + color=colors[material.to_numpy()]) with ti.profiler('gui.show', warmup=5): gui.show() From fde1ae8206b16e02edc9720c70d2abf87b3b75a4 Mon Sep 17 00:00:00 2001 From: Taichi Gardener Date: Thu, 23 Jul 2020 09:51:27 -0400 Subject: [PATCH 14/15] [skip ci] enforce code format --- python/taichi/misc/timing.py | 1 - tests/python/test_bitmasked.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/taichi/misc/timing.py b/python/taichi/misc/timing.py index a6da09b528a83..d189a3d5bf6dc 100644 --- a/python/taichi/misc/timing.py +++ b/python/taichi/misc/timing.py @@ -142,5 +142,4 @@ def wrapped(*args, **kwargs): profiler = PythonProfiler() - __all__ = ['profiler'] diff --git a/tests/python/test_bitmasked.py b/tests/python/test_bitmasked.py index 9de1397169fc2..627c8724fc205 100644 --- a/tests/python/test_bitmasked.py +++ b/tests/python/test_bitmasked.py @@ -223,7 +223,7 @@ def test_bitmasked_offset_child(): bm = ti.root.bitmasked(ti.i, n) bm.dense(ti.i, 16).place(x, x2) bm2 = bm.bitmasked(ti.i, 4) - + bm2.dense(ti.i, 4).place(y, y2, y3) bm2.bitmasked(ti.i, 4).place(z) From c2bbec5277629bc20085090fd5a44e6ab7c9a6b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E4=BA=8E=E6=96=8C?= <1931127624@qq.com> Date: Thu, 30 Jul 2020 15:03:04 +0800 Subject: [PATCH 15/15] Apply suggestions from code review Co-authored-by: Yuanming Hu --- docs/profiler.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/profiler.rst b/docs/profiler.rst index ce8c65dc0cfea..fc15122898f0e 100644 --- a/docs/profiler.rst +++ b/docs/profiler.rst @@ -2,7 +2,7 @@ Profiler ======== -Taichi's profiler can help you analyze the run-time cost of your program, i.e. performance. +Taichi's profilers can help analyze the performance of your program. Currently there are three profiling systems in Taichi: