From c97e0f318727772bca902b7b58e8a8e3aa5ef4d8 Mon Sep 17 00:00:00 2001 From: xumingkuan Date: Sat, 22 Aug 2020 17:14:13 +0800 Subject: [PATCH 1/3] [benchmark] [async] Add more statistics for async benchmark --- benchmarks/mpm2d.py | 12 +++---- python/taichi/lang/__init__.py | 60 +++++++++++++++++++++++----------- taichi/python/export_lang.cpp | 5 +++ 3 files changed, 52 insertions(+), 25 deletions(-) diff --git a/benchmarks/mpm2d.py b/benchmarks/mpm2d.py index c88a747513c5a..e2a30959bfe2b 100644 --- a/benchmarks/mpm2d.py +++ b/benchmarks/mpm2d.py @@ -120,7 +120,7 @@ def substep(): compile_time = time.time() substep() compile_time = time.time() - compile_time - ti.stat_write_yaml('compilation_time(s)', compile_time) + ti.stat_write('compilation_time', compile_time) ti.get_runtime().sync() t = time.time() for frame in range(200): @@ -130,8 +130,8 @@ def substep(): # gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()]) # gui.show() # Change to gui.show(f'{frame:06d}.png') to write images to disk ti.get_runtime().sync() - avg = (time.time() - t) / 4000 * 1000 # miliseconds - ti.stat_write_yaml('running_time(ms)', avg) + avg = (time.time() - t) / 4000 + ti.stat_write('running_time', avg) @ti.archs_excluding(ti.opengl) @@ -253,7 +253,7 @@ def substep(): compile_time = time.time() substep() compile_time = time.time() - compile_time - ti.stat_write_yaml('compilation_time(s)', compile_time) + ti.stat_write('compilation_time', compile_time) ti.get_runtime().sync() t = time.time() for frame in range(200): @@ -263,5 +263,5 @@ def substep(): # gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()]) # gui.show() # Change to gui.show(f'{frame:06d}.png') to write images to disk ti.get_runtime().sync() - avg = (time.time() - t) / 4000 * 1000 # miliseconds - ti.stat_write_yaml('running_time(ms)', avg) + avg = (time.time() - t) / 4000 + ti.stat_write('running_time', avg) diff --git a/python/taichi/lang/__init__.py b/python/taichi/lang/__init__.py index 5d9f0b4b4ca6f..de3fc8b0df28e 100644 --- a/python/taichi/lang/__init__.py +++ b/python/taichi/lang/__init__.py @@ -321,25 +321,45 @@ def visit(node): def benchmark(func, repeat=300, args=()): import taichi as ti import time - compile_time = time.time() - func(*args) - compile_time = time.time() - compile_time - ti.stat_write_yaml('compilation_time(s)', compile_time) - # The reason why we run 4 times is to warm up instruction/data caches. - # Discussion: https://github.com/taichi-dev/taichi/pull/1002#discussion_r426312136 - for i in range(4): - func(*args) # compile the kernel first - ti.sync() - t = time.time() - for n in range(repeat): + def run_benchmark(): + compile_time = time.time() func(*args) - ti.get_runtime().sync() - elapsed = time.time() - t - avg = elapsed / repeat * 1000 # miliseconds - ti.stat_write_yaml('running_time(ms)', avg) - - -def stat_write_yaml(key, value): + compile_time = time.time() - compile_time + ti.stat_write('compilation_time', compile_time) + codegen_stat = ti.core.stat() + for line in codegen_stat.split('\n'): + try: + a, b = line.strip().split(':') + except: + continue + a = a.strip() + b = int(float(b)) + if a == 'codegen_kernel_statements': + ti.stat_write('instructions', b) + if a == 'codegen_offloaded_tasks': + ti.stat_write('offloaded_tasks', b) + elif a == 'launched_kernels': + ti.stat_write('launched_kernels', b) + # The reason why we run 4 times is to warm up instruction/data caches. + # Discussion: https://github.com/taichi-dev/taichi/pull/1002#discussion_r426312136 + for i in range(4): + func(*args) # compile the kernel first + ti.sync() + t = time.time() + for n in range(repeat): + func(*args) + ti.get_runtime().sync() + elapsed = time.time() - t + avg = elapsed / repeat + ti.stat_write('running_time', avg) + ti.cfg.async_mode = False + run_benchmark() + if ti.is_extension_supported(ti.cfg.arch, ti.extension.async_mode): + ti.cfg.async_mode = True + run_benchmark() + + +def stat_write(key, value): import taichi as ti import yaml case_name = os.environ.get('TI_CURRENT_BENCHMARK') @@ -348,6 +368,7 @@ def stat_write_yaml(key, value): if case_name.startswith('benchmark_'): case_name = case_name[10:] arch_name = core.arch_name(ti.cfg.arch) + async_mode = 'async' if ti.cfg.async_mode else 'sync' output_dir = os.environ.get('TI_BENCHMARK_OUTPUT_DIR', '.') filename = f'{output_dir}/benchmark.yml' try: @@ -357,7 +378,8 @@ def stat_write_yaml(key, value): data = {} data.setdefault(key, {}) data[key].setdefault(case_name, {}) - data[key][case_name][arch_name] = value + data[key][case_name].setdefault(async_mode, {}) + data[key][case_name][async_mode][arch_name] = value with open(filename, 'w') as f: yaml.dump(data, f, Dumper=yaml.SafeDumper) diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index 90f44e487708e..8b44712a95de6 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -595,6 +595,11 @@ void export_lang(py::module &m) { m.def("is_extension_supported", is_extension_supported); m.def("print_stat", [] { stat.print(); }); + m.def("stat", [] { + std::string result; + stat.print(&result); + return result; + }); m.def("record_action_hint", [](std::string content) { ActionRecorder::get_instance().record("hint", From ef4e9cd2265493e7cd555bf8c7008531487bbf3f Mon Sep 17 00:00:00 2001 From: xumingkuan Date: Sat, 22 Aug 2020 17:28:05 +0800 Subject: [PATCH 2/3] Use ti.benchmark() in mpm2d.py --- benchmarks/mpm2d.py | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/benchmarks/mpm2d.py b/benchmarks/mpm2d.py index e2a30959bfe2b..8f52f59fb7d70 100644 --- a/benchmarks/mpm2d.py +++ b/benchmarks/mpm2d.py @@ -117,21 +117,7 @@ def substep(): F[i] = [[1, 0], [0, 1]] Jp[i] = 1 - compile_time = time.time() - substep() - compile_time = time.time() - compile_time - ti.stat_write('compilation_time', compile_time) - ti.get_runtime().sync() - t = time.time() - for frame in range(200): - for s in range(20): - substep() - # colors = np.array([0x068587, 0xED553B, 0xEEEEF0], dtype=np.uint32) - # gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()]) - # gui.show() # Change to gui.show(f'{frame:06d}.png') to write images to disk - ti.get_runtime().sync() - avg = (time.time() - t) / 4000 - ti.stat_write('running_time', avg) + ti.benchmark(substep, repeat=4000) @ti.archs_excluding(ti.opengl) @@ -250,18 +236,4 @@ def substep(): F[i] = [[1, 0], [0, 1]] Jp[i] = 1 - compile_time = time.time() - substep() - compile_time = time.time() - compile_time - ti.stat_write('compilation_time', compile_time) - ti.get_runtime().sync() - t = time.time() - for frame in range(200): - for s in range(20): - substep() - # colors = np.array([0x068587, 0xED553B, 0xEEEEF0], dtype=np.uint32) - # gui.circles(x.to_numpy(), radius=1.5, color=colors[material.to_numpy()]) - # gui.show() # Change to gui.show(f'{frame:06d}.png') to write images to disk - ti.get_runtime().sync() - avg = (time.time() - t) / 4000 - ti.stat_write('running_time', avg) + ti.benchmark(substep, repeat=4000) From 5e23b002481a806e22225d1b365d48f235f159ae Mon Sep 17 00:00:00 2001 From: Taichi Gardener Date: Sat, 22 Aug 2020 05:37:56 -0400 Subject: [PATCH 3/3] [skip ci] enforce code format --- python/taichi/lang/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/taichi/lang/__init__.py b/python/taichi/lang/__init__.py index de3fc8b0df28e..71191f9f98002 100644 --- a/python/taichi/lang/__init__.py +++ b/python/taichi/lang/__init__.py @@ -321,6 +321,7 @@ def visit(node): def benchmark(func, repeat=300, args=()): import taichi as ti import time + def run_benchmark(): compile_time = time.time() func(*args) @@ -352,6 +353,7 @@ def run_benchmark(): elapsed = time.time() - t avg = elapsed / repeat ti.stat_write('running_time', avg) + ti.cfg.async_mode = False run_benchmark() if ti.is_extension_supported(ti.cfg.arch, ti.extension.async_mode):