diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b8b9cc98fbcb9..9d3e90d78fb7d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -48,10 +48,10 @@ jobs: run: | if [ -z "${{ github.event.action }}" ]; then # For nightly release, we only run on python 3.8 - echo '::set-output name=matrix::{"include":[{"name":"taichi-nightly","python":"3.8","conda_python":"py38"}]}"' + echo '::set-output name=matrix::{"include":[{"name":"taichi-nightly","python":"3.8","conda_python":"py38"},{"name":"taichi-nightly","python":"3.10","conda_python":"py310"}]}"' # M1 only supports py38 and py39(conda), so change matrix. - echo '::set-output name=matrix_osx::{"include":[{"name":"taichi-nightly","python":"3.8"}]}"' + echo '::set-output name=matrix_osx::{"include":[{"name":"taichi-nightly","python":"3.8"},{"name":"taichi-nightly","python":"3.10"}]}"' else # For production release, we run on four python versions. echo '::set-output name=matrix::{"include":[{"name":"taichi","python":"3.6","conda_python":"py36"},{"name":"taichi","python":"3.7","conda_python":"py37"},{"name":"taichi","python":"3.8","conda_python":"py38"},{"name":"taichi","python":"3.9","conda_python":"py39"}]}"' diff --git a/docs/lang/articles/advanced/sparse_matrix.md b/docs/lang/articles/advanced/sparse_matrix.md index 72c449ad423a2..5e69c1d7a457b 100644 --- a/docs/lang/articles/advanced/sparse_matrix.md +++ b/docs/lang/articles/advanced/sparse_matrix.md @@ -22,7 +22,7 @@ n = 4 K = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) @ti.kernel -def fill(A: ti.linalg.sparse_matrix_builder()): +def fill(A: ti.types.sparse_matrix_builder()): for i in range(n): A[i, i] += 1 # Only += and -= operators are supported for now. @@ -146,7 +146,7 @@ K = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) b = ti.field(ti.f32, shape=n) @ti.kernel -def fill(A: ti.linalg.sparse_matrix_builder(), b: ti.template(), interval: ti.i32): +def fill(A: ti.types.sparse_matrix_builder(), b: ti.template(), interval: ti.i32): for i in range(n): A[i, i] += 2.0 diff --git a/misc/spMv_linear_solve.py b/misc/spMv_linear_solve.py index 87bea0728673e..721648f45bb8c 100644 --- a/misc/spMv_linear_solve.py +++ b/misc/spMv_linear_solve.py @@ -9,7 +9,7 @@ @ti.kernel -def fill(A: ti.linalg.sparse_matrix_builder(), b: ti.template(), +def fill(A: ti.types.sparse_matrix_builder(), b: ti.template(), interval: ti.i32): for i in range(n): A[i, i] += 2.0 diff --git a/misc/sparse_matrix.py b/misc/sparse_matrix.py index 49cac7af58e87..a56054f7aedcd 100644 --- a/misc/sparse_matrix.py +++ b/misc/sparse_matrix.py @@ -9,8 +9,8 @@ @ti.kernel -def fill(A: ti.linalg.sparse_matrix_builder(), - b: ti.linalg.sparse_matrix_builder(), interval: ti.i32): +def fill(A: ti.types.sparse_matrix_builder(), + b: ti.types.sparse_matrix_builder(), interval: ti.i32): for i in range(n): if i > 0: A[i - 1, i] += -1.0 diff --git a/python/taichi/__init__.py b/python/taichi/__init__.py index 94f94c411951a..7d834259bcba4 100644 --- a/python/taichi/__init__.py +++ b/python/taichi/__init__.py @@ -9,7 +9,7 @@ # Provide a shortcut to types since they're commonly used. 
from taichi.types.primitive_types import * -from taichi import ad, linalg, tools +from taichi import ad, experimental, linalg, tools from taichi.ui import GUI, hex_to_rgb, rgb_to_hex, ui # Issue#2223: Do not reorder, or we're busted with partially initialized module diff --git a/python/taichi/_version_check.py b/python/taichi/_version_check.py index 1718f70b5aa18..5958aecab2455 100644 --- a/python/taichi/_version_check.py +++ b/python/taichi/_version_check.py @@ -1,11 +1,15 @@ +import datetime import json import os import platform import threading +import uuid from urllib import request +from taichi._lib import core as _ti_core -def check_version(): + +def check_version(cur_uuid): # Check Taichi version for the user. major = _ti_core.get_version_major() minor = _ti_core.get_version_minor() @@ -35,7 +39,10 @@ def check_version(): payload['python'] = 'cp38' elif python_version.startswith('3.9.'): payload['python'] = 'cp39' + elif python_version.startswith('3.10.'): + payload['python'] = 'cp310' + payload['uuid'] = cur_uuid # We do not want request exceptions break users' usage of Taichi. try: payload = json.dumps(payload) @@ -50,39 +57,40 @@ def check_version(): return None +def write_version_info(response, cur_uuid, version_info_path, cur_date): + if response is None: + return + with open(version_info_path, 'w') as f: + f.write((cur_date).strftime('%Y-%m-%d')) + f.write('\n') + if response['status'] == 1: + f.write(response['latest_version']) + else: + f.write('0.0.0') + f.write('\n') + f.write(cur_uuid) + f.write('\n') + + def try_check_version(): try: os.makedirs(_ti_core.get_repo_dir(), exist_ok=True) - timestamp_path = os.path.join(_ti_core.get_repo_dir(), 'timestamp') + version_info_path = os.path.join(_ti_core.get_repo_dir(), + 'version_info') cur_date = datetime.date.today() - if os.path.exists(timestamp_path): - last_time = '' - with open(timestamp_path, 'r') as f: - last_time = f.readlines()[0].rstrip() + if os.path.exists(version_info_path): + with open(version_info_path, 'r') as f: + version_info_file = f.readlines() + last_time = version_info_file[0].rstrip() + cur_uuid = version_info_file[2].rstrip() if cur_date.strftime('%Y-%m-%d') > last_time: - response = check_version() - if response is None: - return - with open(timestamp_path, 'w') as f: - f.write((cur_date + - datetime.timedelta(days=7)).strftime('%Y-%m-%d')) - f.write('\n') - if response['status'] == 1: - f.write(response['latest_version']) - else: - f.write('0.0.0') + response = check_version(cur_uuid) + write_version_info(response, cur_uuid, version_info_path, + cur_date) else: - response = check_version() - if response is None: - return - with open(timestamp_path, 'w') as f: - f.write((cur_date + - datetime.timedelta(days=7)).strftime('%Y-%m-%d')) - f.write('\n') - if response['status'] == 1: - f.write(response['latest_version']) - else: - f.write('0.0.0') + cur_uuid = str(uuid.uuid4()) + response = check_version(cur_uuid) + write_version_info(response, cur_uuid, version_info_path, cur_date) # Wildcard exception to catch potential file writing errors. 
except: pass diff --git a/python/taichi/examples/simulation/implicit_mass_spring.py b/python/taichi/examples/simulation/implicit_mass_spring.py index 38deb2daa2f0c..698d73df096de 100644 --- a/python/taichi/examples/simulation/implicit_mass_spring.py +++ b/python/taichi/examples/simulation/implicit_mass_spring.py @@ -85,7 +85,7 @@ def init_edges(self): rest_len[idx] = (pos[idx1] - pos[idx2]).norm() @ti.kernel - def init_mass_sp(self, M: ti.linalg.sparse_matrix_builder()): + def init_mass_sp(self, M: ti.types.sparse_matrix_builder()): for i in range(self.NV): mass = self.mass[i] M[2 * i + 0, 2 * i + 0] += mass @@ -137,7 +137,7 @@ def compute_Jacobians(self): self.Jf[1] = ti.Matrix([[-self.kf, 0], [0, -self.kf]]) @ti.kernel - def assemble_K(self, K: ti.linalg.sparse_matrix_builder()): + def assemble_K(self, K: ti.types.sparse_matrix_builder()): for i in self.spring: idx1, idx2 = self.spring[i][0], self.spring[i][1] for m, n in ti.static(ti.ndrange(2, 2)): @@ -150,7 +150,7 @@ def assemble_K(self, K: ti.linalg.sparse_matrix_builder()): K[2 * (self.NV - 1) + m, 2 * (self.NV - 1) + n] += self.Jf[1][m, n] @ti.kernel - def assemble_D(self, D: ti.linalg.sparse_matrix_builder()): + def assemble_D(self, D: ti.types.sparse_matrix_builder()): for i in self.spring: idx1, idx2 = self.spring[i][0], self.spring[i][1] for m, n in ti.static(ti.ndrange(2, 2)): diff --git a/python/taichi/examples/simulation/stable_fluid.py b/python/taichi/examples/simulation/stable_fluid.py index d0a8f293bd6ad..864fd4204799a 100644 --- a/python/taichi/examples/simulation/stable_fluid.py +++ b/python/taichi/examples/simulation/stable_fluid.py @@ -69,7 +69,7 @@ def swap(self): if use_sparse_matrix: # use a sparse matrix to solve Poisson's pressure equation. @ti.kernel - def fill_laplacian_matrix(A: ti.linalg.sparse_matrix_builder()): + def fill_laplacian_matrix(A: ti.types.sparse_matrix_builder()): for i, j in ti.ndrange(res, res): row = i * res + j center = 0.0 diff --git a/python/taichi/experimental.py b/python/taichi/experimental.py new file mode 100644 index 0000000000000..13df4c4a9baf3 --- /dev/null +++ b/python/taichi/experimental.py @@ -0,0 +1,3 @@ +from taichi.lang.kernel_impl import real_func + +__all__ = ["real_func"] diff --git a/python/taichi/lang/ast/ast_transformer.py b/python/taichi/lang/ast/ast_transformer.py index 58af5053fc5eb..002cc2de76780 100644 --- a/python/taichi/lang/ast/ast_transformer.py +++ b/python/taichi/lang/ast/ast_transformer.py @@ -456,8 +456,10 @@ def transform_as_kernel(): ctx.create_variable(arg.arg, ctx.global_vars[arg.arg]) elif isinstance(ctx.func.argument_annotations[i], annotations.sparse_matrix_builder): - ctx.create_variable(arg.arg, - kernel_arguments.decl_sparse_matrix()) + ctx.create_variable( + arg.arg, + kernel_arguments.decl_sparse_matrix( + to_taichi_type(ctx.arg_features[i]))) elif isinstance(ctx.func.argument_annotations[i], annotations.any_arr): ctx.create_variable( @@ -482,7 +484,7 @@ def transform_as_kernel(): transform_as_kernel() else: # ti.func - if impl.get_runtime().experimental_real_function: + if ctx.is_real_function: transform_as_kernel() else: len_args = len(args.args) @@ -524,12 +526,12 @@ def transform_as_kernel(): @staticmethod def build_Return(ctx, node): - if not impl.get_runtime().experimental_real_function: + if not ctx.is_real_function: if ctx.is_in_non_static_control_flow(): raise TaichiSyntaxError( "Return inside non-static if/for is not supported") build_stmt(ctx, node.value) - if ctx.is_kernel or impl.get_runtime().experimental_real_function: + if 
ctx.is_kernel or ctx.is_real_function: # TODO: check if it's at the end of a kernel, throw TaichiSyntaxError if not if node.value is not None: if ctx.func.return_type is None: @@ -557,7 +559,7 @@ def build_Return(ctx, node): # only need to replace the object part, i.e. args[0].value else: ctx.return_data = node.value.ptr - if not impl.get_runtime().experimental_real_function: + if not ctx.is_real_function: ctx.returned = True return None @@ -1109,13 +1111,11 @@ def build_If(ctx, node): @staticmethod def build_Expr(ctx, node): build_stmt(ctx, node.value) - if not isinstance( - node.value, - ast.Call) or not impl.get_runtime().experimental_real_function: + if not isinstance(node.value, ast.Call): return None is_taichi_function = getattr(node.value.func.ptr, '_is_taichi_function', False) - if is_taichi_function: + if is_taichi_function and node.value.func.ptr._is_real_function: func_call_result = node.value.ptr ctx.ast_builder.insert_expr_stmt(func_call_result.ptr) return None diff --git a/python/taichi/lang/ast/ast_transformer_utils.py b/python/taichi/lang/ast/ast_transformer_utils.py index bf39aa5874768..900351cc75555 100644 --- a/python/taichi/lang/ast/ast_transformer_utils.py +++ b/python/taichi/lang/ast/ast_transformer_utils.py @@ -114,7 +114,8 @@ def __init__(self, file=None, src=None, start_lineno=None, - ast_builder=None): + ast_builder=None, + is_real_function=False): self.func = func self.local_scopes = [] self.loop_scopes = [] @@ -140,6 +141,7 @@ def __init__(self, self.returned = False self.ast_builder = ast_builder self.visited_funcdef = False + self.is_real_function = is_real_function # e.g.: FunctionDef, Module, Global def variable_scope_guard(self): diff --git a/python/taichi/lang/expr.py b/python/taichi/lang/expr.py index 8c7f771666a99..05582cd6be9cb 100644 --- a/python/taichi/lang/expr.py +++ b/python/taichi/lang/expr.py @@ -3,7 +3,8 @@ from taichi.lang import impl from taichi.lang.common_ops import TaichiOperations from taichi.lang.exception import TaichiTypeError -from taichi.lang.util import is_taichi_class +from taichi.lang.util import is_taichi_class, to_numpy_type, to_taichi_type +from taichi.types.primitive_types import integer_types, real_types # Scalar, basic data type @@ -30,7 +31,7 @@ def __init__(self, *args, tb=None, dtype=None): "Only 0-dimensional numpy array can be used to initialize a scalar expression" ) arg = arg.dtype.type(arg) - self.ptr = impl.make_constant_expr(arg, dtype).ptr + self.ptr = make_constant_expr(arg, dtype).ptr else: assert False if self.tb: @@ -47,6 +48,50 @@ def __repr__(self): return '' +def _check_in_range(npty, val): + iif = np.iinfo(npty) + if not iif.min <= val <= iif.max: + # This isn't the case we want to deal with: |val| does't fall into the valid range of either + # the signed or the unsigned type. + raise TaichiTypeError( + f'Constant {val} has exceeded the range of {to_taichi_type(npty)}: [{iif.min}, {iif.max}]' + ) + + +def _clamp_unsigned_to_range(npty, val): + # npty: np.int32 or np.int64 + iif = np.iinfo(npty) + if iif.min <= val <= iif.max: + return val + cap = (1 << iif.bits) + assert 0 <= val < cap + new_val = val - cap + return new_val + + +def make_constant_expr(val, dtype): + if isinstance(val, (int, np.integer)): + constant_dtype = impl.get_runtime( + ).default_ip if dtype is None else dtype + if constant_dtype not in integer_types: + raise TaichiTypeError( + 'Integer literals must be annotated with a integer type. For type casting, use `ti.cast`.' 
+ ) + _check_in_range(to_numpy_type(constant_dtype), val) + return Expr( + _ti_core.make_const_expr_int( + constant_dtype, _clamp_unsigned_to_range(np.int64, val))) + if isinstance(val, (float, np.floating)): + constant_dtype = impl.get_runtime( + ).default_fp if dtype is None else dtype + if constant_dtype not in real_types: + raise TaichiTypeError( + 'Floating-point literals must be annotated with a floating-point type. For type casting, use `ti.cast`.' + ) + return Expr(_ti_core.make_const_expr_fp(constant_dtype, val)) + raise TaichiTypeError(f'Invalid constant scalar data type: {type(val)}') + + def make_var_list(size): exprs = [] for _ in range(size): diff --git a/python/taichi/lang/impl.py b/python/taichi/lang/impl.py index 426c973809f88..742cccf31792c 100644 --- a/python/taichi/lang/impl.py +++ b/python/taichi/lang/impl.py @@ -4,12 +4,11 @@ import numpy as np from taichi._lib import core as _ti_core -from taichi._logging import error from taichi._snode.fields_builder import FieldsBuilder from taichi.lang._ndarray import ScalarNdarray from taichi.lang._ndrange import GroupedNDRange, _Ndrange from taichi.lang.any_array import AnyArray, AnyArrayAccess -from taichi.lang.exception import TaichiRuntimeError, TaichiTypeError +from taichi.lang.exception import TaichiRuntimeError from taichi.lang.expr import Expr, make_expr_group from taichi.lang.field import Field, ScalarField from taichi.lang.kernel_arguments import SparseMatrixProxy @@ -23,8 +22,7 @@ from taichi.lang.struct import Struct, StructField, _IntermediateStruct from taichi.lang.tape import TapeImpl from taichi.lang.util import (cook_dtype, get_traceback, is_taichi_class, - python_scope, taichi_scope, to_numpy_type, - to_taichi_type, warning) + python_scope, taichi_scope, warning) from taichi.types.primitive_types import f16, f32, f64, i32, i64 @@ -117,12 +115,6 @@ def begin_frontend_if(ast_builder, cond): ast_builder.begin_frontend_if(Expr(cond).ptr) -def wrap_scalar(x): - if type(x) in [int, float]: - return Expr(x) - return x - - @taichi_scope def subscript(value, *_indices, skip_reordered=False): if isinstance(value, np.ndarray): @@ -247,7 +239,6 @@ def __init__(self, kernels=None): self.current_kernel = None self.global_vars = [] self.matrix_fields = [] - self.experimental_real_function = False self.default_fp = f32 self.default_ip = i32 self.target_tape = None @@ -364,47 +355,6 @@ def get_runtime(): return pytaichi -def _check_in_range(npty, val): - iif = np.iinfo(npty) - if not iif.min <= val <= iif.max: - # This isn't the case we want to deal with: |val| does't fall into the valid range of either - # the signed or the unsigned type. 
- error( - f'Constant {val} has exceeded the range of {to_taichi_type(npty)}: [{iif.min}, {iif.max}]' - ) - - -def _clamp_unsigned_to_range(npty, val): - # npty: np.int32 or np.int64 - iif = np.iinfo(npty) - if iif.min <= val <= iif.max: - return val - cap = (1 << iif.bits) - assert 0 <= val < cap - new_val = val - cap - return new_val - - -@taichi_scope -def make_constant_expr_i32(val): - assert isinstance(val, (int, np.integer)) - return Expr(_ti_core.make_const_expr_int(i32, val)) - - -@taichi_scope -def make_constant_expr(val, dtype): - if isinstance(val, (int, np.integer)): - constant_dtype = pytaichi.default_ip if dtype is None else dtype - _check_in_range(to_numpy_type(constant_dtype), val) - return Expr( - _ti_core.make_const_expr_int( - constant_dtype, _clamp_unsigned_to_range(np.int64, val))) - if isinstance(val, (float, np.floating)): - constant_dtype = pytaichi.default_fp if dtype is None else dtype - return Expr(_ti_core.make_const_expr_fp(constant_dtype, val)) - raise TaichiTypeError(f'Invalid constant scalar data type: {type(val)}') - - def reset(): global pytaichi old_kernels = pytaichi.kernels diff --git a/python/taichi/lang/kernel_arguments.py b/python/taichi/lang/kernel_arguments.py index d8ad5dca414fc..aa1bf8a3c7324 100644 --- a/python/taichi/lang/kernel_arguments.py +++ b/python/taichi/lang/kernel_arguments.py @@ -1,6 +1,6 @@ import taichi.lang from taichi._lib import core as _ti_core -from taichi.lang import impl +from taichi.lang import impl, ops from taichi.lang.any_array import AnyArray from taichi.lang.enums import Layout from taichi.lang.expr import Expr @@ -10,30 +10,31 @@ class SparseMatrixEntry: - def __init__(self, ptr, i, j): + def __init__(self, ptr, i, j, dtype): self.ptr = ptr self.i = i self.j = j + self.dtype = dtype def _augassign(self, value, op): + call_func = f"insert_triplet_{self.dtype}" if op == 'Add': - taichi.lang.impl.call_internal("insert_triplet", self.ptr, self.i, - self.j, - taichi.lang.impl.ti_float(value)) + taichi.lang.impl.call_internal(call_func, self.ptr, self.i, self.j, + ops.cast(value, self.dtype)) elif op == 'Sub': - taichi.lang.impl.call_internal("insert_triplet", self.ptr, self.i, - self.j, - -taichi.lang.impl.ti_float(value)) + taichi.lang.impl.call_internal(call_func, self.ptr, self.i, self.j, + -ops.cast(value, self.dtype)) else: assert False, "Only operations '+=' and '-=' are supported on sparse matrices." 
class SparseMatrixProxy: - def __init__(self, ptr): + def __init__(self, ptr, dtype): self.ptr = ptr + self.dtype = dtype def subscript(self, i, j): - return SparseMatrixEntry(self.ptr, i, j) + return SparseMatrixEntry(self.ptr, i, j, self.dtype) def decl_scalar_arg(dtype): @@ -48,11 +49,13 @@ def decl_matrix_arg(matrixtype): for _ in range(matrixtype.n)]) -def decl_sparse_matrix(): +def decl_sparse_matrix(dtype): + value_type = cook_dtype(dtype) ptr_type = cook_dtype(u64) # Treat the sparse matrix argument as a scalar since we only need to pass in the base pointer arg_id = impl.get_runtime().prog.decl_arg(ptr_type, False) - return SparseMatrixProxy(_ti_core.make_arg_load_expr(arg_id, ptr_type)) + return SparseMatrixProxy(_ti_core.make_arg_load_expr(arg_id, ptr_type), + value_type) def decl_any_arr_arg(dtype, dim, element_shape, layout): diff --git a/python/taichi/lang/kernel_impl.py b/python/taichi/lang/kernel_impl.py index 2378295b61ddc..ee6c549be3d92 100644 --- a/python/taichi/lang/kernel_impl.py +++ b/python/taichi/lang/kernel_impl.py @@ -27,7 +27,7 @@ import torch -def func(fn): +def func(fn, is_real_function=False): """Marks a function as callable in Taichi-scope. This decorator transforms a Python function into a Taichi one. Taichi @@ -35,6 +35,7 @@ def func(fn): Args: fn (Callable): The Python function to be decorated + is_real_function (bool): Whether the function is a real function Returns: Callable: The decorated function @@ -51,16 +52,21 @@ def func(fn): """ is_classfunc = _inside_class(level_of_class_stackframe=3) - fun = Func(fn, _classfunc=is_classfunc) + fun = Func(fn, _classfunc=is_classfunc, is_real_function=is_real_function) @functools.wraps(fn) def decorated(*args): return fun.__call__(*args) decorated._is_taichi_function = True + decorated._is_real_function = is_real_function return decorated +def real_func(fn): + return func(fn, is_real_function=True) + + def pyfunc(fn): """Marks a function as callable in both Taichi and Python scopes. 
@@ -92,7 +98,8 @@ def _get_tree_and_ctx(self, is_kernel=True, arg_features=None, args=None, - ast_builder=None): + ast_builder=None, + is_real_function=False): file = oinspect.getsourcefile(self.func) src, start_lineno = oinspect.getsourcelines(self.func) src = [textwrap.fill(line, tabsize=4, width=9999) for line in src] @@ -111,7 +118,7 @@ def _get_tree_and_ctx(self, if isinstance(func_body.returns, ast.Name): global_vars[func_body.returns.id] = self.return_type - if is_kernel or impl.get_runtime().experimental_real_function: + if is_kernel or is_real_function: # inject template parameters into globals for i in self.template_slot_locations: template_var_name = self.argument_names[i] @@ -126,19 +133,25 @@ def _get_tree_and_ctx(self, src=src, start_lineno=start_lineno, file=file, - ast_builder=ast_builder) + ast_builder=ast_builder, + is_real_function=is_real_function) class Func: function_counter = 0 - def __init__(self, _func, _classfunc=False, _pyfunc=False): + def __init__(self, + _func, + _classfunc=False, + _pyfunc=False, + is_real_function=False): self.func = _func self.func_id = Func.function_counter Func.function_counter += 1 self.compiled = None self.classfunc = _classfunc self.pyfunc = _pyfunc + self.is_real_function = is_real_function self.argument_annotations = [] self.argument_names = [] self.return_type = None @@ -158,7 +171,7 @@ def __call__(self, *args): "Taichi functions cannot be called from Python-scope.") return self.func(*args) - if impl.get_runtime().experimental_real_function: + if self.is_real_function: if impl.get_runtime().current_kernel.is_grad: raise TaichiSyntaxError( "Real function in gradient kernels unsupported.") @@ -174,9 +187,10 @@ def __call__(self, *args): self, is_kernel=False, args=args, - ast_builder=impl.get_runtime().prog.current_ast_builder()) + ast_builder=impl.get_runtime().prog.current_ast_builder(), + is_real_function=self.is_real_function) ret = transform_tree(tree, ctx) - if not impl.get_runtime().experimental_real_function: + if not self.is_real_function: if self.return_type and not ctx.returned: raise TaichiSyntaxError( "Function has a return type but does not have a return statement" @@ -185,7 +199,7 @@ def __call__(self, *args): def func_call_rvalue(self, key, args): # Skip the template args, e.g., |self| - assert impl.get_runtime().experimental_real_function + assert self.is_real_function non_template_args = [] for i, anno in enumerate(self.argument_annotations): if not isinstance(anno, template): @@ -196,7 +210,10 @@ def func_call_rvalue(self, key, args): self.taichi_functions[key.instance_id], non_template_args)) def do_compile(self, key, args): - tree, ctx = _get_tree_and_ctx(self, is_kernel=False, args=args) + tree, ctx = _get_tree_and_ctx(self, + is_kernel=False, + args=args, + is_real_function=self.is_real_function) fn = impl.get_runtime().prog.create_function(key) def func_body(): @@ -236,8 +253,7 @@ def extract_arguments(self): annotation = template() # TODO: pyfunc also need type annotation check when real function is enabled, # but that has to happen at runtime when we know which scope it's called from. 
- elif not self.pyfunc and impl.get_runtime( - ).experimental_real_function: + elif not self.pyfunc and self.is_real_function: raise TaichiSyntaxError( f'Taichi function `{self.func.__name__}` parameter `{arg_name}` must be type annotated' ) @@ -305,6 +321,8 @@ def extract_arg(arg, anno): element_dim] if layout == Layout.SOA else shape[ -element_dim:] return to_taichi_type(arg.dtype), len(shape), element_shape, layout + if isinstance(anno, sparse_matrix_builder): + return arg.dtype # Use '#' as a placeholder because other kinds of arguments are not involved in template instantiation return '#' @@ -457,7 +475,7 @@ def taichi_ast_generator(kernel_cxx): try: ctx.ast_builder = kernel_cxx.ast_builder() transform_tree(tree, ctx) - if not impl.get_runtime().experimental_real_function: + if not ctx.is_real_function: if self.return_type and not ctx.returned: raise TaichiSyntaxError( "Kernel has a return type but does not have a return statement" @@ -545,7 +563,7 @@ def func__(*args): provided) launch_ctx.set_arg_int(actual_argument_slot, int(v)) elif isinstance(needed, sparse_matrix_builder): - # Pass only the base pointer of the ti.linalg.sparse_matrix_builder() argument + # Pass only the base pointer of the ti.types.sparse_matrix_builder() argument launch_ctx.set_arg_int(actual_argument_slot, v._get_addr()) elif isinstance(needed, any_arr) and isinstance( v, taichi.lang._ndarray.Ndarray): diff --git a/python/taichi/lang/matrix.py b/python/taichi/lang/matrix.py index b79b8fcb539a4..02efa2e6aedbd 100644 --- a/python/taichi/lang/matrix.py +++ b/python/taichi/lang/matrix.py @@ -76,8 +76,8 @@ def __init__(self, n=1, m=1, dt=None, suppress_warning=False): mat.append( list([ impl.make_tensor_element_expr( - self.local_tensor_proxy, - (impl.make_constant_expr_i32(i), ), + self.local_tensor_proxy, (expr.Expr( + i, dtype=primitive_types.i32), ), (len(n), ), self.dynamic_index_stride) ])) else: # now init a Matrix @@ -119,8 +119,8 @@ def __init__(self, n=1, m=1, dt=None, suppress_warning=False): mat[i].append( impl.make_tensor_element_expr( self.local_tensor_proxy, - (impl.make_constant_expr_i32(i), - impl.make_constant_expr_i32(j)), + (expr.Expr(i, dtype=primitive_types.i32), + expr.Expr(j, dtype=primitive_types.i32)), (len(n), len(n[0])), self.dynamic_index_stride)) self.n = len(mat) diff --git a/python/taichi/lang/misc.py b/python/taichi/lang/misc.py index f682c39d26e5a..5ad52fd460a18 100644 --- a/python/taichi/lang/misc.py +++ b/python/taichi/lang/misc.py @@ -240,7 +240,6 @@ class _SpecialConfig: def __init__(self): self.log_level = 'info' self.gdb_trigger = False - self.experimental_real_function = False self.short_circuit_operators = False @@ -383,7 +382,6 @@ def init(arch=None, # submodule configurations (spec_cfg): env_spec.add('log_level', str) env_spec.add('gdb_trigger') - env_spec.add('experimental_real_function') env_spec.add('short_circuit_operators') # compiler configurations (ti.cfg): @@ -405,8 +403,6 @@ def init(arch=None, # dispatch configurations that are not in ti.cfg: if not _test_mode: _ti_core.set_core_trigger_gdb_when_crash(spec_cfg.gdb_trigger) - impl.get_runtime().experimental_real_function = \ - spec_cfg.experimental_real_function impl.get_runtime().short_circuit_operators = \ spec_cfg.short_circuit_operators _logging.set_logging_level(spec_cfg.log_level.lower()) diff --git a/python/taichi/linalg/sparse_matrix.py b/python/taichi/linalg/sparse_matrix.py index 4dab4a0ab8ee8..928aec91d1382 100644 --- a/python/taichi/linalg/sparse_matrix.py +++ b/python/taichi/linalg/sparse_matrix.py 
@@ -143,9 +143,10 @@ def __init__(self, dtype=f32): self.num_rows = num_rows self.num_cols = num_cols if num_cols else num_rows + self.dtype = dtype if num_rows is not None: self.ptr = get_runtime().prog.create_sparse_matrix_builder( - num_rows, num_cols, max_num_triplets) + num_rows, num_cols, max_num_triplets, dtype) def _get_addr(self): """Get the address of the sparse matrix""" diff --git a/taichi/aot/module_loader.cpp b/taichi/aot/module_loader.cpp index d3436cbf3effc..9d168d3055da3 100644 --- a/taichi/aot/module_loader.cpp +++ b/taichi/aot/module_loader.cpp @@ -1,18 +1,26 @@ #include "taichi/aot/module_loader.h" +#include "taichi/backends/vulkan/aot_module_loader_impl.h" +#include "taichi/backends/metal/aot_module_loader_impl.h" + namespace taichi { namespace lang { namespace aot { -Kernel *ModuleLoader::get_kernel(const std::string &name) { - auto itr = loaded_kernels_.find(name); - if (itr != loaded_kernels_.end()) { - return itr->second.get(); +std::unique_ptr Module::load(const std::string &path, + Arch arch, + std::any mod_params) { + if (arch == Arch::vulkan) { +#ifdef TI_WITH_VULKAN + return vulkan::make_aot_module(mod_params); +#endif + } else if (arch == Arch::metal) { +#ifdef TI_WITH_METAL + return metal::make_aot_module(mod_params); +#endif + } else { + TI_NOT_IMPLEMENTED; } - auto k = make_new_kernel(name); - auto *kptr = k.get(); - loaded_kernels_[name] = std::move(k); - return kptr; } } // namespace aot diff --git a/taichi/aot/module_loader.h b/taichi/aot/module_loader.h index 303a3bffddbbd..634f8462e92ec 100644 --- a/taichi/aot/module_loader.h +++ b/taichi/aot/module_loader.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -13,10 +14,21 @@ namespace taichi { namespace lang { -class RuntimeContext; +struct RuntimeContext; namespace aot { +class TI_DLL_EXPORT Field { + public: + // Rule of 5 to make MSVC happy + Field() = default; + virtual ~Field() = default; + Field(const Field &) = delete; + Field &operator=(const Field &) = delete; + Field(Field &&) = default; + Field &operator=(Field &&) = default; +}; + class TI_DLL_EXPORT Kernel { public: // Rule of 5 to make MSVC happy @@ -37,30 +49,25 @@ class TI_DLL_EXPORT Kernel { virtual void launch(RuntimeContext *ctx) = 0; }; -class TI_DLL_EXPORT ModuleLoader { +class TI_DLL_EXPORT Module { public: // Rule of 5 to make MSVC happy - ModuleLoader() = default; - virtual ~ModuleLoader() = default; - ModuleLoader(const ModuleLoader &) = delete; - ModuleLoader &operator=(const ModuleLoader &) = delete; - ModuleLoader(ModuleLoader &&) = default; - ModuleLoader &operator=(ModuleLoader &&) = default; - - // TODO: Add method get_kernel(...) once the kernel field data will be - // generic/common across all backends. 
- - virtual bool get_field(const std::string &name, - aot::CompiledFieldData &field) = 0; - - /** - * @brief Get the kernel object - * - * @param name Name of the kernel - * @return Kernel* - */ - Kernel *get_kernel(const std::string &name); - + Module() = default; + virtual ~Module() = default; + Module(const Module &) = delete; + Module &operator=(const Module &) = delete; + Module(Module &&) = default; + Module &operator=(Module &&) = default; + + static std::unique_ptr load(const std::string &path, + Arch arch, + std::any mod_params); + + // Module metadata + virtual Arch arch() const = 0; + virtual uint64_t version() const = 0; + virtual std::unique_ptr get_kernel(const std::string &name) = 0; + virtual std::unique_ptr get_field(const std::string &name) = 0; virtual size_t get_root_size() const = 0; protected: diff --git a/taichi/backends/cpu/cpu_device.h b/taichi/backends/cpu/cpu_device.h index 2309d1b9edc5e..5d5ccfd5e5ff1 100644 --- a/taichi/backends/cpu/cpu_device.h +++ b/taichi/backends/cpu/cpu_device.h @@ -5,7 +5,7 @@ #include #include "taichi/common/core.h" -#include "taichi/backends/device.h" +#include "taichi/llvm/llvm_device.h" #include "taichi/system/virtual_memory.h" namespace taichi { @@ -75,7 +75,7 @@ class CpuStream : public Stream { void command_sync() override{TI_NOT_IMPLEMENTED}; }; -class CpuDevice : public Device { +class CpuDevice : public LlvmDevice { public: struct AllocInfo { void *ptr{nullptr}; diff --git a/taichi/backends/cuda/cuda_caching_allocator.cpp b/taichi/backends/cuda/cuda_caching_allocator.cpp index e857aabad37ce..cb037af88f14b 100644 --- a/taichi/backends/cuda/cuda_caching_allocator.cpp +++ b/taichi/backends/cuda/cuda_caching_allocator.cpp @@ -4,11 +4,12 @@ namespace taichi { namespace lang { namespace cuda { -CudaCachingAllocator::CudaCachingAllocator(Device *device) : device_(device) { +CudaCachingAllocator::CudaCachingAllocator(LlvmDevice *device) + : device_(device) { } uint64_t *CudaCachingAllocator::allocate( - const Device::LlvmRuntimeAllocParams ¶ms) { + const LlvmDevice::LlvmRuntimeAllocParams ¶ms) { uint64_t *ret{nullptr}; auto size_aligned = taichi::iroundup(params.size, taichi_page_size); auto it_blk = mem_blocks_.lower_bound(size_aligned); diff --git a/taichi/backends/cuda/cuda_caching_allocator.h b/taichi/backends/cuda/cuda_caching_allocator.h index 25bf804f05430..14af5a493792f 100644 --- a/taichi/backends/cuda/cuda_caching_allocator.h +++ b/taichi/backends/cuda/cuda_caching_allocator.h @@ -1,8 +1,8 @@ #pragma once -#include "taichi/backends/device.h" #include "taichi/common/core.h" #include "taichi/math/arithmetic.h" +#include "taichi/llvm/llvm_device.h" #include #include @@ -12,14 +12,14 @@ namespace cuda { class CudaCachingAllocator { public: - CudaCachingAllocator(Device *device); + CudaCachingAllocator(LlvmDevice *device); - uint64_t *allocate(const Device::LlvmRuntimeAllocParams ¶ms); + uint64_t *allocate(const LlvmDevice::LlvmRuntimeAllocParams ¶ms); void release(size_t sz, uint64_t *ptr); private: std::multimap mem_blocks_; - Device *device_{nullptr}; + LlvmDevice *device_{nullptr}; }; } // namespace cuda diff --git a/taichi/backends/cuda/cuda_device.h b/taichi/backends/cuda/cuda_device.h index 96d742ff43d52..039c17b012061 100644 --- a/taichi/backends/cuda/cuda_device.h +++ b/taichi/backends/cuda/cuda_device.h @@ -6,7 +6,7 @@ #include "taichi/backends/cuda/cuda_driver.h" #include "taichi/backends/cuda/cuda_caching_allocator.h" #include "taichi/backends/cuda/cuda_context.h" -#include "taichi/backends/device.h" +#include 
"taichi/llvm/llvm_device.h" namespace taichi { namespace lang { @@ -75,7 +75,7 @@ class CudaStream : public Stream { void command_sync() override{TI_NOT_IMPLEMENTED}; }; -class CudaDevice : public Device { +class CudaDevice : public LlvmDevice { public: struct AllocInfo { void *ptr{nullptr}; diff --git a/taichi/backends/cuda/cupti_toolkit.cpp b/taichi/backends/cuda/cupti_toolkit.cpp index 9a57b20363cbe..5c4244fba6222 100644 --- a/taichi/backends/cuda/cupti_toolkit.cpp +++ b/taichi/backends/cuda/cupti_toolkit.cpp @@ -22,7 +22,7 @@ enum class CuptiMetricsDefault : uint { CUPTI_METRIC_DEFAULT_TOTAL = 2 }; -constexpr const char *MetricListDefault[] = { +[[maybe_unused]] constexpr const char *MetricListDefault[] = { "smsp__cycles_elapsed.avg", // CUPTI_METRIC_KERNEL_ELAPSED_CLK_NUMS "smsp__cycles_elapsed.avg.per_second", // CUPTI_METRIC_CORE_FREQUENCY_HZS }; diff --git a/taichi/backends/cuda/cupti_toolkit.h b/taichi/backends/cuda/cupti_toolkit.h index f7e50632435ac..98a5c3309730f 100644 --- a/taichi/backends/cuda/cupti_toolkit.h +++ b/taichi/backends/cuda/cupti_toolkit.h @@ -38,8 +38,8 @@ class CuptiToolkit { void set_status(bool enable); private: - bool enabled_{false}; - CuptiConfig cupti_config_; + [[maybe_unused]] bool enabled_{false}; + [[maybe_unused]] CuptiConfig cupti_config_; CuptiImage cupti_image_; }; diff --git a/taichi/backends/device.cpp b/taichi/backends/device.cpp index 6dba7f1680eff..550f762b5ab4b 100644 --- a/taichi/backends/device.cpp +++ b/taichi/backends/device.cpp @@ -140,15 +140,6 @@ void Device::print_all_cap() const { } } -uint64_t *Device::allocate_llvm_runtime_memory_jit( - const LlvmRuntimeAllocParams ¶ms) { - params.runtime_jit->call( - "runtime_memory_allocate_aligned", params.runtime, params.size, - taichi_page_size); - return taichi_union_cast_with_different_sizes(fetch_result_uint64( - taichi_result_buffer_runtime_query_id, params.result_buffer)); -} - void GraphicsDevice::image_transition(DeviceAllocation img, ImageLayout old_layout, ImageLayout new_layout) { diff --git a/taichi/backends/device.h b/taichi/backends/device.h index 72c4736f98115..82b2788f43c64 100644 --- a/taichi/backends/device.h +++ b/taichi/backends/device.h @@ -407,25 +407,10 @@ class Device { AllocUsage usage{AllocUsage::Storage}; }; - struct LlvmRuntimeAllocParams : AllocParams { - bool use_cached{true}; - JITModule *runtime_jit{nullptr}; - LLVMRuntime *runtime{nullptr}; - uint64 *result_buffer{nullptr}; - }; - virtual DeviceAllocation allocate_memory(const AllocParams ¶ms) = 0; - virtual DeviceAllocation allocate_memory_runtime( - const LlvmRuntimeAllocParams ¶ms) { - TI_NOT_IMPLEMENTED - } - virtual void dealloc_memory(DeviceAllocation handle) = 0; - uint64_t *allocate_llvm_runtime_memory_jit( - const LlvmRuntimeAllocParams ¶ms); - virtual uint64_t get_memory_physical_pointer(DeviceAllocation handle) { TI_NOT_IMPLEMENTED } diff --git a/taichi/backends/metal/aot_module_loader_impl.cpp b/taichi/backends/metal/aot_module_loader_impl.cpp index 0975f52c818d0..a02192c6c7115 100644 --- a/taichi/backends/metal/aot_module_loader_impl.cpp +++ b/taichi/backends/metal/aot_module_loader_impl.cpp @@ -23,9 +23,9 @@ class KernelImpl : public aot::Kernel { const std::string kernel_name_; }; -class AotModuleLoaderImpl : public aot::ModuleLoader { +class AotModuleImpl : public aot::Module { public: - explicit AotModuleLoaderImpl(const AotModuleParams ¶ms) + explicit AotModuleImpl(const AotModuleParams ¶ms) : runtime_(params.runtime) { const std::string bin_path = fmt::format("{}/metadata.tcb", 
params.module_path); @@ -36,16 +36,26 @@ class AotModuleLoaderImpl : public aot::ModuleLoader { } } - bool get_field(const std::string &name, - aot::CompiledFieldData &field) override { - TI_ERROR("AOT: get_field for Metal not implemented yet"); - return false; + std::unique_ptr get_kernel(const std::string &name) override { + return make_new_kernel(name); + } + + std::unique_ptr get_field(const std::string &name) override { + TI_NOT_IMPLEMENTED; } size_t get_root_size() const override { return aot_data_.metadata.root_buffer_size; } + // Module metadata + Arch arch() const override { + return Arch::metal; + } + uint64_t version() const override { + TI_NOT_IMPLEMENTED; + } + private: std::unique_ptr make_new_kernel( const std::string &name) override { @@ -68,9 +78,9 @@ class AotModuleLoaderImpl : public aot::ModuleLoader { } // namespace -std::unique_ptr make_aot_module_loader( - const AotModuleParams ¶ms) { - return std::make_unique(params); +std::unique_ptr make_aot_module(std::any mod_params) { + AotModuleParams params = std::any_cast(mod_params); + return std::make_unique(params); } } // namespace metal diff --git a/taichi/backends/metal/aot_module_loader_impl.h b/taichi/backends/metal/aot_module_loader_impl.h index 86d9a0bab35b3..04cc71f494f78 100644 --- a/taichi/backends/metal/aot_module_loader_impl.h +++ b/taichi/backends/metal/aot_module_loader_impl.h @@ -17,9 +17,7 @@ struct AotModuleParams { KernelManager *runtime{nullptr}; }; -std::unique_ptr make_aot_module_loader( - const AotModuleParams ¶ms); - +std::unique_ptr make_aot_module(std::any mod_params); } // namespace metal } // namespace lang } // namespace taichi diff --git a/taichi/backends/vulkan/aot_module_loader_impl.cpp b/taichi/backends/vulkan/aot_module_loader_impl.cpp index ef85c8768715e..249d0fea570f0 100644 --- a/taichi/backends/vulkan/aot_module_loader_impl.cpp +++ b/taichi/backends/vulkan/aot_module_loader_impl.cpp @@ -26,76 +26,103 @@ class KernelImpl : public aot::Kernel { VkRuntime *const runtime_; const KernelHandle handle_; }; -} // namespace -AotModuleLoaderImpl::AotModuleLoaderImpl(const std::string &output_dir) { - const std::string bin_path = fmt::format("{}/metadata.tcb", output_dir); - read_from_binary_file(ti_aot_data_, bin_path); - for (int i = 0; i < ti_aot_data_.kernels.size(); ++i) { - auto k = ti_aot_data_.kernels[i]; - std::vector> spirv_sources_codes; - for (int j = 0; j < k.tasks_attribs.size(); ++j) { - std::vector res = read_spv_file(output_dir, k.tasks_attribs[j]); - spirv_sources_codes.push_back(res); +class AotModuleImpl : public aot::Module { + public: + explicit AotModuleImpl(const AotModuleParams ¶ms) + : runtime_(params.runtime) { + const std::string bin_path = + fmt::format("{}/metadata.tcb", params.module_path); + read_from_binary_file(ti_aot_data_, bin_path); + + for (int i = 0; i < ti_aot_data_.kernels.size(); ++i) { + auto k = ti_aot_data_.kernels[i]; + + std::vector> spirv_sources_codes; + for (int j = 0; j < k.tasks_attribs.size(); ++j) { + std::vector res = + read_spv_file(params.module_path, k.tasks_attribs[j]); + spirv_sources_codes.push_back(res); + } + ti_aot_data_.spirv_codes.push_back(spirv_sources_codes); } - ti_aot_data_.spirv_codes.push_back(spirv_sources_codes); } -} -std::vector AotModuleLoaderImpl::read_spv_file( - const std::string &output_dir, - const TaskAttributes &k) { - const std::string spv_path = fmt::format("{}/{}.spv", output_dir, k.name); - std::vector source_code; - std::ifstream fs(spv_path, std::ios_base::binary | std::ios::ate); - size_t size = 
fs.tellg(); - fs.seekg(0, std::ios::beg); - source_code.resize(size / sizeof(uint32_t)); - fs.read((char *)source_code.data(), size); - fs.close(); - return source_code; -} + std::unique_ptr get_kernel(const std::string &name) override { + return make_new_kernel(name); + } + + std::unique_ptr get_field(const std::string &name) override { + TI_NOT_IMPLEMENTED; + } + + size_t get_root_size() const override { + return ti_aot_data_.root_buffer_size; + } + + // Module metadata + Arch arch() const override { + return Arch::vulkan; + } + uint64_t version() const override { + TI_NOT_IMPLEMENTED; + } -bool AotModuleLoaderImpl::get_kernel(const std::string &name, - VkRuntime::RegisterParams &kernel) { - for (int i = 0; i < ti_aot_data_.kernels.size(); ++i) { - // Offloaded task names encode more than the name of the function, but for - // AOT, only use the name of the function which should be the first part of - // the struct - if (ti_aot_data_.kernels[i].name.rfind(name, 0) == 0) { - kernel.kernel_attribs = ti_aot_data_.kernels[i]; - kernel.task_spirv_source_codes = ti_aot_data_.spirv_codes[i]; - // We don't have to store the number of SNodeTree in |ti_aot_data_| yet, - // because right now we only support a single SNodeTree during AOT. - // TODO: Support multiple SNodeTrees in AOT. - kernel.num_snode_trees = 1; - return true; + private: + bool get_kernel_params_by_name(const std::string &name, + VkRuntime::RegisterParams &kernel) { + for (int i = 0; i < ti_aot_data_.kernels.size(); ++i) { + // Offloaded task names encode more than the name of the function, but for + // AOT, only use the name of the function which should be the first part + // of the struct + if (ti_aot_data_.kernels[i].name.rfind(name, 0) == 0) { + kernel.kernel_attribs = ti_aot_data_.kernels[i]; + kernel.task_spirv_source_codes = ti_aot_data_.spirv_codes[i]; + // We don't have to store the number of SNodeTree in |ti_aot_data_| yet, + // because right now we only support a single SNodeTree during AOT. + // TODO: Support multiple SNodeTrees in AOT. 
+ kernel.num_snode_trees = 1; + return true; + } } + return false; } - return false; -} + std::unique_ptr make_new_kernel( + const std::string &name) override { + VkRuntime::RegisterParams kparams; + if (!get_kernel_params_by_name(name, kparams)) { + TI_DEBUG("Failed to load kernel {}", name); + return nullptr; + } + auto handle = runtime_->register_taichi_kernel(kparams); + return std::make_unique(runtime_, handle); + } -std::unique_ptr AotModuleLoaderImpl::make_new_kernel( - const std::string &name) { - VkRuntime::RegisterParams kparams; - if (!get_kernel(name, kparams)) { - TI_DEBUG("Failed to load kernel {}", name); - return nullptr; + std::vector read_spv_file(const std::string &output_dir, + const TaskAttributes &k) { + const std::string spv_path = fmt::format("{}/{}.spv", output_dir, k.name); + std::vector source_code; + std::ifstream fs(spv_path, std::ios_base::binary | std::ios::ate); + size_t size = fs.tellg(); + fs.seekg(0, std::ios::beg); + source_code.resize(size / sizeof(uint32_t)); + fs.read((char *)source_code.data(), size); + fs.close(); + return source_code; } - auto handle = runtime_->register_taichi_kernel(kparams); - return std::make_unique(runtime_, handle); -} -bool AotModuleLoaderImpl::get_field(const std::string &name, - aot::CompiledFieldData &field) { - TI_ERROR("AOT: get_field for Vulkan not implemented yet"); - return false; -} + TaichiAotData ti_aot_data_; + VkRuntime *runtime_{nullptr}; +}; -size_t AotModuleLoaderImpl::get_root_size() const { - return ti_aot_data_.root_buffer_size; +} // namespace + +std::unique_ptr make_aot_module(std::any mod_params) { + AotModuleParams params = std::any_cast(mod_params); + return std::make_unique(params); } + } // namespace vulkan } // namespace lang } // namespace taichi diff --git a/taichi/backends/vulkan/aot_module_loader_impl.h b/taichi/backends/vulkan/aot_module_loader_impl.h index 48687b655e681..37e28d01f6388 100644 --- a/taichi/backends/vulkan/aot_module_loader_impl.h +++ b/taichi/backends/vulkan/aot_module_loader_impl.h @@ -15,27 +15,12 @@ namespace vulkan { class VkRuntime; -class TI_DLL_EXPORT AotModuleLoaderImpl : public aot::ModuleLoader { - public: - explicit AotModuleLoaderImpl(const std::string &output_dir); - - bool get_kernel(const std::string &name, VkRuntime::RegisterParams &kernel); - - bool get_field(const std::string &name, - aot::CompiledFieldData &field) override; - - size_t get_root_size() const override; - - private: - std::unique_ptr make_new_kernel( - const std::string &name) override; - std::vector read_spv_file(const std::string &output_dir, - const TaskAttributes &k); - - TaichiAotData ti_aot_data_; - VkRuntime *runtime_{nullptr}; +struct AotModuleParams { + std::string module_path; + VkRuntime *runtime{nullptr}; }; +std::unique_ptr make_aot_module(std::any mod_params); } // namespace vulkan } // namespace lang } // namespace taichi diff --git a/taichi/backends/vulkan/runtime.h b/taichi/backends/vulkan/runtime.h index aa92abe8af3c8..d862dcf5cf755 100644 --- a/taichi/backends/vulkan/runtime.h +++ b/taichi/backends/vulkan/runtime.h @@ -64,7 +64,7 @@ class CompiledTaichiKernel { TaichiKernelAttributes ti_kernel_attribs_; std::vector tasks_attribs_; - Device *device_; + [[maybe_unused]] Device *device_; InputBuffersMap input_buffers_; diff --git a/taichi/codegen/spirv/spirv_codegen.cpp b/taichi/codegen/spirv/spirv_codegen.cpp index 7350433c94608..821ccb12b91f6 100644 --- a/taichi/codegen/spirv/spirv_codegen.cpp +++ b/taichi/codegen/spirv/spirv_codegen.cpp @@ -1517,8 +1517,6 @@ class TaskCodegen : 
public IRVisitor { task_attribs_.advisory_total_num_threads = total_num_cells; int num_cells = snode->num_cells_per_container; - int upper_level_cells = total_num_cells / num_cells; - TI_INFO("ListGen {} * {}", total_num_cells / num_cells, num_cells); auto listgen_buffer = diff --git a/taichi/common/exceptions.h b/taichi/common/exceptions.h index 600b0de12a3e7..8cdb4cfe40223 100644 --- a/taichi/common/exceptions.h +++ b/taichi/common/exceptions.h @@ -5,26 +5,27 @@ namespace lang { class IRModified {}; -class TaichiTypeError : public std::exception { +class TaichiExceptionImpl : public std::exception { std::string msg_; public: - TaichiTypeError(const std::string msg) : msg_(msg) { + TaichiExceptionImpl(const std::string msg) : msg_(msg) { } const char *what() const throw() override { return msg_.c_str(); } }; -class TaichiSyntaxError : public std::exception { - std::string msg_; +class TaichiTypeError : public TaichiExceptionImpl { + using TaichiExceptionImpl::TaichiExceptionImpl; +}; - public: - TaichiSyntaxError(const std::string msg) : msg_(msg) { - } - const char *what() const throw() override { - return msg_.c_str(); - } +class TaichiSyntaxError : public TaichiExceptionImpl { + using TaichiExceptionImpl::TaichiExceptionImpl; +}; + +class TaichiRuntimeError : public TaichiExceptionImpl { + using TaichiExceptionImpl::TaichiExceptionImpl; }; } // namespace lang diff --git a/taichi/ir/frontend_ir.cpp b/taichi/ir/frontend_ir.cpp index 2e18ad7c76b05..cbbf7c089d0b2 100644 --- a/taichi/ir/frontend_ir.cpp +++ b/taichi/ir/frontend_ir.cpp @@ -909,19 +909,19 @@ void ASTBuilder::insert_expr_stmt(const Expr &val) { void ASTBuilder::create_scope(std::unique_ptr &list, LoopType tp) { TI_ASSERT(list == nullptr); - list = std::make_unique(); - if (!stack_.empty()) { - list->parent_stmt = get_last_stmt(); - } - stack_.push_back(list.get()); LoopState prev = loop_state_stack_.back(); if (tp == NotLoop) { loop_state_stack_.push_back(prev); - } else if (tp == For && prev == None) { + } else if (tp == For && stack_.size() == 1) { loop_state_stack_.push_back(Outermost); } else { loop_state_stack_.push_back(Inner); } + list = std::make_unique(); + if (!stack_.empty()) { + list->parent_stmt = get_last_stmt(); + } + stack_.push_back(list.get()); } void ASTBuilder::pop_scope() { diff --git a/taichi/ir/snode.cpp b/taichi/ir/snode.cpp index a6be09c4a0372..b5b715884868c 100644 --- a/taichi/ir/snode.cpp +++ b/taichi/ir/snode.cpp @@ -50,7 +50,10 @@ SNode &SNode::create_node(std::vector axes, auto &new_node = insert_children(type); for (int i = 0; i < (int)axes.size(); i++) { - TI_ASSERT(sizes[i] > 0); + if (sizes[i] <= 0) { + throw TaichiRuntimeError( + "Every dimension of a Taichi field should be positive"); + } auto &ind = axes[i]; new_node.extractors[ind.value].activate( bit::log2int(bit::least_pot_bound(sizes[i]))); diff --git a/taichi/ir/statements.cpp b/taichi/ir/statements.cpp index 5f2b1821dc64b..2da47acc5ed28 100644 --- a/taichi/ir/statements.cpp +++ b/taichi/ir/statements.cpp @@ -305,11 +305,11 @@ MeshForStmt::MeshForStmt(mesh::Mesh *mesh, int num_cpu_threads, int block_dim) : mesh(mesh), - major_from_type(element_type), body(std::move(body)), bit_vectorize(bit_vectorize), num_cpu_threads(num_cpu_threads), - block_dim(block_dim) { + block_dim(block_dim), + major_from_type(element_type) { this->body->parent_stmt = this; TI_STMT_REG_FIELDS; } diff --git a/taichi/jit/jit_session.cpp b/taichi/jit/jit_session.cpp index a979f55bf4ab9..dd9547589949b 100644 --- a/taichi/jit/jit_session.cpp +++ 
b/taichi/jit/jit_session.cpp @@ -33,6 +33,7 @@ std::unique_ptr JITSession::create(LlvmProgramImpl *llvm_prog, #else TI_ERROR("Llvm disabled"); #endif + return nullptr; } TLANG_NAMESPACE_END diff --git a/taichi/llvm/llvm_device.cpp b/taichi/llvm/llvm_device.cpp new file mode 100644 index 0000000000000..5643dd9dd16f2 --- /dev/null +++ b/taichi/llvm/llvm_device.cpp @@ -0,0 +1,16 @@ +#include "taichi/llvm/llvm_device.h" + +namespace taichi { +namespace lang { + +uint64_t *LlvmDevice::allocate_llvm_runtime_memory_jit( + const LlvmRuntimeAllocParams ¶ms) { + params.runtime_jit->call( + "runtime_memory_allocate_aligned", params.runtime, params.size, + taichi_page_size); + return taichi_union_cast_with_different_sizes(fetch_result_uint64( + taichi_result_buffer_runtime_query_id, params.result_buffer)); +} + +} // namespace lang +} // namespace taichi diff --git a/taichi/llvm/llvm_device.h b/taichi/llvm/llvm_device.h new file mode 100644 index 0000000000000..e692f68521459 --- /dev/null +++ b/taichi/llvm/llvm_device.h @@ -0,0 +1,27 @@ +#pragma once + +#include "taichi/backends/device.h" + +namespace taichi { +namespace lang { + +class LlvmDevice : public Device { + public: + struct LlvmRuntimeAllocParams : AllocParams { + bool use_cached{true}; + JITModule *runtime_jit{nullptr}; + LLVMRuntime *runtime{nullptr}; + uint64 *result_buffer{nullptr}; + }; + + virtual DeviceAllocation allocate_memory_runtime( + const LlvmRuntimeAllocParams ¶ms) { + TI_NOT_IMPLEMENTED; + } + + uint64_t *allocate_llvm_runtime_memory_jit( + const LlvmRuntimeAllocParams ¶ms); +}; + +} // namespace lang +} // namespace taichi diff --git a/taichi/llvm/llvm_program.cpp b/taichi/llvm/llvm_program.cpp index b28202da03b18..b0d7e83b790c2 100755 --- a/taichi/llvm/llvm_program.cpp +++ b/taichi/llvm/llvm_program.cpp @@ -573,6 +573,11 @@ cpu::CpuDevice *LlvmProgramImpl::cpu_device() { return static_cast(device_.get()); } +LlvmDevice *LlvmProgramImpl::llvm_device() { + TI_ASSERT(dynamic_cast(device_.get())); + return static_cast(device_.get()); +} + DevicePtr LlvmProgramImpl::get_snode_tree_device_ptr(int tree_id) { DeviceAllocation tree_alloc = snode_tree_allocs_[tree_id]; return tree_alloc.get_ptr(); @@ -588,7 +593,7 @@ DeviceAllocation LlvmProgramImpl::allocate_memory_ndarray( tlctx = llvm_context_host_.get(); } - return get_compute_device()->allocate_memory_runtime( + return llvm_device()->allocate_memory_runtime( {{alloc_size, /*host_write=*/false, /*host_read=*/false, /*export_sharing=*/false, AllocUsage::Storage}, config->ndarray_use_cached_allocator, diff --git a/taichi/llvm/llvm_program.h b/taichi/llvm/llvm_program.h index 21d3d89dd8a43..72ef3c111b42f 100644 --- a/taichi/llvm/llvm_program.h +++ b/taichi/llvm/llvm_program.h @@ -1,4 +1,5 @@ #pragma once +#include "taichi/llvm/llvm_device.h" #include "taichi/system/snode_tree_buffer_manager.h" #include "taichi/inc/constants.h" #include "taichi/program/compile_config.h" @@ -174,6 +175,7 @@ class LlvmProgramImpl : public ProgramImpl { std::shared_ptr device_{nullptr}; cuda::CudaDevice *cuda_device(); cpu::CpuDevice *cpu_device(); + LlvmDevice *llvm_device(); }; } // namespace lang } // namespace taichi diff --git a/taichi/program/ndarray.cpp b/taichi/program/ndarray.cpp index 196fda01fe989..a54923d353ade 100644 --- a/taichi/program/ndarray.cpp +++ b/taichi/program/ndarray.cpp @@ -16,7 +16,6 @@ Ndarray::Ndarray(Program *prog, const std::vector &shape) : dtype(type), shape(shape), - prog_impl_(prog->get_llvm_program_impl()), num_active_indices(shape.size()), 
nelement_(std::accumulate(std::begin(shape), std::end(shape), @@ -24,6 +23,7 @@ Ndarray::Ndarray(Program *prog, std::multiplies<>())), element_size_(data_type_size(dtype)), device_(prog->get_device_shared()), + prog_impl_(prog->get_llvm_program_impl()), rw_accessors_bank_(&prog->get_ndarray_rw_accessors_bank()) { ndarray_alloc_ = prog->allocate_memory_ndarray(nelement_ * element_size_, prog->result_buffer); diff --git a/taichi/program/program.cpp b/taichi/program/program.cpp index 269ca30092642..48fd81c676000 100644 --- a/taichi/program/program.cpp +++ b/taichi/program/program.cpp @@ -81,8 +81,12 @@ Program::Program(Arch desired_arch) TI_ERROR("This taichi is not compiled with LLVM"); #endif } else if (config.arch == Arch::metal) { +#ifdef TI_WITH_METAL TI_ASSERT(metal::is_metal_api_available()); program_impl_ = std::make_unique(config); +#else + TI_ERROR("This taichi is not compiled with Metal") +#endif } else if (config.arch == Arch::vulkan) { #ifdef TI_WITH_VULKAN TI_ASSERT(vulkan::is_vulkan_api_available()); diff --git a/taichi/program/sparse_matrix.cpp b/taichi/program/sparse_matrix.cpp index a42ab8d205f71..4d2a26f32ce41 100644 --- a/taichi/program/sparse_matrix.cpp +++ b/taichi/program/sparse_matrix.cpp @@ -10,34 +10,53 @@ namespace lang { SparseMatrixBuilder::SparseMatrixBuilder(int rows, int cols, - int max_num_triplets) - : rows_(rows), cols_(cols), max_num_triplets_(max_num_triplets) { - data_.reserve(max_num_triplets * 3); - data_base_ptr_ = get_data_base_ptr(); -} - -void *SparseMatrixBuilder::get_data_base_ptr() { - return data_.data(); -} - -void SparseMatrixBuilder::print_triplets() { - fmt::print("n={}, m={}, num_triplets={} (max={})", rows_, cols_, + int max_num_triplets, + DataType dtype) + : rows_(rows), + cols_(cols), + max_num_triplets_(max_num_triplets), + dtype_(dtype) { + auto element_size = data_type_size(dtype); + TI_ASSERT((element_size == 4 || element_size == 8)); + data_base_ptr_ = + std::make_unique(max_num_triplets_ * 3 * element_size); +} + +template +void SparseMatrixBuilder::print_template() { + fmt::print("n={}, m={}, num_triplets={} (max={})\n", rows_, cols_, num_triplets_, max_num_triplets_); + T *data = reinterpret_cast(data_base_ptr_.get()); for (int64 i = 0; i < num_triplets_; i++) { - fmt::print("({}, {}) val={}", data_[i * 3], data_[i * 3 + 1], - taichi_union_cast(data_[i * 3 + 2])); + fmt::print("({}, {}) val={}\n", ((G *)data)[i * 3], ((G *)data)[i * 3 + 1], + taichi_union_cast(data[i * 3 + 2])); } fmt::print("\n"); } -SparseMatrix SparseMatrixBuilder::build() { - TI_ASSERT(built_ == false); - built_ = true; - using T = Eigen::Triplet; - std::vector triplets; +void SparseMatrixBuilder::print_triplets() { + auto element_size = data_type_size(dtype_); + switch (element_size) { + case 4: + print_template(); + break; + case 8: + print_template(); + break; + default: + TI_ERROR("Unsupported sparse matrix data type!"); + break; + } +} + +template +SparseMatrix SparseMatrixBuilder::build_template() { + using V = Eigen::Triplet; + std::vector triplets; + T *data = reinterpret_cast(data_base_ptr_.get()); for (int i = 0; i < num_triplets_; i++) { - triplets.push_back(T(data_[i * 3], data_[i * 3 + 1], - taichi_union_cast(data_[i * 3 + 2]))); + triplets.push_back(V(((G *)data)[i * 3], ((G *)data)[i * 3 + 1], + taichi_union_cast(data[i * 3 + 2]))); } SparseMatrix sm(rows_, cols_); sm.get_matrix().setFromTriplets(triplets.begin(), triplets.end()); @@ -45,6 +64,21 @@ SparseMatrix SparseMatrixBuilder::build() { return sm; } +SparseMatrix 
SparseMatrixBuilder::build() { + TI_ASSERT(built_ == false); + built_ = true; + auto element_size = data_type_size(dtype_); + switch (element_size) { + case 4: + return build_template(); + case 8: + return build_template(); + default: + TI_ERROR("Unsupported sparse matrix data type!"); + break; + } +} + void SparseMatrixBuilder::clear() { built_ = false; num_triplets_ = 0; diff --git a/taichi/program/sparse_matrix.h b/taichi/program/sparse_matrix.h index 2de867d1ce249..f5baa62539912 100644 --- a/taichi/program/sparse_matrix.h +++ b/taichi/program/sparse_matrix.h @@ -2,6 +2,8 @@ #include "taichi/common/core.h" #include "taichi/inc/constants.h" +#include "taichi/ir/type_utils.h" + #include "Eigen/Sparse" namespace taichi { @@ -11,9 +13,7 @@ class SparseMatrix; class SparseMatrixBuilder { public: - SparseMatrixBuilder(int rows, int cols, int max_num_triplets); - - void *get_data_base_ptr(); + SparseMatrixBuilder(int rows, int cols, int max_num_triplets, DataType dtype); void print_triplets(); @@ -21,14 +21,21 @@ class SparseMatrixBuilder { void clear(); + private: + template + void print_template(); + + template + SparseMatrix build_template(); + private: uint64 num_triplets_{0}; - void *data_base_ptr_{nullptr}; - std::vector data_; + std::unique_ptr data_base_ptr_{nullptr}; int rows_{0}; int cols_{0}; uint64 max_num_triplets_{0}; bool built_{false}; + DataType dtype_{PrimitiveType::f32}; }; class SparseMatrix { diff --git a/taichi/python/export_lang.cpp b/taichi/python/export_lang.cpp index 0d52388c295e7..a8cbfa1bafe33 100644 --- a/taichi/python/export_lang.cpp +++ b/taichi/python/export_lang.cpp @@ -64,6 +64,8 @@ void export_lang(py::module &m) { PyExc_TypeError); py::register_exception(m, "TaichiSyntaxError", PyExc_SyntaxError); + py::register_exception(m, "TaichiRuntimeError", + PyExc_RuntimeError); py::enum_(m, "Arch", py::arithmetic()) #define PER_ARCH(x) .value(#x, Arch::x) #include "taichi/inc/archs.inc.h" @@ -357,10 +359,11 @@ void export_lang(py::module &m) { .def("create_function", &Program::create_function, py::return_value_policy::reference) .def("create_sparse_matrix_builder", - [](Program *program, int n, int m, uint64 max_num_entries) { + [](Program *program, int n, int m, uint64 max_num_entries, + DataType dtype) { TI_ERROR_IF(!arch_is_cpu(program->config.arch), "SparseMatrix only supports CPU for now."); - return SparseMatrixBuilder(n, m, max_num_entries); + return SparseMatrixBuilder(n, m, max_num_entries, dtype); }) .def("create_sparse_matrix", [](Program *program, int n, int m) { diff --git a/taichi/runtime/llvm/internal_functions.h b/taichi/runtime/llvm/internal_functions.h index b8451635204ed..591ccddc64bcf 100644 --- a/taichi/runtime/llvm/internal_functions.h +++ b/taichi/runtime/llvm/internal_functions.h @@ -9,6 +9,17 @@ } \ } while (0) +#define ATOMIC_INSERT(T) \ + do { \ + auto base_ptr = (int64 *)base_ptr_; \ + int64 *num_triplets = base_ptr; \ + auto data_base_ptr = *(T **)(base_ptr + 1); \ + auto triplet_id = atomic_add_i64(num_triplets, 1); \ + data_base_ptr[triplet_id * 3] = i; \ + data_base_ptr[triplet_id * 3 + 1] = j; \ + data_base_ptr[triplet_id * 3 + 2] = taichi_union_cast(value); \ + } while (0); + i32 do_nothing(RuntimeContext *context) { return 0; } @@ -20,20 +31,21 @@ i32 refresh_counter(RuntimeContext *context) { return 0; } -i32 insert_triplet(RuntimeContext *context, - int64 base_ptr_, - int i, - int j, - float value) { - auto base_ptr = (int64 *)base_ptr_; - - int64 *num_triplets = base_ptr; - auto data_base_ptr = *(int32 **)(base_ptr + 1); +i32 
insert_triplet_f32(RuntimeContext *context,
+                       int64 base_ptr_,
+                       int i,
+                       int j,
+                       float value) {
+  ATOMIC_INSERT(int32);
+  return 0;
+}
 
-  auto triplet_id = atomic_add_i64(num_triplets, 1);
-  data_base_ptr[triplet_id * 3] = i;
-  data_base_ptr[triplet_id * 3 + 1] = j;
-  data_base_ptr[triplet_id * 3 + 2] = taichi_union_cast<int32>(value);
+i32 insert_triplet_f64(RuntimeContext *context,
+                       int64 base_ptr_,
+                       int i,
+                       int j,
+                       float64 value) {
+  ATOMIC_INSERT(int64);
   return 0;
 }
diff --git a/taichi/transforms/lower_ast.cpp b/taichi/transforms/lower_ast.cpp
index 88995f468a217..81daee000c54d 100644
--- a/taichi/transforms/lower_ast.cpp
+++ b/taichi/transforms/lower_ast.cpp
@@ -33,6 +33,7 @@ class LowerAST : public IRVisitor {
   Stmt *capturing_loop_;
   std::unordered_set<Stmt *> detected_fors_with_break_;
   Block *current_block_;
+  int current_block_depth_;
 
   FlattenContext make_flatten_ctx() {
     FlattenContext fctx;
@@ -43,7 +44,8 @@ class LowerAST : public IRVisitor {
  public:
   explicit LowerAST(const std::unordered_set<Stmt *> &_detected_fors_with_break)
       : detected_fors_with_break_(_detected_fors_with_break),
-        current_block_(nullptr) {
+        current_block_(nullptr),
+        current_block_depth_(0) {
     // TODO: change this to false
     allow_undefined_visitor = true;
     capturing_loop_ = nullptr;
@@ -53,9 +55,11 @@ class LowerAST : public IRVisitor {
     auto backup_block = this->current_block_;
     this->current_block_ = stmt_list;
     auto stmts = make_raw_pointer_list(stmt_list->statements);
+    current_block_depth_++;
     for (auto &stmt : stmts) {
       stmt->accept(this);
     }
+    current_block_depth_--;
     this->current_block_ = backup_block;
   }
@@ -201,8 +205,8 @@ class LowerAST : public IRVisitor {
     flatten_rvalue(begin, &fctx);
     flatten_rvalue(end, &fctx);
     bool is_good_range_for =
-        capturing_loop_ == nullptr || detected_fors_with_break_.find(stmt) ==
-                                          detected_fors_with_break_.end();
+        current_block_depth_ == 1 || detected_fors_with_break_.find(stmt) ==
+                                         detected_fors_with_break_.end();
     // #578: a good range for is a range for that doesn't contains a break
     // statement
     if (is_good_range_for) {
diff --git a/taichi/transforms/make_mesh_block_local.cpp b/taichi/transforms/make_mesh_block_local.cpp
index bb4699237eefa..98f5319141a71 100644
--- a/taichi/transforms/make_mesh_block_local.cpp
+++ b/taichi/transforms/make_mesh_block_local.cpp
@@ -400,7 +400,7 @@ void MakeMeshBlockLocal::fetch_mapping(
 
 MakeMeshBlockLocal::MakeMeshBlockLocal(OffloadedStmt *offload,
                                        const CompileConfig &config)
-    : offload_(offload), config_(config) {
+    : config_(config), offload_(offload) {
   // Step 0: simplify l2g + g2r -> l2r
   simplify_nested_conversion();
diff --git a/taichi/transforms/offload.cpp b/taichi/transforms/offload.cpp
index 00abf97230691..135e0c7053e16 100644
--- a/taichi/transforms/offload.cpp
+++ b/taichi/transforms/offload.cpp
@@ -672,7 +672,7 @@ class FixCrossOffloadReferences : public BasicStmtVisitor {
   }
 
  private:
-  const CompileConfig &config_;
+  [[maybe_unused]] const CompileConfig &config_;
   StmtToOffsetMap local_to_global_offset_;
   std::unordered_map<Stmt *, Stmt *> stmt_to_offloaded_;
   OffloadedRanges *const offloaded_ranges_;
diff --git a/tests/cpp/aot/aot_save_load_test.cpp b/tests/cpp/aot/aot_save_load_test.cpp
index 5ffff6ae87600..45885d7e28bf8 100644
--- a/tests/cpp/aot/aot_save_load_test.cpp
+++ b/tests/cpp/aot/aot_save_load_test.cpp
@@ -4,13 +4,17 @@
 #include "taichi/program/program.h"
 #ifdef TI_WITH_VULKAN
 #include "taichi/backends/vulkan/aot_module_loader_impl.h"
+#include "taichi/backends/device.h"
+#include "taichi/backends/vulkan/vulkan_device.h"
+#include 
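
The `current_block_depth_` counter added to `LowerAST` above changes which range-fors are treated as the parallel outermost loop: a for that is syntactically outermost but wrapped in a non-loop scope such as an `if` now sits at depth 2 and is lowered serially, so a `break` inside it becomes legal. A small sketch of the pattern this enables, in the spirit of the new `test_break_in_outermost_for_not_in_outermost_scope` regression test below (function name and bounds are made up for illustration):

```python
import taichi as ti

ti.init(arch=ti.cpu)

@ti.kernel
def first_multiple_of_seven(start: ti.i32) -> ti.i32:
    found = -1
    if start > 0:
        # This for is outermost in the kernel body but sits inside an `if`,
        # so it is now lowered as a serial loop and may contain a break.
        for i in range(start, start + 7):
            if i % 7 == 0:
                found = i
                break
    return found

assert first_multiple_of_seven(20) == 21
```
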
"taichi/backends/vulkan/vulkan_device_creator.h" #include "taichi/backends/vulkan/vulkan_loader.h" +#include "taichi/backends/vulkan/vulkan_utils.h" #endif using namespace taichi; using namespace lang; -static void aot_save() { +[[maybe_unused]] static void aot_save() { auto program = Program(Arch::vulkan); program.config.advanced_optimization = false; @@ -91,19 +95,59 @@ TEST(AotSaveLoad, Vulkan) { aot_save(); - vulkan::AotModuleLoaderImpl aot_loader("."); - vulkan::VkRuntime::RegisterParams init_kernel, ret_kernel; + // API based on proposal https://github.com/taichi-dev/taichi/issues/3642 + // Initialize Vulkan program + taichi::uint64 *result_buffer{nullptr}; + taichi::lang::RuntimeContext host_ctx; + auto memory_pool = + std::make_unique(Arch::vulkan, nullptr); + result_buffer = (taichi::uint64 *)memory_pool->allocate( + sizeof(taichi::uint64) * taichi_result_buffer_entries, 8); + + // Create Taichi Device for computation + lang::vulkan::VulkanDeviceCreator::Params evd_params; + evd_params.api_version = + taichi::lang::vulkan::VulkanEnvSettings::kApiVersion(); + auto embedded_device = + std::make_unique(evd_params); + + // Create Vulkan runtime + vulkan::VkRuntime::Params params; + params.host_result_buffer = result_buffer; + params.device = embedded_device->device(); + auto vulkan_runtime = + std::make_unique(std::move(params)); + + // Run AOT module loader + vulkan::AotModuleParams mod_params; + mod_params.module_path = "."; + mod_params.runtime = vulkan_runtime.get(); + + std::unique_ptr vk_module = + aot::Module::load(".", Arch::vulkan, mod_params); + EXPECT_TRUE(vk_module); + + // Retrieve kernels/fields/etc from AOT module to initialize runtime + auto root_size = vk_module->get_root_size(); + EXPECT_EQ(root_size, 64); + vulkan_runtime->add_root_buffer(root_size); - auto ret = aot_loader.get_kernel("init", init_kernel); - EXPECT_TRUE(ret); + auto init_kernel = vk_module->get_kernel("init"); + EXPECT_TRUE(init_kernel); - ret = aot_loader.get_kernel("ret", ret_kernel); - EXPECT_TRUE(ret); + auto ret_kernel = vk_module->get_kernel("ret"); + EXPECT_TRUE(ret_kernel); - ret = aot_loader.get_kernel("ret2", ret_kernel); - EXPECT_FALSE(ret); + auto ret2_kernel = vk_module->get_kernel("ret2"); + EXPECT_FALSE(ret2_kernel); - auto root_size = aot_loader.get_root_size(); - EXPECT_EQ(root_size, 64); + // Run kernels + init_kernel->launch(&host_ctx); + ret_kernel->launch(&host_ctx); + vulkan_runtime->synchronize(); + + // auto x_field = vk_module.get_field("x"); + // EXPECT_TRUE(x_field); + // x_field.copy_to(/*dst=*/x.get()); } #endif diff --git a/tests/cpp/transforms/extract_constant_test.cpp b/tests/cpp/transforms/extract_constant_test.cpp index b579f9178a429..4e4693d3bc501 100644 --- a/tests/cpp/transforms/extract_constant_test.cpp +++ b/tests/cpp/transforms/extract_constant_test.cpp @@ -26,7 +26,7 @@ TEST_F(ExtractConstantTest, ExtractConstant) { builder.set_insertion_point_to_loop_begin(for_stmt); auto *x = builder.create_local_var(get_data_type()); auto *x_v = builder.create_local_load(x); - auto *sum = builder.create_add(x_v, builder.get_int32(1)); + builder.create_add(x_v, builder.get_int32(1)); auto ir = builder.extract_ir(); ASSERT_TRUE(ir->is()); diff --git a/tests/python/test_api.py b/tests/python/test_api.py index 52862e66c4328..bd732a8152146 100644 --- a/tests/python/test_api.py +++ b/tests/python/test_api.py @@ -18,21 +18,22 @@ 'atomic_min', 'atomic_or', 'atomic_sub', 'atomic_xor', 'axes', 'bit_cast', 'bit_shr', 'block_dim', 'block_local', 'cache_read_only', 'cast', 'cc', 
'ceil', 'clear_all_gradients', 'cos', 'cpu', 'cuda', 'data_oriented', - 'deactivate', 'deactivate_all_snodes', 'dx11', 'eig', 'exp', 'ext_arr', - 'extension', 'f16', 'f32', 'f64', 'field', 'float16', 'float32', 'float64', - 'floor', 'func', 'get_addr', 'global_thread_idx', 'gpu', 'grouped', - 'hex_to_rgb', 'i', 'i16', 'i32', 'i64', 'i8', 'ij', 'ijk', 'ijkl', 'ijl', - 'ik', 'ikl', 'il', 'init', 'int16', 'int32', 'int64', 'int8', 'is_active', - 'is_logging_effective', 'j', 'jk', 'jkl', 'jl', 'k', 'kernel', 'kl', 'l', - 'lang', 'length', 'linalg', 'log', 'max', 'mesh_local', 'mesh_patch_idx', - 'metal', 'min', 'ndarray', 'ndrange', 'no_activate', 'one', 'opengl', - 'parallelize', 'polar_decompose', 'pow', 'profiler', 'randn', 'random', - 'raw_div', 'raw_mod', 'rescale_index', 'reset', 'rgb_to_hex', 'root', - 'round', 'rsqrt', 'select', 'set_logging_level', 'sin', - 'sparse_matrix_builder', 'sqrt', 'static', 'static_assert', 'static_print', - 'stop_grad', 'svd', 'sym_eig', 'sync', 'tan', 'tanh', 'template', 'tools', - 'types', 'u16', 'u32', 'u64', 'u8', 'ui', 'uint16', 'uint32', 'uint64', - 'uint8', 'vulkan', 'wasm', 'x64', 'x86_64', 'zero' + 'deactivate', 'deactivate_all_snodes', 'dx11', 'eig', 'exp', + 'experimental', 'ext_arr', 'extension', 'f16', 'f32', 'f64', 'field', + 'float16', 'float32', 'float64', 'floor', 'func', 'get_addr', + 'global_thread_idx', 'gpu', 'grouped', 'hex_to_rgb', 'i', 'i16', 'i32', + 'i64', 'i8', 'ij', 'ijk', 'ijkl', 'ijl', 'ik', 'ikl', 'il', 'init', + 'int16', 'int32', 'int64', 'int8', 'is_active', 'is_logging_effective', + 'j', 'jk', 'jkl', 'jl', 'k', 'kernel', 'kl', 'l', 'lang', 'length', + 'linalg', 'log', 'max', 'mesh_local', 'mesh_patch_idx', 'metal', 'min', + 'ndarray', 'ndrange', 'no_activate', 'one', 'opengl', 'parallelize', + 'polar_decompose', 'pow', 'profiler', 'randn', 'random', 'raw_div', + 'raw_mod', 'rescale_index', 'reset', 'rgb_to_hex', 'root', 'round', + 'rsqrt', 'select', 'set_logging_level', 'sin', 'sparse_matrix_builder', + 'sqrt', 'static', 'static_assert', 'static_print', 'stop_grad', 'svd', + 'sym_eig', 'sync', 'tan', 'tanh', 'template', 'tools', 'types', 'u16', + 'u32', 'u64', 'u8', 'ui', 'uint16', 'uint32', 'uint64', 'uint8', 'vulkan', + 'wasm', 'x64', 'x86_64', 'zero' ] user_api[ti.Field] = [ 'copy_from', 'dtype', 'fill', 'from_numpy', 'from_torch', 'parent', diff --git a/tests/python/test_field.py b/tests/python/test_field.py index 7f1bcd1fbcf0c..06af7ed63a0ac 100644 --- a/tests/python/test_field.py +++ b/tests/python/test_field.py @@ -181,3 +181,11 @@ def test_field_copy_from_with_non_filed_object(): other = np.zeros((2, 3)) with pytest.raises(TypeError): x.copy_from(other) + + +@test_utils.test() +def test_field_shape_0(): + with pytest.raises( + ti._lib.core.TaichiRuntimeError, + match="Every dimension of a Taichi field should be positive"): + x = ti.field(dtype=ti.f32, shape=0) diff --git a/tests/python/test_function.py b/tests/python/test_function.py index eac5d4395c627..adf27083336cf 100644 --- a/tests/python/test_function.py +++ b/tests/python/test_function.py @@ -4,11 +4,11 @@ from tests import test_utils -@test_utils.test(experimental_real_function=True, arch=[ti.cpu, ti.gpu]) +@test_utils.test(arch=[ti.cpu, ti.gpu]) def test_function_without_return(): x = ti.field(ti.i32, shape=()) - @ti.func + @ti.experimental.real_func def foo(val: ti.i32): x[None] += val @@ -22,13 +22,11 @@ def run(): assert x[None] == 42 -@test_utils.test(experimental_real_function=True, - arch=[ti.cpu, ti.gpu], - debug=True) +@test_utils.test(arch=[ti.cpu, 
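
`TaichiRuntimeError`, registered in `export_lang.cpp` above and surfaced as a Python `RuntimeError` subclass, is what the new `test_field_shape_0` catches. A hedged sketch of what user code sees when a field dimension is not positive:

```python
import taichi as ti

ti.init(arch=ti.cpu)

try:
    x = ti.field(dtype=ti.f32, shape=0)  # zero-sized dimension is rejected
except ti._lib.core.TaichiRuntimeError as e:
    # Message asserted by the new test:
    # "Every dimension of a Taichi field should be positive"
    print(e)
```
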
ti.gpu], debug=True) def test_function_with_return(): x = ti.field(ti.i32, shape=()) - @ti.func + @ti.experimental.real_func def foo(val: ti.i32) -> ti.i32: x[None] += val return val @@ -44,113 +42,25 @@ def run(): assert x[None] == 42 -# @test_utils.test(experimental_real_function=True, arch=[ti.cpu, ti.gpu]) -# def test_function_with_multiple_last_return(): -# x = ti.field(ti.i32, shape=()) -# -# @ti.func -# def foo(val: ti.i32) -> ti.i32: -# if x[None]: -# x[None] += val * 2 -# return val * 2 -# else: -# x[None] += val -# return val -# -# @ti.kernel -# def run(): -# a = foo(40) -# foo(1) -# assert a == 40 -# -# x[None] = 0 -# run() -# assert x[None] == 42 -# -# -# @test_utils.test(experimental_real_function=True, arch=[ti.cpu, ti.gpu]) -# def test_call_expressions(): -# x = ti.field(ti.i32, shape=()) -# -# @ti.func -# def foo(val: ti.i32) -> ti.i32: -# if x[None] > 10: -# x[None] += 1 -# x[None] += val -# return 0 -# -# @ti.kernel -# def run(): -# assert foo(15) == 0 -# assert foo(10) == 0 -# -# x[None] = 0 -# run() -# assert x[None] == 26 -# -# -# @test_utils.test(arch=ti.cpu, experimental_real_function=True) -# def test_failing_multiple_return(): -# x = ti.field(ti.i32, shape=()) -# -# @ti.func -# def foo(val: ti.i32) -> ti.i32: -# if x[None] > 10: -# if x[None] > 20: -# return 1 -# x[None] += 1 -# x[None] += val -# return 0 -# -# @ti.kernel -# def run(): -# assert foo(15) == 0 -# assert foo(10) == 0 -# assert foo(100) == 1 -# -# with pytest.raises(AssertionError): -# x[None] = 0 -# run() -# assert x[None] == 26 - -# -# @test_utils.test(experimental_real_function=True, arch=[ti.cpu, ti.gpu]) -# def test_python_function(): -# x = ti.field(ti.i32, shape=()) -# -# @ti.func -# def inc(val: ti.i32): -# x[None] += val -# -# def identity(x): -# return x -# -# @ti.data_oriented -# class A: -# def __init__(self): -# self.count = ti.field(ti.i32, shape=()) -# self.count[None] = 0 -# -# @ti.lang.kernel_impl.pyfunc -# def dec(self, val: ti.i32) -> ti.i32: -# self.count[None] += 1 -# x[None] -= val -# return self.count[None] -# -# @ti.kernel -# def run(self) -> ti.i32: -# a = self.dec(1) -# identity(2) -# inc(identity(3)) -# return a -# -# a = A() -# x[None] = 0 -# assert a.run() == 1 -# assert a.run() == 2 -# assert x[None] == 4 -# assert a.dec(4) == 3 -# assert x[None] == 0 +@test_utils.test(arch=[ti.cpu, ti.gpu]) +def test_call_expressions(): + x = ti.field(ti.i32, shape=()) + + @ti.experimental.real_func + def foo(val: ti.i32) -> ti.i32: + if x[None] > 10: + x[None] += 1 + x[None] += val + return 0 + + @ti.kernel + def run(): + assert foo(15) == 0 + assert foo(10) == 0 + + x[None] = 0 + run() + assert x[None] == 26 @test_utils.test(arch=[ti.cpu, ti.cuda], debug=True) @@ -220,7 +130,7 @@ def run_func(): run_func() -@test_utils.test(experimental_real_function=True, arch=[ti.cpu, ti.gpu]) +@test_utils.test(arch=[ti.cpu, ti.gpu]) def test_experimental_templates(): x = ti.field(ti.i32, shape=()) y = ti.field(ti.i32, shape=()) @@ -240,7 +150,7 @@ def run_kernel(): assert x[None] == 11 assert y[None] == 21 - @ti.func + @ti.experimental.real_func def inc(x: ti.template()): x[None] += 1 @@ -266,21 +176,21 @@ def verify(): verify() -@test_utils.test(experimental_real_function=True, arch=[ti.cpu, ti.gpu]) +@test_utils.test(arch=[ti.cpu, ti.gpu]) def test_missing_arg_annotation(): with pytest.raises(ti.TaichiSyntaxError, match='must be type annotated'): - @ti.func + @ti.experimental.real_func def add(a, b: ti.i32) -> ti.i32: return a + b -@test_utils.test(experimental_real_function=True, arch=[ti.cpu, 
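
These test changes replace the global `experimental_real_function=True` init flag with the per-function `@ti.experimental.real_func` decorator. A minimal sketch of the new spelling, using only constructs the updated tests themselves exercise (annotated arguments and return value, calls from a kernel):

```python
import taichi as ti

ti.init(arch=ti.cpu)

x = ti.field(ti.i32, shape=())

@ti.experimental.real_func
def accumulate(val: ti.i32) -> ti.i32:
    # Real functions require annotated arguments and an annotated return value.
    x[None] += val
    return val

@ti.kernel
def run() -> ti.i32:
    return accumulate(41) + accumulate(1)

x[None] = 0
assert run() == 42
assert x[None] == 42
```
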
ti.gpu]) +@test_utils.test(arch=[ti.cpu, ti.gpu]) def test_missing_return_annotation(): with pytest.raises(ti.TaichiCompilationError, match='return value must be annotated'): - @ti.func + @ti.experimental.real_func def add(a: ti.i32, b: ti.i32): return a + b diff --git a/tests/python/test_literal.py b/tests/python/test_literal.py index f62822b9c8239..88ac3827df213 100644 --- a/tests/python/test_literal.py +++ b/tests/python/test_literal.py @@ -23,7 +23,9 @@ def test_literal_multi_args_error(): def multi_args_error(): a = ti.i64(1, 2) - with pytest.raises(ti.TaichiSyntaxError): + with pytest.raises( + ti.TaichiSyntaxError, + match="Type annotation can only be given to a single literal."): multi_args_error() @@ -33,7 +35,9 @@ def test_literal_keywords_error(): def keywords_error(): a = ti.f64(1, x=2) - with pytest.raises(ti.TaichiSyntaxError): + with pytest.raises( + ti.TaichiSyntaxError, + match="Type annotation can only be given to a single literal."): keywords_error() @@ -44,5 +48,35 @@ def expr_error(): a = 1 b = ti.f16(a) - with pytest.raises(ti.TaichiSyntaxError): + with pytest.raises( + ti.TaichiSyntaxError, + match="Type annotation can only be given to a single literal."): expr_error() + + +@test_utils.test() +def test_literal_int_annotation_error(): + @ti.kernel + def int_annotation_error(): + a = ti.f32(0) + + with pytest.raises( + ti.TaichiTypeError, + match= + "Integer literals must be annotated with a integer type. For type casting, use `ti.cast`." + ): + int_annotation_error() + + +@test_utils.test() +def test_literal_float_annotation_error(): + @ti.kernel + def float_annotation_error(): + a = ti.i32(0.0) + + with pytest.raises( + ti.TaichiTypeError, + match= + "Floating-point literals must be annotated with a floating-point type. For type casting, use `ti.cast`." 
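
The sharpened messages above separate two failure modes: passing multiple arguments, keywords, or a non-literal expression to a type annotation is a `TaichiSyntaxError`, while annotating a literal with the wrong numeric family is a `TaichiTypeError`. A short sketch of the intended usage next to one of the failing cases the tests check:

```python
import taichi as ti

ti.init(arch=ti.cpu)

@ti.kernel
def ok() -> ti.f32:
    a = ti.i32(2)    # integer literal, integer annotation: fine
    b = ti.f32(0.5)  # float literal, float annotation: fine
    return a * b

print(ok())  # 1.0

@ti.kernel
def wrong_family():
    c = ti.f32(0)  # integer literal annotated with a float type

# wrong_family() raises ti.TaichiTypeError:
# "Integer literals must be annotated with a integer type. For type casting, use `ti.cast`."
```
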
+ ): + float_annotation_error() diff --git a/tests/python/test_loops.py b/tests/python/test_loops.py index 588bf302d34ca..81b0456df8cd3 100644 --- a/tests/python/test_loops.py +++ b/tests/python/test_loops.py @@ -172,3 +172,18 @@ def func(): x[None] = 1 func() assert x[None] == 1 + + +@test_utils.test() +def test_break_in_outermost_for_not_in_outermost_scope(): + @ti.kernel + def foo() -> ti.i32: + a = 0 + if True: + for i in range(1000): + if i == 100: + break + a += 1 + return a + + assert foo() == 100 diff --git a/tests/python/test_sparse_matrix.py b/tests/python/test_sparse_matrix.py index 64576cefd8f25..179cbefedc31a 100644 --- a/tests/python/test_sparse_matrix.py +++ b/tests/python/test_sparse_matrix.py @@ -1,14 +1,20 @@ +import pytest + import taichi as ti from tests import test_utils +@pytest.mark.parametrize('dtype', [ti.f32, ti.f64]) @test_utils.test(arch=ti.cpu) -def test_sparse_matrix_builder_deprecated_anno(): +def test_sparse_matrix_builder_deprecated_anno(dtype): n = 8 - Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) + Abuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) @ti.kernel - def fill(Abuilder: ti.linalg.sparse_matrix_builder()): + def fill(Abuilder: ti.types.sparse_matrix_builder()): for i, j in ti.ndrange(n, n): Abuilder[i, j] += i + j @@ -19,10 +25,14 @@ def fill(Abuilder: ti.linalg.sparse_matrix_builder()): assert A[i, j] == i + j +@pytest.mark.parametrize('dtype', [ti.f32, ti.f64]) @test_utils.test(arch=ti.cpu) -def test_sparse_matrix_builder(): +def test_sparse_matrix_builder(dtype): n = 8 - Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) + Abuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) @ti.kernel def fill(Abuilder: ti.types.sparse_matrix_builder()): @@ -36,10 +46,14 @@ def fill(Abuilder: ti.types.sparse_matrix_builder()): assert A[i, j] == i + j +@pytest.mark.parametrize('dtype', [ti.f32, ti.f64]) @test_utils.test(arch=ti.cpu) -def test_sparse_matrix_shape(): +def test_sparse_matrix_shape(dtype): n, m = 8, 9 - Abuilder = ti.linalg.SparseMatrixBuilder(n, m, max_num_triplets=100) + Abuilder = ti.linalg.SparseMatrixBuilder(n, + m, + max_num_triplets=100, + dtype=dtype) @ti.kernel def fill(Abuilder: ti.types.sparse_matrix_builder()): @@ -51,10 +65,14 @@ def fill(Abuilder: ti.types.sparse_matrix_builder()): assert A.shape() == (n, m) +@pytest.mark.parametrize('dtype', [ti.f32, ti.f64]) @test_utils.test(arch=ti.cpu) -def test_sparse_matrix_element_access(): +def test_sparse_matrix_element_access(dtype): n = 8 - Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) + Abuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) @ti.kernel def fill(Abuilder: ti.types.sparse_matrix_builder()): @@ -67,10 +85,14 @@ def fill(Abuilder: ti.types.sparse_matrix_builder()): assert A[i, i] == i +@pytest.mark.parametrize('dtype', [ti.f32, ti.f64]) @test_utils.test(arch=ti.cpu) -def test_sparse_matrix_element_modify(): +def test_sparse_matrix_element_modify(dtype): n = 8 - Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) + Abuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) @ti.kernel def fill(Abuilder: ti.types.sparse_matrix_builder()): @@ -83,11 +105,18 @@ def fill(Abuilder: ti.types.sparse_matrix_builder()): assert A[0, 0] == 1024.0 +@pytest.mark.parametrize('dtype', [ti.f32, ti.f64]) @test_utils.test(arch=ti.cpu) -def test_sparse_matrix_addition(): +def 
test_sparse_matrix_addition(dtype): n = 8 - Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) - Bbuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) + Abuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) + Bbuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) @ti.kernel def fill(Abuilder: ti.types.sparse_matrix_builder(), @@ -105,11 +134,18 @@ def fill(Abuilder: ti.types.sparse_matrix_builder(), assert C[i, j] == 2 * i +@pytest.mark.parametrize('dtype', [ti.f32, ti.f64]) @test_utils.test(arch=ti.cpu) -def test_sparse_matrix_subtraction(): +def test_sparse_matrix_subtraction(dtype): n = 8 - Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) - Bbuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) + Abuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) + Bbuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) @ti.kernel def fill(Abuilder: ti.types.sparse_matrix_builder(), @@ -127,10 +163,14 @@ def fill(Abuilder: ti.types.sparse_matrix_builder(), assert C[i, j] == 2 * j +@pytest.mark.parametrize('dtype', [ti.f32, ti.f64]) @test_utils.test(arch=ti.cpu) -def test_sparse_matrix_scalar_multiplication(): +def test_sparse_matrix_scalar_multiplication(dtype): n = 8 - Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) + Abuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) @ti.kernel def fill(Abuilder: ti.types.sparse_matrix_builder()): @@ -145,10 +185,14 @@ def fill(Abuilder: ti.types.sparse_matrix_builder()): assert B[i, j] == 3 * (i + j) +@pytest.mark.parametrize('dtype', [ti.f32, ti.f64]) @test_utils.test(arch=ti.cpu) -def test_sparse_matrix_transpose(): +def test_sparse_matrix_transpose(dtype): n = 8 - Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) + Abuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) @ti.kernel def fill(Abuilder: ti.types.sparse_matrix_builder()): @@ -163,11 +207,18 @@ def fill(Abuilder: ti.types.sparse_matrix_builder()): assert B[i, j] == A[j, i] +@pytest.mark.parametrize('dtype', [ti.f32, ti.f64]) @test_utils.test(arch=ti.cpu) -def test_sparse_matrix_elementwise_multiplication(): +def test_sparse_matrix_elementwise_multiplication(dtype): n = 8 - Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) - Bbuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) + Abuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) + Bbuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) @ti.kernel def fill(Abuilder: ti.types.sparse_matrix_builder(), @@ -185,11 +236,18 @@ def fill(Abuilder: ti.types.sparse_matrix_builder(), assert C[i, j] == (i + j) * (i - j) +@pytest.mark.parametrize('dtype', [ti.f32, ti.f64]) @test_utils.test(arch=ti.cpu) -def test_sparse_matrix_multiplication(): +def test_sparse_matrix_multiplication(dtype): n = 2 - Abuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) - Bbuilder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100) + Abuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) + Bbuilder = ti.linalg.SparseMatrixBuilder(n, + n, + max_num_triplets=100, + dtype=dtype) @ti.kernel def fill(Abuilder: ti.types.sparse_matrix_builder(), @@ -208,11 +266,18 @@ def fill(Abuilder: ti.types.sparse_matrix_builder(), assert C[1, 
1] == -1.0 +@pytest.mark.parametrize('dtype', [ti.f32, ti.f64]) @test_utils.test(arch=ti.cpu) -def test_sparse_matrix_nonsymmetric_multiplication(): +def test_sparse_matrix_nonsymmetric_multiplication(dtype): n, k, m = 2, 3, 4 - Abuilder = ti.linalg.SparseMatrixBuilder(n, k, max_num_triplets=100) - Bbuilder = ti.linalg.SparseMatrixBuilder(k, m, max_num_triplets=100) + Abuilder = ti.linalg.SparseMatrixBuilder(n, + k, + max_num_triplets=100, + dtype=dtype) + Bbuilder = ti.linalg.SparseMatrixBuilder(k, + m, + max_num_triplets=100, + dtype=dtype) @ti.kernel def fill(Abuilder: ti.types.sparse_matrix_builder(),
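
All of the arithmetic tests above now run for both `ti.f32` and `ti.f64` builders. A compact sketch combining a few of the exercised operations on `f64` builders; the operator spellings follow the existing `SparseMatrix` Python API and should be treated as assumptions where the corresponding assertion is not visible in this hunk:

```python
import taichi as ti

ti.init(arch=ti.cpu)

n = 4
A_builder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100, dtype=ti.f64)
B_builder = ti.linalg.SparseMatrixBuilder(n, n, max_num_triplets=100, dtype=ti.f64)

@ti.kernel
def fill(A: ti.types.sparse_matrix_builder(),
         B: ti.types.sparse_matrix_builder()):
    for i in range(n):
        A[i, i] += 2.0
        B[i, i] += 1.0

fill(A_builder, B_builder)
A, B = A_builder.build(), B_builder.build()

C = A + B    # addition, as in test_sparse_matrix_addition
D = A - B    # subtraction
E = A * 3.0  # scalar multiplication
F = A @ B    # matrix-matrix product (assumed spelling; not shown in this hunk)

print(C[0, 0], D[0, 0], E[0, 0], F[0, 0])  # 3.0 1.0 6.0 2.0
```
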