diff --git a/taichi/codegen/cuda/codegen_cuda.cpp b/taichi/codegen/cuda/codegen_cuda.cpp
index e195c2f8373c3..7db3c50915bc2 100644
--- a/taichi/codegen/cuda/codegen_cuda.cpp
+++ b/taichi/codegen/cuda/codegen_cuda.cpp
@@ -599,6 +599,17 @@ class TaskCodeGenCUDA : public TaskCodeGenLLVM {
     // Issue an "__ldg" instruction to cache data in the read-only data cache.
     auto intrin = ty->isFloatingPointTy() ? llvm::Intrinsic::nvvm_ldg_global_f
                                           : llvm::Intrinsic::nvvm_ldg_global_i;
+    // Special treatment for bool types: nvvm_ldg_global_i does not support
+    // 1-bit integers, so load the value as i8 and compare it against zero.
+    if (ty->getScalarSizeInBits() == 1) {
+      auto *new_ty = tlctx->get_data_type<uint8>();
+      auto *new_ptr =
+          builder->CreatePointerCast(ptr, llvm::PointerType::get(new_ty, 0));
+      auto *v = builder->CreateIntrinsic(
+          intrin, {new_ty, llvm::PointerType::get(new_ty, 0)},
+          {new_ptr, tlctx->get_constant(new_ty->getScalarSizeInBits())});
+      return builder->CreateIsNotNull(v);
+    }
     return builder->CreateIntrinsic(
         intrin, {ty, llvm::PointerType::get(ty, 0)},
         {ptr, tlctx->get_constant(ty->getScalarSizeInBits())});
diff --git a/tests/python/test_struct.py b/tests/python/test_struct.py
index a5584fc0c49e5..979e10d25a4e4 100644
--- a/tests/python/test_struct.py
+++ b/tests/python/test_struct.py
@@ -154,3 +154,35 @@ def k() -> int:
         return x.testme()
 
     assert k() == 42
+
+
+@test_utils.test(arch=[ti.cpu, ti.cuda, ti.amdgpu])
+def test_struct_field_with_bool():
+    @ti.dataclass
+    class S:
+        a: ti.i16
+        b: bool
+        c: ti.i16
+
+    sf = S.field(shape=(10, 1))
+    sf[0, 0].b = False
+    sf[0, 0].a = 0xFFFF
+    sf[0, 0].c = 0xFFFF
+
+    def foo() -> S:
+        return sf[0, 0]
+
+    assert foo().a == -1
+    assert foo().c == -1
+    assert foo().b == False
+
+    sf[1, 0].a = 0x0000
+    sf[1, 0].c = 0x0000
+    sf[1, 0].b = True
+
+    def bar() -> S:
+        return sf[1, 0]
+
+    assert bar().a == 0
+    assert bar().c == 0
+    assert bar().b == True