Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] Warn when number of kernel args exceeds maximum available on GPU #2361

Merged
merged 11 commits into from
Sep 2, 2020
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -10997,6 +10997,11 @@ def err_sycl_restrict : Error<
"|use a const static or global variable that is neither zero-initialized "
"nor constant-initialized"
"}0">;
def warn_sycl_kernel_too_many_args : Warning<
"kernel argument count (%0) exceeds supported maximum of %1 on GPU">,
InGroup<SyclStrict>;
def note_sycl_kernel_args_count : Note<"array elements and fields of a "
"class/struct may be counted separately">;
def err_sycl_virtual_types : Error<
"No class with a vtable can be used in a SYCL kernel or any code included in the kernel">;
def note_sycl_recursive_function_declared_here: Note<"function implemented using recursion declared here">;
Expand Down
85 changes: 83 additions & 2 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ enum KernelInvocationKind {

const static std::string InitMethodName = "__init";
const static std::string FinalizeMethodName = "__finalize";
constexpr unsigned GPUMaxKernelArgsNum = 2000;

namespace {

Expand Down Expand Up @@ -1657,6 +1658,83 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {
using SyclKernelFieldHandler::leaveStruct;
};

class SyclKernelNumArgsChecker : public SyclKernelFieldHandler {
SourceLocation KernelLoc;
unsigned NumOfParams = 0;

bool handleSpecialType(QualType FieldTy) {
const auto *RecordDecl = FieldTy->getAsCXXRecordDecl();
Fznamznon marked this conversation as resolved.
Show resolved Hide resolved
assert(RecordDecl && "The accessor/sampler must be a RecordDecl");
CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, InitMethodName);
assert(InitMethod && "The accessor/sampler must have the __init method");
NumOfParams += InitMethod->getNumParams();
return true;
}

public:
SyclKernelNumArgsChecker(Sema &S, SourceLocation Loc)
: SyclKernelFieldHandler(S), KernelLoc(Loc) {}

~SyclKernelNumArgsChecker() {
if (SemaRef.Context.getTargetInfo().getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_gen) {
if (NumOfParams > GPUMaxKernelArgsNum) {
SemaRef.Diag(KernelLoc, diag::warn_sycl_kernel_too_many_args)
<< NumOfParams << GPUMaxKernelArgsNum;
SemaRef.Diag(KernelLoc, diag::note_sycl_kernel_args_count);
}
}
}

bool handleSyclAccessorType(FieldDecl *FD, QualType FieldTy) final {
return handleSpecialType(FieldTy);
}

bool handleSyclAccessorType(const CXXRecordDecl *, const CXXBaseSpecifier &,
QualType FieldTy) final {
return handleSpecialType(FieldTy);
}

bool handleSyclSamplerType(FieldDecl *FD, QualType FieldTy) final {
return handleSpecialType(FieldTy);
}

bool handleSyclSamplerType(const CXXRecordDecl *, const CXXBaseSpecifier &BS,
QualType FieldTy) final {
return handleSpecialType(FieldTy);
}

bool handlePointerType(FieldDecl *FD, QualType FieldTy) final {
NumOfParams++;
return true;
}

bool handleScalarType(FieldDecl *FD, QualType FieldTy) final {
NumOfParams++;
return true;
}

bool handleUnionType(FieldDecl *FD, QualType FieldTy) final {
return handleScalarType(FD, FieldTy);
}

bool handleSyclHalfType(FieldDecl *FD, QualType FieldTy) final {
NumOfParams++;
return true;
}

bool handleSyclStreamType(FieldDecl *FD, QualType FieldTy) final {
NumOfParams++;
return true;
}
bool handleSyclStreamType(const CXXRecordDecl *, const CXXBaseSpecifier &,
QualType FieldTy) final {
NumOfParams++;
return true;
}
using SyclKernelFieldHandler::handleSyclHalfType;
};

class SyclKernelBodyCreator : public SyclKernelFieldHandler {
SyclKernelDeclCreator &DeclCreator;
llvm::SmallVector<Stmt *, 16> BodyStmts;
Expand Down Expand Up @@ -2351,6 +2429,7 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc,

SyclKernelFieldChecker FieldChecker(*this);
SyclKernelUnionChecker UnionChecker(*this);
SyclKernelNumArgsChecker NumArgsChecker(*this, Args[0]->getExprLoc());
// check that calling kernel conforms to spec
QualType KernelParamTy = KernelFunc->getParamDecl(0)->getType();
if (KernelParamTy->isReferenceType()) {
Expand All @@ -2365,8 +2444,10 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc,

KernelObjVisitor Visitor{*this};
DiagnosingSYCLKernel = true;
Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker);
Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker);
Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker,
NumArgsChecker);
Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker,
NumArgsChecker);
DiagnosingSYCLKernel = false;
if (!FieldChecker.isValid() || !UnionChecker.isValid())
KernelFunc->setInvalidDecl();
Expand Down
43 changes: 43 additions & 0 deletions clang/test/SemaSYCL/num-args-overflow.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64_gen -DGPU -fsycl-is-device -fsyntax-only -verify %s
// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64 -fsycl-is-device -fsyntax-only -verify %s
// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64_gen -Wno-sycl-strict -fsycl-is-device -fsyntax-only -verify %s
// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64_gen -Werror=sycl-strict -DERROR -fsycl-is-device -fsyntax-only -verify %s

#include <sycl.hpp>

template <typename Name, typename F>
__attribute__((sycl_kernel)) void kernel(F KernelFunc) {
KernelFunc();
}

template <typename Name, typename F>
void parallel_for(F KernelFunc) {
#ifdef GPU
// expected-warning@+8 {{kernel argument count (2001) exceeds supported maximum of 2000 on GPU}}
// expected-note@+7 {{array elements and fields of a class/struct may be counted separately}}
#elif ERROR
// expected-error@+5 {{kernel argument count (2001) exceeds supported maximum of 2000 on GPU}}
// expected-note@+4 {{array elements and fields of a class/struct may be counted separately}}
#else
// expected-no-diagnostics
#endif
kernel<Name>(KernelFunc);
}

using Accessor =
cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::global_buffer>;

void use() {
struct S {
int A;
int B;
Accessor AAcc;
Accessor BAcc;
int Array[1991];
} Args;
auto L = [=]() { (void)Args; };
#if defined(GPU) || defined(ERROR)
// expected-note@+2 {{in instantiation of function template specialization 'parallel_for<Foo}}
#endif
parallel_for<class Foo>(L);
}