Skip to content

Commit

Permalink
[SYCL] Warn when number of kernel args exceeds maximum available on G…
Browse files Browse the repository at this point in the history
…PU (#2361)

Emit a warning when number of resulting kernel arguments exceeds 2k -
maximum available number of kernel arguments on GPU device. Emit a
warning only in GPU AOT mode since other devices don't have such
limitation.
  • Loading branch information
Fznamznon authored Sep 2, 2020
1 parent c7f915e commit ee5299d
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 2 deletions.
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -10997,6 +10997,11 @@ def err_sycl_restrict : Error<
"|use a const static or global variable that is neither zero-initialized "
"nor constant-initialized"
"}0">;
def warn_sycl_kernel_too_many_args : Warning<
"kernel argument count (%0) exceeds supported maximum of %1 on GPU">,
InGroup<SyclStrict>;
def note_sycl_kernel_args_count : Note<"array elements and fields of a "
"class/struct may be counted separately">;
def err_sycl_virtual_types : Error<
"No class with a vtable can be used in a SYCL kernel or any code included in the kernel">;
def note_sycl_recursive_function_declared_here: Note<"function implemented using recursion declared here">;
Expand Down
85 changes: 83 additions & 2 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ enum KernelInvocationKind {

const static std::string InitMethodName = "__init";
const static std::string FinalizeMethodName = "__finalize";
constexpr unsigned GPUMaxKernelArgsNum = 2000;

namespace {

Expand Down Expand Up @@ -1657,6 +1658,83 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {
using SyclKernelFieldHandler::leaveStruct;
};

class SyclKernelNumArgsChecker : public SyclKernelFieldHandler {
SourceLocation KernelLoc;
unsigned NumOfParams = 0;

bool handleSpecialType(QualType FieldTy) {
const CXXRecordDecl *RecordDecl = FieldTy->getAsCXXRecordDecl();
assert(RecordDecl && "The accessor/sampler must be a RecordDecl");
CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, InitMethodName);
assert(InitMethod && "The accessor/sampler must have the __init method");
NumOfParams += InitMethod->getNumParams();
return true;
}

public:
SyclKernelNumArgsChecker(Sema &S, SourceLocation Loc)
: SyclKernelFieldHandler(S), KernelLoc(Loc) {}

~SyclKernelNumArgsChecker() {
if (SemaRef.Context.getTargetInfo().getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_gen) {
if (NumOfParams > GPUMaxKernelArgsNum) {
SemaRef.Diag(KernelLoc, diag::warn_sycl_kernel_too_many_args)
<< NumOfParams << GPUMaxKernelArgsNum;
SemaRef.Diag(KernelLoc, diag::note_sycl_kernel_args_count);
}
}
}

bool handleSyclAccessorType(FieldDecl *FD, QualType FieldTy) final {
return handleSpecialType(FieldTy);
}

bool handleSyclAccessorType(const CXXRecordDecl *, const CXXBaseSpecifier &,
QualType FieldTy) final {
return handleSpecialType(FieldTy);
}

bool handleSyclSamplerType(FieldDecl *FD, QualType FieldTy) final {
return handleSpecialType(FieldTy);
}

bool handleSyclSamplerType(const CXXRecordDecl *, const CXXBaseSpecifier &BS,
QualType FieldTy) final {
return handleSpecialType(FieldTy);
}

bool handlePointerType(FieldDecl *FD, QualType FieldTy) final {
NumOfParams++;
return true;
}

bool handleScalarType(FieldDecl *FD, QualType FieldTy) final {
NumOfParams++;
return true;
}

bool handleUnionType(FieldDecl *FD, QualType FieldTy) final {
return handleScalarType(FD, FieldTy);
}

bool handleSyclHalfType(FieldDecl *FD, QualType FieldTy) final {
NumOfParams++;
return true;
}

bool handleSyclStreamType(FieldDecl *FD, QualType FieldTy) final {
NumOfParams++;
return true;
}
bool handleSyclStreamType(const CXXRecordDecl *, const CXXBaseSpecifier &,
QualType FieldTy) final {
NumOfParams++;
return true;
}
using SyclKernelFieldHandler::handleSyclHalfType;
};

class SyclKernelBodyCreator : public SyclKernelFieldHandler {
SyclKernelDeclCreator &DeclCreator;
llvm::SmallVector<Stmt *, 16> BodyStmts;
Expand Down Expand Up @@ -2351,6 +2429,7 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc,

SyclKernelFieldChecker FieldChecker(*this);
SyclKernelUnionChecker UnionChecker(*this);
SyclKernelNumArgsChecker NumArgsChecker(*this, Args[0]->getExprLoc());
// check that calling kernel conforms to spec
QualType KernelParamTy = KernelFunc->getParamDecl(0)->getType();
if (KernelParamTy->isReferenceType()) {
Expand All @@ -2365,8 +2444,10 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc,

KernelObjVisitor Visitor{*this};
DiagnosingSYCLKernel = true;
Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker);
Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker);
Visitor.VisitRecordBases(KernelObj, FieldChecker, UnionChecker,
NumArgsChecker);
Visitor.VisitRecordFields(KernelObj, FieldChecker, UnionChecker,
NumArgsChecker);
DiagnosingSYCLKernel = false;
if (!FieldChecker.isValid() || !UnionChecker.isValid())
KernelFunc->setInvalidDecl();
Expand Down
43 changes: 43 additions & 0 deletions clang/test/SemaSYCL/num-args-overflow.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64_gen -DGPU -fsycl-is-device -fsyntax-only -verify %s
// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64 -fsycl-is-device -fsyntax-only -verify %s
// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64_gen -Wno-sycl-strict -fsycl-is-device -fsyntax-only -verify %s
// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64_gen -Werror=sycl-strict -DERROR -fsycl-is-device -fsyntax-only -verify %s

#include <sycl.hpp>

template <typename Name, typename F>
__attribute__((sycl_kernel)) void kernel(F KernelFunc) {
KernelFunc();
}

template <typename Name, typename F>
void parallel_for(F KernelFunc) {
#ifdef GPU
// expected-warning@+8 {{kernel argument count (2001) exceeds supported maximum of 2000 on GPU}}
// expected-note@+7 {{array elements and fields of a class/struct may be counted separately}}
#elif ERROR
// expected-error@+5 {{kernel argument count (2001) exceeds supported maximum of 2000 on GPU}}
// expected-note@+4 {{array elements and fields of a class/struct may be counted separately}}
#else
// expected-no-diagnostics
#endif
kernel<Name>(KernelFunc);
}

using Accessor =
cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::global_buffer>;

void use() {
struct S {
int A;
int B;
Accessor AAcc;
Accessor BAcc;
int Array[1991];
} Args;
auto L = [=]() { (void)Args; };
#if defined(GPU) || defined(ERROR)
// expected-note@+2 {{in instantiation of function template specialization 'parallel_for<Foo}}
#endif
parallel_for<class Foo>(L);
}

0 comments on commit ee5299d

Please sign in to comment.