diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000000..77c36ce43547 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,19 @@ +clang/ @erichkeane, @Fznamznon + +clang/**/Driver @mdtoguchi @AGindinson + +llvm-spirv/ @AlexeySotkin, @AlexeySachkov + +opencl-aot/ @dm-vodopyanov, @AlexeySachkov, @romanovvlad + +libdevice/ @asavonic, @vzakhari + +sycl/doc/extensions/ @mkinsner, @jbrodman + +sycl/doc/ @pvchupin, @kbobrovs + +sycl/ @romanovvlad, @bader + +xpti/ @tovinkere, @andykaylor + +* @bader diff --git a/.github/workflows/gh_pages.yml b/.github/workflows/gh_pages.yml index 913182bce7df..51950fe54ef1 100644 --- a/.github/workflows/gh_pages.yml +++ b/.github/workflows/gh_pages.yml @@ -13,7 +13,9 @@ jobs: ref: sycl path: repo - name: Install deps - run: sudo apt-get install -y doxygen graphviz ssh ninja-build + run: | + sudo apt-get install -y doxygen graphviz ssh ninja-build + sudo pip3 install sphinx recommonmark sphinx_markdown_tables - name: Build Docs run: | mkdir -p $GITHUB_WORKSPACE/build @@ -21,6 +23,7 @@ jobs: python $GITHUB_WORKSPACE/repo/buildbot/configure.py -w $GITHUB_WORKSPACE \ -s $GITHUB_WORKSPACE/repo -o $GITHUB_WORKSPACE/build -t Release --docs cmake --build . --target doxygen-sycl + cmake --build . --target docs-sycl-html - name: Deploy env: SSH_KEY: ${{secrets.ACTIONS_DEPLOY_KEY}} @@ -32,7 +35,11 @@ jobs: ssh-add -k ~/.ssh/id_rsa git clone git@github.com:intel/llvm-docs.git docs cd $GITHUB_WORKSPACE/docs - yes | \cp -rf $GITHUB_WORKSPACE/build/tools/sycl/doc/doxygen/html/* . + git rm -rf . + touch .nojekyll + yes | \cp -rf $GITHUB_WORKSPACE/build/tools/sycl/doc/html/* . + mkdir doxygen + yes | \cp -rf $GITHUB_WORKSPACE/build/tools/sycl/doc/doxygen/html/* doxygen/ git config --global user.name "iclsrc" git config --global user.email "ia.compiler.tools.git@intel.com" git add . diff --git a/buildbot/configure.py b/buildbot/configure.py index fdc570895273..07f8feb9d726 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -20,6 +20,7 @@ def do_configure(args): sycl_build_pi_cuda = 'OFF' llvm_enable_assertions = 'ON' llvm_enable_doxygen = 'OFF' + llvm_enable_sphinx = 'OFF' llvm_build_shared_libs = 'OFF' if platform.system() == 'Linux': @@ -38,6 +39,7 @@ def do_configure(args): if args.docs: llvm_enable_doxygen = 'ON' + llvm_enable_sphinx = 'ON' if args.shared_libs: llvm_build_shared_libs = 'ON' @@ -63,6 +65,7 @@ def do_configure(args): "-DCMAKE_INSTALL_PREFIX={}".format(install_dir), "-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests. "-DLLVM_ENABLE_DOXYGEN={}".format(llvm_enable_doxygen), + "-DLLVM_ENABLE_SPHINX={}".format(llvm_enable_sphinx), "-DBUILD_SHARED_LIBS={}".format(llvm_build_shared_libs), "-DSYCL_ENABLE_XPTI_TRACING=ON", # Explicitly turn on XPTI tracing llvm_dir diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index fcaa18909fbe..9ddb633a2899 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -237,14 +237,7 @@ set(ENABLE_X86_RELAX_RELOCATIONS OFF CACHE BOOL set(ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER FALSE CACHE BOOL "Enable the experimental new pass manager by default.") -# Clang tool executes cc1 commands in the same process after b4a99a0 -# It causes increasing memory consumption for compilations where several -# source files are passed (for C++ application) or several build steps (e.g. -# for SYCL application we have host, device and integration header step -# per source file). Memory is not freed for all cc1 commands until end -# This change forces clang driver use old behavior untill memory issue -# is fixed. -set(CLANG_SPAWN_CC1 ON CACHE BOOL +set(CLANG_SPAWN_CC1 OFF CACHE BOOL "Whether clang should use a new process for the CC1 invocation") # TODO: verify the values against LangStandards.def? diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 2a63bc878e98..e04ed4252c4f 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -236,6 +236,10 @@ class DefaultIntArgument : IntArgument { int Default = default; } +class DefaultUnsignedArgument : UnsignedArgument { + int Default = default; +} + // This argument is more complex, it includes the enumerator type name, // a list of strings to accept, and a list of enumerators to map them to. class EnumArgument values, @@ -1676,6 +1680,57 @@ def SYCLIntelFPGAMaxConcurrency : Attr { let Documentation = [SYCLIntelFPGAMaxConcurrencyAttrDocs]; } +def SYCLIntelFPGALoopCoalesce : Attr { + let Spellings = [CXX11<"intelfpga","loop_coalesce">]; + let Args = [ExprArgument<"NExpr">]; + let LangOpts = [SYCLIsDevice, SYCLIsHost]; + let HasCustomTypeTransform = 1; + let AdditionalMembers = [{ + static const char *getName() { + return "loop_coalesce"; + } + }]; + let Documentation = [SYCLIntelFPGALoopCoalesceAttrDocs]; +} + +def SYCLIntelFPGADisableLoopPipelining : Attr { + let Spellings = [CXX11<"intelfpga","disable_loop_pipelining">]; + let LangOpts = [SYCLIsDevice, SYCLIsHost]; + let HasCustomTypeTransform = 1; + let AdditionalMembers = [{ + static const char *getName() { + return "disable_loop_pipelining"; + } + }]; + let Documentation = [SYCLIntelFPGADisableLoopPipeliningAttrDocs]; +} + +def SYCLIntelFPGAMaxInterleaving : Attr { + let Spellings = [CXX11<"intelfpga","max_interleaving">]; + let Args = [ExprArgument<"NExpr">]; + let LangOpts = [SYCLIsDevice, SYCLIsHost]; + let HasCustomTypeTransform = 1; + let AdditionalMembers = [{ + static const char *getName() { + return "max_interleaving"; + } + }]; + let Documentation = [SYCLIntelFPGAMaxInterleavingAttrDocs]; +} + +def SYCLIntelFPGASpeculatedIterations : Attr { + let Spellings = [CXX11<"intelfpga","speculated_iterations">]; + let Args = [ExprArgument<"NExpr">]; + let LangOpts = [SYCLIsDevice, SYCLIsHost]; + let HasCustomTypeTransform = 1; + let AdditionalMembers = [{ + static const char *getName() { + return "speculated_iterations"; + } + }]; + let Documentation = [SYCLIntelFPGASpeculatedIterationsAttrDocs]; +} + def IntelFPGALocalNonConstVar : SubsetSubjecthasLocalStorage() && S->getKind() != Decl::ImplicitParam && @@ -2471,13 +2526,16 @@ def NoDeref : TypeAttr { let Documentation = [NoDerefDocs]; } +// Default arguments in ReqWorkGroupSize can be used only with +// intel::reqd_work_group_size spelling. def ReqdWorkGroupSize : InheritableAttr { let Spellings = [GNU<"reqd_work_group_size">, - CXX11<"cl","reqd_work_group_size">]; - let Args = [UnsignedArgument<"XDim">, UnsignedArgument<"YDim">, - UnsignedArgument<"ZDim">]; + CXX11<"intel","reqd_work_group_size">, + CXX11<"cl","reqd_work_group_size">]; + let Args = [UnsignedArgument<"XDim">, DefaultUnsignedArgument<"YDim", 1>, + DefaultUnsignedArgument<"ZDim", 1>]; let Subjects = SubjectList<[Function], ErrorDiag>; - let Documentation = [Undocumented]; + let Documentation = [ReqdWorkGroupSizeAttrDocs]; } def WorkGroupSizeHint : InheritableAttr { diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index d521d6c794c4..4f70e3b75eab 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2003,6 +2003,36 @@ device kernel, the attribute is ignored and it is not propagated to a kernel. }]; } +def ReqdWorkGroupSizeAttrDocs : Documentation { + let Category = DocCatFunction; + let Heading = "reqd_work_group_size"; + let Content = [{ +This attribute is documented by both OpenCL and SYCL standards +and allows to specify exact *local_work_size* which must be used as +argument to **clEnqueueNDRangeKernel** (in OpenCL) or to +**parallel_for** in SYCL. This allows the compiler to optimize the +generated code appropriately for the kernel to which attribute is applied. + +While semantic of this attribute is the same between OpenCL and SYCL, +spelling is a bit different: + +SYCL 1.2.1 describes ``[[cl::reqd_work_group_size(X, Y, Z)]]`` spelling: this +attribute is legal on device functions and is propagated down to any caller of +those device functions, such that the kernel attributes are the sum of all +attributes of all device functions called in this kernel. +See section 6.7 Attributes for more details. + +As Intel extension, ``[[intel::reqd_work_group_size(X, Y, Z)]]`` spelling is allowed +which features optional arguments `Y` and `Z`, those simplifies its usage if +only 1- or 2-dimensional ND-range is assumed by a programmer. These arguments +defaults to ``1``. + +In OpenCL C, this attribute is available in GNU spelling +(``__attribute__((reqd_work_group_size(X, Y, Z)))``), see section +6.7.2 Optional Attribute Qualifiers of OpenCL 1.2 specification for details. + }]; +} + def SYCLIntelMaxWorkGroupSizeAttrDocs : Documentation { let Category = DocCatFunction; let Heading = "max_work_group_size (IntelFPGA)"; @@ -2110,6 +2140,53 @@ be applied multiple times to the same loop. }]; } +def SYCLIntelFPGALoopCoalesceAttrDocs : Documentation { + let Category = DocCatVariable; + let Heading = "loop_coalesce"; + let Content = [{ +This attribute applies to a loop. Indicates that the loop nest should be +coalesced into a single loop without affecting functionality. Parameter N is +optional. If specified, it shall be a positive integer, and indicates how many +of the nested loop levels should be coalesced. + }]; +} + +def SYCLIntelFPGADisableLoopPipeliningAttrDocs : Documentation { + let Category = DocCatVariable; + let Heading = "disable_loop_pipelining"; + let Content = [{ +This attribute applies to a loop. Disables pipelining of the loop data path, +causing the loop to be executed serially. Cannot be used on the same loop in +conjunction with max_interleaving, speculated_iterations, max_concurrency, ii +or ivdep. + }]; +} + +def SYCLIntelFPGAMaxInterleavingAttrDocs : Documentation { + let Category = DocCatVariable; + let Heading = "max_interleaving"; + let Content = [{ +This attribute applies to a loop. Places a maximum limit N on the number of +interleaved invocations of an inner loop by an outer loop (note, this does not +mean that this attribute can only be applied to inner loops in user code - outer +loops in user code may still be contained in an implicit loop due to NDRange). +Parameter N is mandatory, and shall be non-negative integer. Cannot be +used on the same loop in conjunction with disable_loop_pipelining. + }]; +} + +def SYCLIntelFPGASpeculatedIterationsAttrDocs : Documentation { + let Category = DocCatVariable; + let Heading = "speculated_iterations"; + let Content = [{ +This attribute applies to a loop. Specifies the number of concurrent speculated +iterations that will be in flight for a loop invocation (i.e. the exit +condition for these iterations will not have been evaluated yet). +Parameter N is mandatory, and may either be 0, or a positive integer. Cannot be +used on the same loop in conjunction with disable_loop_pipelining. + }]; +} + def SYCLDeviceIndirectlyCallableDocs : Documentation { let Category = DocCatFunction; let Heading = "intel::device_indirectly_callable"; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index d4a4d1692edc..d2ac6d6fd888 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10666,9 +10666,9 @@ def err_builtin_launder_invalid_arg : Error< // SYCL-specific diagnostics def err_sycl_attribute_address_space_invalid : Error< "address space is outside the valid range of values">; -def err_sycl_kernel_name_class_not_top_level : Error< - "kernel name class and its template argument classes' declarations can only " - "nest in a namespace: %0">; +def err_sycl_kernel_incorrectly_named : Error< + "kernel %select{name is missing" + "|needs to have a globally-visible name}0">; def err_sycl_restrict : Error< "SYCL kernel cannot " "%select{use a non-const global variable" diff --git a/clang/include/clang/Driver/Action.h b/clang/include/clang/Driver/Action.h index 88e20a965c69..f2f13e2bc333 100644 --- a/clang/include/clang/Driver/Action.h +++ b/clang/include/clang/Driver/Action.h @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" +#include #include namespace llvm { @@ -78,9 +79,10 @@ class Action { SYCLPostLinkJobClass, PartialLinkJobClass, BackendCompileJobClass, + FileTableTformJobClass, JobClassFirst = PreprocessJobClass, - JobClassLast = BackendCompileJobClass + JobClassLast = FileTableTformJobClass }; // The offloading kind determines if this action is binded to a particular @@ -679,6 +681,13 @@ class SYCLPostLinkJobAction : public JobAction { static bool classof(const Action *A) { return A->getKind() == SYCLPostLinkJobClass; } + + void setRTSetsSpecConstants(bool Val) { RTSetsSpecConsts = Val; } + + bool getRTSetsSpecConstants() const { return RTSetsSpecConsts; } + +private: + bool RTSetsSpecConsts = true; }; class PartialLinkJobAction : public JobAction { @@ -705,6 +714,47 @@ class BackendCompileJobAction : public JobAction { } }; +// Represents a file table transformation action. The order of inputs to a +// FileTableTformJobAction at construction time must accord with the tforms +// added later - some tforms "consume" inputs. For example, "replace column" +// needs another file to read the replacement column from. +class FileTableTformJobAction : public JobAction { + void anchor() override; + +public: + struct Tform { + enum Kind { EXTRACT, EXTRACT_DROP_TITLE, REPLACE }; + + Tform() = default; + Tform(Kind K, std::initializer_list Args) : TheKind(K) { + for (auto A : Args) + TheArgs.emplace_back(A.str()); + } + + Kind TheKind; + SmallVector TheArgs; + }; + + FileTableTformJobAction(Action *Input, types::ID OutputType); + FileTableTformJobAction(ActionList &Inputs, types::ID OutputType); + + // Deletes all columns except the one with given name. + void addExtractColumnTform(StringRef ColumnName, bool WithColTitle = true); + + // Replaces a column with title in this table with a column with title + // from another file table passed as input to this action. + void addReplaceColumnTform(StringRef From, StringRef To); + + static bool classof(const Action *A) { + return A->getKind() == FileTableTformJobClass; + } + + const ArrayRef getTforms() const { return Tforms; } + +private: + SmallVector Tforms; // transformation actions requested +}; + } // namespace driver } // namespace clang diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td index adcf35293c86..868034ff6c51 100644 --- a/clang/include/clang/Driver/CC1Options.td +++ b/clang/include/clang/Driver/CC1Options.td @@ -919,6 +919,8 @@ def fsycl_std_layout_kernel_params: Flag<["-"], "fsycl-std-layout-kernel-params" def fsycl_allow_func_ptr : Flag<["-"], "fsycl-allow-func-ptr">, HelpText<"Allow function pointers in SYCL device.">; def fno_sycl_allow_func_ptr : Flag<["-"], "fno-sycl-allow-func-ptr">; +def fsycl_enable_optimizations: Flag<["-"], "fsycl-enable-optimizations">, + HelpText<"Experimental flag enabling standard optimization in the front-end.">; } // let Flags = [CC1Option] diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index cc4b67407ff3..18b73e9c61d6 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -147,6 +147,7 @@ class ToolChain { mutable std::unique_ptr SYCLPostLink; mutable std::unique_ptr PartialLink; mutable std::unique_ptr BackendCompiler; + mutable std::unique_ptr FileTableTform; Tool *getClang() const; Tool *getFlang() const; @@ -161,6 +162,7 @@ class ToolChain { Tool *getSYCLPostLink() const; Tool *getPartialLink() const; Tool *getBackendCompiler() const; + Tool *getTableTform() const; mutable std::unique_ptr SanitizerArguments; mutable std::unique_ptr XRayArguments; diff --git a/clang/include/clang/Driver/Types.def b/clang/include/clang/Driver/Types.def index e334d143c359..7b003aab3eb3 100644 --- a/clang/include/clang/Driver/Types.def +++ b/clang/include/clang/Driver/Types.def @@ -103,7 +103,7 @@ TYPE("spirv", SPIRV, INVALID, "spv", phases TYPE("sycl-header", SYCL_Header, INVALID, "h", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("sycl-fatbin", SYCL_FATBIN, INVALID, nullptr, phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("tempfilelist", Tempfilelist, INVALID, "txt", phases::Compile, phases::Backend, phases::Assemble, phases::Link) -TYPE("tempentriesfilelist", TempEntriesfilelist, INVALID, "txt", phases::Compile, phases::Backend, phases::Assemble, phases::Link) +TYPE("tempfiletable", Tempfiletable,INVALID, "table", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("tempAOCOfilelist", TempAOCOfilelist, INVALID, "txt", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("archive", Archive, INVALID, "a", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("wholearchive", WholeArchive, INVALID, "a", phases::Compile, phases::Backend, phases::Assemble, phases::Link) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 58538bb12781..b7c58e0e73e8 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -329,7 +329,7 @@ class SYCLIntegrationHeader { /// Signals that subsequent parameter descriptor additions will go to /// the kernel with given name. Starts new kernel invocation descriptor. void startKernel(StringRef KernelName, QualType KernelNameType, - StringRef KernelStableName); + StringRef KernelStableName, SourceLocation Loc); /// Adds a kernel parameter descriptor to current kernel invocation /// descriptor. @@ -339,6 +339,9 @@ class SYCLIntegrationHeader { /// invocation descriptor has finished. void endKernel(); + /// Registers a specialization constant to emit info for it into the header. + void addSpecConstant(StringRef IDName, QualType IDType); + private: // Kernel actual parameter descriptor. struct KernelParamDesc { @@ -367,6 +370,8 @@ class SYCLIntegrationHeader { /// Kernel name with stable lambda name mangling std::string StableName; + SourceLocation KernelLocation; + /// Descriptor of kernel actual parameters. SmallVector Params; @@ -381,7 +386,8 @@ class SYCLIntegrationHeader { } /// Emits a forward declaration for given declaration. - void emitFwdDecl(raw_ostream &O, const Decl *D); + void emitFwdDecl(raw_ostream &O, const Decl *D, + SourceLocation KernelLocation); /// Emits forward declarations of classes and template classes on which /// declaration of given type depends. See example in the comments for the @@ -390,16 +396,27 @@ class SYCLIntegrationHeader { /// stream to emit to /// \param T /// type to emit forward declarations for + /// \param KernelLocation + /// source location of the SYCL kernel function, used to emit nicer + /// diagnostic messages if kernel name is missing /// \param Emitted /// a set of declarations forward declrations has been emitted for already void emitForwardClassDecls(raw_ostream &O, QualType T, - llvm::SmallPtrSetImpl &Emitted); + SourceLocation KernelLocation, + llvm::SmallPtrSetImpl &Emitted); private: /// Keeps invocation descriptors for each kernel invocation started by /// SYCLIntegrationHeader::startKernel SmallVector KernelDescs; + using SpecConstID = std::pair; + + /// Keeps specialization constants met in the translation unit. Maps spec + /// constant's ID type to generated unique name. Duplicates are removed at + /// integration header emission time. + llvm::SmallVector SpecConsts; + /// Used for emitting diagnostics. DiagnosticsEngine &Diag; @@ -1759,7 +1776,7 @@ class Sema final { Expr *Expr2); template FPGALoopAttrT *BuildSYCLIntelFPGALoopAttr(const AttributeCommonInfo &A, - Expr *E); + Expr *E = nullptr); LoopUnrollHintAttr *BuildLoopUnrollHintAttr(const AttributeCommonInfo &A, Expr *E); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 03e86abdac71..1e8e814f2d90 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1817,6 +1817,7 @@ std::string Qualifiers::getAddrSpaceAsString(LangAS AS) { case LangAS::sycl_constant: return "__constant"; case LangAS::opencl_generic: + case LangAS::sycl_generic: return "__generic"; case LangAS::cuda_device: return "__device__"; diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 582778f9790f..4b5b2a545406 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -17,6 +17,7 @@ #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" #include "CGRecordLayout.h" +#include "CGSYCLRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp index 5189bf3f47ba..089255b2997b 100644 --- a/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/clang/lib/CodeGen/CGLoopInfo.cpp @@ -476,6 +476,24 @@ EmitIVDepLoopMetadata(LLVMContext &Ctx, LoopProperties.push_back(MDNode::get(Ctx, MD)); } +/// Setting the legacy LLVM IR representation of the ivdep attribute. +static void EmitLegacyIVDepLoopMetadata( + LLVMContext &Ctx, llvm::SmallVectorImpl &LoopProperties, + const LoopAttributes::SYCLIVDepInfo &I) { + // Only emit the "enable" metadata if the safelen is set to 0, implying + // infinite safe length. + if (I.SafeLen == 0) { + Metadata *EnableMDs[] = {MDString::get(Ctx, "llvm.loop.ivdep.enable")}; + LoopProperties.push_back(MDNode::get(Ctx, EnableMDs)); + return; + } + + Metadata *SafelenMDs[] = {MDString::get(Ctx, "llvm.loop.ivdep.safelen"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), I.SafeLen))}; + LoopProperties.push_back(MDNode::get(Ctx, SafelenMDs)); +} + MDNode *LoopInfo::createMetadata( const LoopAttributes &Attrs, llvm::ArrayRef AdditionalLoopProperties, @@ -500,14 +518,20 @@ MDNode *LoopInfo::createMetadata( } LLVMContext &Ctx = Header->getContext(); - if (Attrs.GlobalSYCLIVDepInfo.hasValue()) + if (Attrs.GlobalSYCLIVDepInfo.hasValue()) { EmitIVDepLoopMetadata(Ctx, LoopProperties, *Attrs.GlobalSYCLIVDepInfo); + // The legacy metadata also needs to be emitted to provide backwards + // compatibility with any conformant backend. This is done exclusively + // for the "global" ivdep specification so as not to impose unnecessarily + // tight safe length constraints on the array-specific cases. + EmitLegacyIVDepLoopMetadata(Ctx, LoopProperties, + *Attrs.GlobalSYCLIVDepInfo); + } for (const auto &I : Attrs.ArraySYCLIVDepInfo) EmitIVDepLoopMetadata(Ctx, LoopProperties, I); // Setting ii attribute with an initiation interval if (Attrs.SYCLIInterval > 0) { - LLVMContext &Ctx = Header->getContext(); Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.ii.count"), ConstantAsMetadata::get(ConstantInt::get( llvm::Type::getInt32Ty(Ctx), Attrs.SYCLIInterval))}; @@ -516,7 +540,6 @@ MDNode *LoopInfo::createMetadata( // Setting max_concurrency attribute with number of threads if (Attrs.SYCLMaxConcurrencyEnable) { - LLVMContext &Ctx = Header->getContext(); Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.max_concurrency.count"), ConstantAsMetadata::get(ConstantInt::get( llvm::Type::getInt32Ty(Ctx), @@ -524,6 +547,45 @@ MDNode *LoopInfo::createMetadata( LoopProperties.push_back(MDNode::get(Ctx, Vals)); } + if (Attrs.SYCLLoopCoalesceEnable) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.coalesce.enable")}; + LoopProperties.push_back(MDNode::get(Ctx, Vals)); + } + + if (Attrs.SYCLLoopCoalesceNLevels > 0) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.coalesce.count"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), Attrs.SYCLLoopCoalesceNLevels))}; + LoopProperties.push_back(MDNode::get(Ctx, Vals)); + } + + // disable_loop_pipelining attribute corresponds to + // 'llvm.loop.intel.pipelining.enable, i32 0' metadata + if (Attrs.SYCLLoopPipeliningDisable) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.intel.pipelining.enable"), + ConstantAsMetadata::get( + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 0))}; + LoopProperties.push_back(MDNode::get(Ctx, Vals)); + } + + if (Attrs.SYCLMaxInterleavingEnable) { + Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.max_interleaving.count"), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), + Attrs.SYCLMaxInterleavingNInvocations))}; + LoopProperties.push_back(MDNode::get(Ctx, Vals)); + } + + if (Attrs.SYCLSpeculatedIterationsEnable) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.intel.speculated.iterations.count"), + ConstantAsMetadata::get( + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.SYCLSpeculatedIterationsNIterations))}; + LoopProperties.push_back(MDNode::get(Ctx, Vals)); + } + LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(), AdditionalLoopProperties.end()); return createFullUnrollMetadata(Attrs, LoopProperties, HasUserTransforms); @@ -535,9 +597,13 @@ LoopAttributes::LoopAttributes(bool IsParallel) UnrollAndJamEnable(LoopAttributes::Unspecified), VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0), InterleaveCount(0), SYCLIInterval(0), SYCLMaxConcurrencyEnable(false), - SYCLMaxConcurrencyNThreads(0), UnrollCount(0), UnrollAndJamCount(0), - DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false), - PipelineInitiationInterval(0) {} + SYCLMaxConcurrencyNThreads(0), SYCLLoopCoalesceEnable(false), + SYCLLoopCoalesceNLevels(0), SYCLLoopPipeliningDisable(false), + SYCLMaxInterleavingEnable(false), SYCLMaxInterleavingNInvocations(0), + SYCLSpeculatedIterationsEnable(false), + SYCLSpeculatedIterationsNIterations(0), UnrollCount(0), + UnrollAndJamCount(0), DistributeEnable(LoopAttributes::Unspecified), + PipelineDisabled(false), PipelineInitiationInterval(0) {} void LoopAttributes::clear() { IsParallel = false; @@ -547,6 +613,13 @@ void LoopAttributes::clear() { SYCLIInterval = 0; SYCLMaxConcurrencyEnable = false; SYCLMaxConcurrencyNThreads = 0; + SYCLLoopCoalesceEnable = false; + SYCLLoopCoalesceNLevels = 0; + SYCLLoopPipeliningDisable = false; + SYCLMaxInterleavingEnable = false; + SYCLMaxInterleavingNInvocations = 0; + SYCLSpeculatedIterationsEnable = false; + SYCLSpeculatedIterationsNIterations = 0; InterleaveCount = 0; UnrollCount = 0; UnrollAndJamCount = 0; @@ -574,9 +647,16 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && Attrs.InterleaveCount == 0 && !Attrs.GlobalSYCLIVDepInfo.hasValue() && Attrs.ArraySYCLIVDepInfo.empty() && Attrs.SYCLIInterval == 0 && - Attrs.SYCLMaxConcurrencyEnable == false && Attrs.UnrollCount == 0 && - Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && - Attrs.PipelineInitiationInterval == 0 && + Attrs.SYCLMaxConcurrencyEnable == false && + Attrs.SYCLLoopCoalesceEnable == false && + Attrs.SYCLLoopCoalesceNLevels == 0 && + Attrs.SYCLLoopPipeliningDisable == false && + Attrs.SYCLMaxInterleavingEnable == false && + Attrs.SYCLMaxInterleavingNInvocations == 0 && + Attrs.SYCLSpeculatedIterationsEnable == false && + Attrs.SYCLSpeculatedIterationsNIterations == 0 && + Attrs.UnrollCount == 0 && Attrs.UnrollAndJamCount == 0 && + !Attrs.PipelineDisabled && Attrs.PipelineInitiationInterval == 0 && Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified && Attrs.VectorizeEnable == LoopAttributes::Unspecified && Attrs.UnrollEnable == LoopAttributes::Unspecified && @@ -878,6 +958,16 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, // n - 'llvm.loop.ii.count, i32 n' metadata will be emitted // For attribute max_concurrency: // n - 'llvm.loop.max_concurrency.count, i32 n' metadata will be emitted + // For attribute loop_coalesce: + // without parameter - 'lvm.loop.coalesce.enable' metadata will be emitted + // n - 'llvm.loop.coalesce.count, i32 n' metadata will be emitted + // For attribute disable_loop_pipelining: + // 'llvm.loop.intel.pipelining.enable, i32 0' metadata will be emitted + // For attribute max_interleaving: + // n - 'llvm.loop.max_interleaving.count, i32 n' metadata will be emitted + // For attribute speculated_iterations: + // n - 'llvm.loop.intel.speculated.iterations.count, i32 n' metadata will be + // emitted for (const auto *Attr : Attrs) { const SYCLIntelFPGAIVDepAttr *IntelFPGAIVDep = dyn_cast(Attr); @@ -885,8 +975,19 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, dyn_cast(Attr); const SYCLIntelFPGAMaxConcurrencyAttr *IntelFPGAMaxConcurrency = dyn_cast(Attr); - - if (!IntelFPGAIVDep && !IntelFPGAII && !IntelFPGAMaxConcurrency) + const SYCLIntelFPGALoopCoalesceAttr *IntelFPGALoopCoalesce = + dyn_cast(Attr); + const SYCLIntelFPGADisableLoopPipeliningAttr + *IntelFPGADisableLoopPipelining = + dyn_cast(Attr); + const SYCLIntelFPGAMaxInterleavingAttr *IntelFPGAMaxInterleaving = + dyn_cast(Attr); + const SYCLIntelFPGASpeculatedIterationsAttr *IntelFPGASpeculatedIterations = + dyn_cast(Attr); + + if (!IntelFPGAIVDep && !IntelFPGAII && !IntelFPGAMaxConcurrency && + !IntelFPGALoopCoalesce && !IntelFPGADisableLoopPipelining && + !IntelFPGAMaxInterleaving && !IntelFPGASpeculatedIterations) continue; if (IntelFPGAIVDep) { @@ -919,6 +1020,44 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, setSYCLMaxConcurrencyEnable(); setSYCLMaxConcurrencyNThreads(ArgVal.getSExtValue()); } + + if (IntelFPGALoopCoalesce) { + llvm::APSInt ArgVal(32); + if (auto *LCE = IntelFPGALoopCoalesce->getNExpr()) { + bool IsValid = LCE->isIntegerConstantExpr(ArgVal, Ctx); + assert(IsValid && "Not an integer constant expression"); + (void)IsValid; + setSYCLLoopCoalesceNLevels(ArgVal.getSExtValue()); + } else { + setSYCLLoopCoalesceEnable(); + } + } + + if (IntelFPGADisableLoopPipelining) { + setSYCLLoopPipeliningDisable(); + } + + if (IntelFPGAMaxInterleaving) { + llvm::APSInt ArgVal(32); + bool IsValid = + IntelFPGAMaxInterleaving->getNExpr()->isIntegerConstantExpr(ArgVal, + Ctx); + assert(IsValid && "Not an integer constant expression"); + (void)IsValid; + setSYCLMaxInterleavingEnable(); + setSYCLMaxInterleavingNInvocations(ArgVal.getSExtValue()); + } + + if (IntelFPGASpeculatedIterations) { + llvm::APSInt ArgVal(32); + bool IsValid = + IntelFPGASpeculatedIterations->getNExpr()->isIntegerConstantExpr( + ArgVal, Ctx); + assert(IsValid && "Not an integer constant expression"); + (void)IsValid; + setSYCLSpeculatedIterationsEnable(); + setSYCLSpeculatedIterationsNIterations(ArgVal.getSExtValue()); + } } /// Stage the attributes. diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h index 690c4e68df1c..2d128b62cc6d 100644 --- a/clang/lib/CodeGen/CGLoopInfo.h +++ b/clang/lib/CodeGen/CGLoopInfo.h @@ -113,6 +113,27 @@ struct LoopAttributes { /// Value for llvm.loop.max_concurrency.count metadata. unsigned SYCLMaxConcurrencyNThreads; + /// Flag for llvm.loop.coalesce metadata. + bool SYCLLoopCoalesceEnable; + + /// Value for llvm.loop.coalesce.count metadata. + unsigned SYCLLoopCoalesceNLevels; + + /// Flag for llvm.loop.intel.pipelining.enable, i32 0 metadata. + bool SYCLLoopPipeliningDisable; + + /// Flag for llvm.loop.max_interleaving.count metadata. + bool SYCLMaxInterleavingEnable; + + /// Value for llvm.loop.max_interleaving.count metadata. + unsigned SYCLMaxInterleavingNInvocations; + + /// Flag for llvm.loop.intel.speculated.iterations.count metadata. + bool SYCLSpeculatedIterationsEnable; + + /// Value for llvm.loop.intel.speculated.iterations.count metadata. + unsigned SYCLSpeculatedIterationsNIterations; + /// llvm.unroll. unsigned UnrollCount; @@ -333,6 +354,41 @@ class LoopInfoStack { StagedAttrs.SYCLMaxConcurrencyNThreads = C; } + /// Set flag of loop_coalesce for the next loop pushed. + void setSYCLLoopCoalesceEnable() { + StagedAttrs.SYCLLoopCoalesceEnable = true; + } + + /// Set value of coalesced levels for the next loop pushed. + void setSYCLLoopCoalesceNLevels(unsigned C) { + StagedAttrs.SYCLLoopCoalesceNLevels = C; + } + + /// Set flag of disable_loop_pipelining for the next loop pushed. + void setSYCLLoopPipeliningDisable() { + StagedAttrs.SYCLLoopPipeliningDisable = true; + } + + /// Set flag of max_interleaving for the next loop pushed. + void setSYCLMaxInterleavingEnable() { + StagedAttrs.SYCLMaxInterleavingEnable = true; + } + + /// Set value of max interleaved invocations for the next loop pushed. + void setSYCLMaxInterleavingNInvocations(unsigned C) { + StagedAttrs.SYCLMaxInterleavingNInvocations = C; + } + + /// Set flag of speculated_iterations for the next loop pushed. + void setSYCLSpeculatedIterationsEnable() { + StagedAttrs.SYCLSpeculatedIterationsEnable = true; + } + + /// Set value of concurrent speculated iterations for the next loop pushed. + void setSYCLSpeculatedIterationsNIterations(unsigned C) { + StagedAttrs.SYCLSpeculatedIterationsNIterations = C; + } + /// Set the unroll count for the next loop pushed. void setUnrollCount(unsigned C) { StagedAttrs.UnrollCount = C; } diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp index f2973da2322e..f7c0c55b5d31 100644 --- a/clang/lib/Driver/Action.cpp +++ b/clang/lib/Driver/Action.cpp @@ -53,6 +53,8 @@ const char *Action::getClassName(ActionClass AC) { return "partial-link"; case BackendCompileJobClass: return "backend-compiler"; + case FileTableTformJobClass: + return "file-table-tform"; } llvm_unreachable("invalid class"); @@ -473,3 +475,23 @@ BackendCompileJobAction::BackendCompileJobAction(ActionList &Inputs, BackendCompileJobAction::BackendCompileJobAction(Action *Input, types::ID Type) : JobAction(BackendCompileJobClass, Input, Type) {} + +void FileTableTformJobAction::anchor() {} + +FileTableTformJobAction::FileTableTformJobAction(Action *Input, types::ID Type) + : JobAction(FileTableTformJobClass, Input, Type) {} + +FileTableTformJobAction::FileTableTformJobAction(ActionList &Inputs, + types::ID Type) + : JobAction(FileTableTformJobClass, Inputs, Type) {} + +void FileTableTformJobAction::addExtractColumnTform(StringRef ColumnName, + bool WithColTitle) { + auto K = WithColTitle ? Tform::EXTRACT : Tform::EXTRACT_DROP_TITLE; + Tforms.emplace_back(Tform(K, {ColumnName})); +} + +void FileTableTformJobAction::addReplaceColumnTform(StringRef From, + StringRef To) { + Tforms.emplace_back(Tform(Tform::REPLACE, {From, To})); +} diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp index a1408c9861f8..28eacdb05544 100644 --- a/clang/lib/Driver/Compilation.cpp +++ b/clang/lib/Driver/Compilation.cpp @@ -23,6 +23,7 @@ #include "llvm/Option/OptSpecifier.h" #include "llvm/Option/Option.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/SimpleTable.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -136,12 +137,29 @@ bool Compilation::CleanupFileList(const TempFileList &Files, // Temporary file lists contain files that need to be cleaned. The // file containing the information is also removed if (File.second == types::TY_Tempfilelist || - File.second == types::TY_TempEntriesfilelist) { - std::ifstream ListFile(File.first); - if (ListFile) { - // These are temporary files and need to be removed. + File.second == types::TY_Tempfiletable) { + // These are temporary files and need to be removed. + bool IsTable = File.second == types::TY_Tempfiletable; + + if (IsTable) { + if (llvm::sys::fs::exists(File.first)) { + auto T = llvm::util::SimpleTable::read(File.first); + if (!T) { + Success = false; + continue; + } + std::vector TmpFileNames; + T->get()->linearize(TmpFileNames); + + for (const auto &TmpFileName : TmpFileNames) { + if (!TmpFileName.empty()) + Success &= CleanupFile(TmpFileName.c_str(), IssueErrors); + } + } + } else { + std::ifstream ListFile(File.first); std::string TmpFileName; - while(std::getline(ListFile, TmpFileName) && !TmpFileName.empty()) + while (std::getline(ListFile, TmpFileName) && !TmpFileName.empty()) Success &= CleanupFile(TmpFileName.c_str(), IssueErrors); } } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index c08e54d5a54e..934703a8d8b7 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3714,7 +3714,15 @@ class OffloadingActionBuilder final { auto *DeviceCheckAction = C.MakeAction(I, types::TY_Object); DeviceObjects.push_back(DeviceCheckAction); - } else { + continue; + } + // We want to move the AOCX/AOCR binary to the front of the objects + // allowing it to be picked up instead of the other device objects + // at runtime. + // TODO: In the presense of existing FPGA Device binaries (AOCX) + // we do not need to perform/add the SPIR-V generated device + // binaries from sources or objects. + if (types::isFPGA(I->getType())) { // Do not perform a device link and only pass the aocr // file to the offline compilation before wrapping. Just // wrap an aocx file. @@ -3729,7 +3737,9 @@ class OffloadingActionBuilder final { C.MakeAction(I, types::TY_Object); DA.add(*DeviceWrappingAction, **TC, /*BoundArch=*/nullptr, Action::OFK_SYCL); + continue; } + DeviceObjects.push_back(I); } if (!DeviceObjects.empty()) { // When aocx or aocr is found, there is an expectation that none of @@ -3756,74 +3766,161 @@ class OffloadingActionBuilder final { else LinkObjects.push_back(Input); } + // The linkage actions subgraph leading to the offload wrapper. + // [cond] Means incoming/outgoing dependence is created only when cond + // is true. A function of: + // n - target is NVPTX + // a - SPIRV AOT compilation is requested + // s - device code split requested + // * - "all other cases" + // - no condition means output/input is "always" present + // First symbol indicates output/input type + // . - single file output (TY_SPIRV, TY_LLVM_BC,...) + // - - TY_Tempfilelist + // + - TY_Tempfiletable + // + // .-----------------. + // |Link(LinkObjects)| + // .-----------------. + // | + // .--------------------------------------. + // | PostLink | + // .--------------------------------------. + // [.n] [.!na!s] [+*] [+*] + // | | | | + // | | .----------------. | + // | | |FileTableTform | | + // | | |(extract "Code")| | + // | | .----------------. | + // | | [-] | + // | | | | + // | [.a!s] [-*] | + // .-------------. .---------------------. | + // |finalizeNVPTX| | SPIRVTranslator | | + // .-------------. .---------------------. | + // | [.a!s] [-as] [-!a] | + // | | | | | + // | [.!s] [-s] | | + // | .----------------. | | + // | | BackendCompile | | | + // | .----------------. | | + // | [.!s] [-s] | | + // | | | | | + // | | [-a] [-!a] [+] + // | | .----------------. + // | | |FileTableTform | + // | | |(replace "Code")| + // | | .----------------. + // | | | + // [.n] [.!na!s] [+*] + // .--------------------------------------. + // | OffloadWrapper | + // .--------------------------------------. + // Action *DeviceLinkAction = C.MakeAction(LinkObjects, types::TY_LLVM_BC); - ActionList WrapperInputs; - types::ID OutType = types::TY_SPIRV; - if (DeviceCodeSplit) { - auto *SplitAction = C.MakeAction( - DeviceLinkAction, types::TY_Tempfilelist); - auto *EntryGenAction = C.MakeAction( - DeviceLinkAction, types::TY_TempEntriesfilelist); - DeviceLinkAction = SplitAction; - WrapperInputs.push_back(EntryGenAction); - OutType = types::TY_Tempfilelist; - } + // setup some flags upfront auto isNVPTX = (*TC)->getTriple().isNVPTX(); - if (isNVPTX) { - DeviceLinkAction = - finalizeNVPTXDependences(DeviceLinkAction, (*TC)->getTriple()); - } - else - DeviceLinkAction = - C.MakeAction(DeviceLinkAction, OutType); + if (isNVPTX && DeviceCodeSplit) { + // TODO Temporary limitation, need to support code splitting for PTX + const Driver &D = C.getDriver(); + const std::string &OptName = + D.getOpts() + .getOption(options::OPT_fsycl_device_code_split) + .getPrefixedName(); + D.Diag(diag::err_drv_unsupported_opt_for_target) + << OptName << (*TC)->getTriple().str(); + } auto TT = SYCLTripleList[I]; - bool SYCLAOTCompile = - (TT.getSubArch() != llvm::Triple::NoSubArch && - (TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga || - TT.getSubArch() == llvm::Triple::SPIRSubArch_gen || - TT.getSubArch() == llvm::Triple::SPIRSubArch_x86_64)); + bool isSpirvAOT = TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga || + TT.getSubArch() == llvm::Triple::SPIRSubArch_gen || + TT.getSubArch() == llvm::Triple::SPIRSubArch_x86_64; + // reflects whether current target is ahead-of-time and can't support + // runtime setting of specialization constants + bool isAOT = isNVPTX || isSpirvAOT; + // TODO support device code split for NVPTX target - // After the Link, wrap the files before the final host link - if (SYCLAOTCompile) { - OutType = types::TY_Tempfilelist; - if (!DeviceCodeSplit) { - OutType = (TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga) - ? FPGAOutType - : types::TY_Image; + ActionList WrapperInputs; + // post link is not optional - even if not splitting, always need to + // process specialization constants + bool MultiFileActionDeps = !isSpirvAOT || DeviceCodeSplit; + types::ID PostLinkOutType = isNVPTX || !MultiFileActionDeps + ? types::TY_LLVM_BC + : types::TY_Tempfiletable; + auto *PostLinkAction = C.MakeAction( + DeviceLinkAction, PostLinkOutType); + PostLinkAction->setRTSetsSpecConstants(!isAOT); + + if (isNVPTX) { + Action *FinAction = + finalizeNVPTXDependences(PostLinkAction, (*TC)->getTriple()); + WrapperInputs.push_back(FinAction); + } else { + // For SPIRV-based targets - translate to SPIRV then optionally + // compile ahead-of-time to native architecture + Action *SPIRVInput = PostLinkAction; + constexpr char COL_CODE[] = "Code"; + + if (MultiFileActionDeps) { + auto *ExtractIRFilesAction = C.MakeAction( + PostLinkAction, types::TY_Tempfilelist); + // single column w/o title fits TY_Tempfilelist format + ExtractIRFilesAction->addExtractColumnTform(COL_CODE, + false /*drop titles*/); + SPIRVInput = ExtractIRFilesAction; + } + types::ID SPIRVOutType = + MultiFileActionDeps ? types::TY_Tempfilelist : types::TY_SPIRV; + Action *BuildCodeAction = + C.MakeAction(SPIRVInput, SPIRVOutType); + + // After the Link, wrap the files before the final host link + if (isSpirvAOT) { + types::ID OutType = types::TY_Tempfilelist; + if (!DeviceCodeSplit) { + OutType = (TT.getSubArch() == llvm::Triple::SPIRSubArch_fpga) + ? FPGAOutType + : types::TY_Image; + } + // Do the additional Ahead of Time compilation when the specific + // triple calls for it (provided a valid subarch). + ActionList BEInputs; + BEInputs.push_back(BuildCodeAction); + for (Action *A : FPGAObjectInputs) { + // Send any known objects through the unbundler to grab the + // dependency file associated. + ActionList AL; + AL.push_back(A); + Action *UnbundleAction = C.MakeAction( + AL, types::TY_FPGA_Dependencies); + BEInputs.push_back(UnbundleAction); + } + for (const auto &A : DeviceLibObjects) + BEInputs.push_back(A); + BuildCodeAction = + C.MakeAction(BEInputs, OutType); } - // Do the additional Ahead of Time compilation when the specific - // triple calls for it (provided a valid subarch). - Action *DeviceBECompileAction; - ActionList BEActionList; - BEActionList.push_back(DeviceLinkAction); - for (Action *A : FPGAObjectInputs) { - // Send any known objects through the unbundler to grab the - // dependency file associated. - ActionList AL; - AL.push_back(A); - Action *UnbundleAction = C.MakeAction( - AL, types::TY_FPGA_Dependencies); - BEActionList.push_back(UnbundleAction); + if (MultiFileActionDeps) { + ActionList TformInputs{PostLinkAction, BuildCodeAction}; + auto *ReplaceFilesAction = C.MakeAction( + TformInputs, types::TY_Tempfiletable); + ReplaceFilesAction->addReplaceColumnTform(COL_CODE, COL_CODE); + BuildCodeAction = ReplaceFilesAction; } - for (const auto &A : DeviceLibObjects) - BEActionList.push_back(A); - DeviceBECompileAction = - C.MakeAction(BEActionList, OutType); - WrapperInputs.push_back(DeviceBECompileAction); - auto *DeviceWrappingAction = C.MakeAction( - WrapperInputs, types::TY_Object); + WrapperInputs.push_back(BuildCodeAction); + } + // After the Link, wrap the files before the final host link + auto *DeviceWrappingAction = C.MakeAction( + WrapperInputs, types::TY_Object); + + if (isSpirvAOT) DA.add(*DeviceWrappingAction, **TC, /*BoundArch=*/nullptr, Action::OFK_SYCL); - } else { - WrapperInputs.push_back(DeviceLinkAction); - auto *DeviceWrappingAction = C.MakeAction( - WrapperInputs, types::TY_Object); + else withBoundArchForToolChain(*TC, [&](const char *BoundArch) { DA.add(*DeviceWrappingAction, **TC, BoundArch, Action::OFK_SYCL); }); - } ++TC; ++I; } @@ -3903,6 +4000,8 @@ class OffloadingActionBuilder final { WrapDeviceOnlyBinary = Args.hasArg(options::OPT_fsycl_link_EQ); auto *DeviceCodeSplitArg = Args.getLastArg(options::OPT_fsycl_device_code_split_EQ); + // -fsycl-device-code-split is an alias to + // -fsycl-device-code-split=per_source DeviceCodeSplit = DeviceCodeSplitArg && DeviceCodeSplitArg->getValue() != StringRef("off"); // Device only compilation for -fsycl-link (no FPGA) and diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index a4c697563a76..215d632e53c9 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -342,6 +342,12 @@ Tool *ToolChain::getBackendCompiler() const { return BackendCompiler.get(); } +Tool *ToolChain::getTableTform() const { + if (!FileTableTform) + FileTableTform.reset(new tools::FileTableTform(*this)); + return FileTableTform.get(); +} + Tool *ToolChain::getTool(Action::ActionClass AC) const { switch (AC) { case Action::AssembleJobClass: @@ -392,6 +398,9 @@ Tool *ToolChain::getTool(Action::ActionClass AC) const { case Action::BackendCompileJobClass: return getBackendCompiler(); + + case Action::FileTableTformJobClass: + return getTableTform(); } llvm_unreachable("Invalid tool kind."); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 9997995827b6..08d4990ef8a7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "Clang.h" +#include "AMDGPU.h" #include "Arch/AArch64.h" #include "Arch/ARM.h" #include "Arch/Mips.h" @@ -15,11 +16,10 @@ #include "Arch/Sparc.h" #include "Arch/SystemZ.h" #include "Arch/X86.h" -#include "AMDGPU.h" #include "CommonArgs.h" #include "Hexagon.h" -#include "MSP430.h" #include "InputInfo.h" +#include "MSP430.h" #include "PS4CPU.h" #include "SYCL.h" #include "clang/Basic/CharInfo.h" @@ -35,6 +35,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Config/llvm-config.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compression.h" #include "llvm/Support/FileSystem.h" @@ -4118,6 +4119,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // We want to compile sycl kernels. CmdArgs.push_back("-fsycl"); CmdArgs.push_back("-fsycl-is-device"); + CmdArgs.push_back("-fdeclare-spirv-builtins"); // Pass the triple of host when doing SYCL auto AuxT = llvm::Triple(llvm::sys::getProcessTriple()); std::string NormalizedTriple = AuxT.normalize(); @@ -4141,10 +4143,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, } } - if (Triple.isSPIR()) { - CmdArgs.push_back("-disable-llvm-passes"); - } - if (Args.hasFlag(options::OPT_fsycl_allow_func_ptr, options::OPT_fno_sycl_allow_func_ptr, false)) { CmdArgs.push_back("-fsycl-allow-func-ptr"); @@ -6836,9 +6834,12 @@ const char *Clang::getDependencyFileName(const ArgList &Args, if (Arg *OutputOpt = Args.getLastArg(options::OPT_o, options::OPT__SLASH_Fo)) { - SmallString<128> OutputFilename(OutputOpt->getValue()); - llvm::sys::path::replace_extension(OutputFilename, llvm::Twine('d')); - return Args.MakeArgString(OutputFilename); + SmallString<128> OutputArgument(OutputOpt->getValue()); + if (llvm::sys::path::is_separator(OutputArgument.back())) + // If the argument is a directory, output to BaseName in that dir. + llvm::sys::path::append(OutputArgument, getBaseInputStem(Args, Inputs)); + llvm::sys::path::replace_extension(OutputArgument, llvm::Twine('d')); + return Args.MakeArgString(OutputArgument); } return Args.MakeArgString(Twine(getBaseInputStem(Args, Inputs)) + ".d"); @@ -7427,70 +7428,23 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, WrapperArgs.push_back( C.getArgs().MakeArgString(Twine("-kind=") + Twine(Kind))); - ArgStringList ForeachArgs; - - for (const InputInfo &I : Inputs) { - assert(I.isFilename() && "Invalid input."); - std::string FileName(I.getFilename()); - if (I.getType() == types::TY_Tempfilelist || - I.getType() == types::TY_TempEntriesfilelist) { - ForeachArgs.push_back( - C.getArgs().MakeArgString("--in-file-list=" + FileName)); - ForeachArgs.push_back( - C.getArgs().MakeArgString("--in-replace=" + FileName)); - - if (I.getType() == types::TY_TempEntriesfilelist) { - WrapperArgs.push_back( - C.getArgs().MakeArgString("-entries=" + FileName)); - continue; - } - } - WrapperArgs.push_back(C.getArgs().MakeArgString(FileName)); - } + assert((Inputs.size() > 0) && "no inputs for clang-offload-wrapper"); + assert(((Inputs[0].getType() != types::TY_Tempfiletable) || + (Inputs.size() == 1)) && + "wrong usage of clang-offload-wrapper with SYCL"); + const InputInfo &I = Inputs[0]; + assert(I.isFilename() && "Invalid input."); + + if (I.getType() == types::TY_Tempfiletable) + // wrapper actual input files are passed via the batch job file table: + WrapperArgs.push_back(C.getArgs().MakeArgString("-batch")); + WrapperArgs.push_back(C.getArgs().MakeArgString(I.getFilename())); auto Cmd = std::make_unique( JA, *this, TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), WrapperArgs, None); - if (!ForeachArgs.empty()) { - std::string ForeachOutName = - C.getDriver().GetTemporaryPath("wrapper-linker", "txt"); - const char *ForeachOutput = C.addTempFile( - C.getArgs().MakeArgString(ForeachOutName), types::TY_Tempfilelist); - SmallString<128> OutOpt("--out-file-list="); - OutOpt += ForeachOutput; - - // Construct llvm-foreach command. - // The llvm-foreach command looks like this: - // llvm-foreach --in-file-list=a.list --in-replace='{}' -- echo '{}' - ForeachArgs.push_back(C.getArgs().MakeArgString(OutOpt)); - ForeachArgs.push_back( - C.getArgs().MakeArgString("--out-replace=" + OutTmpName)); - ForeachArgs.push_back(C.getArgs().MakeArgString("--")); - - ForeachArgs.push_back(Cmd->getExecutable()); - for (auto &Arg : WrapperArgs) - ForeachArgs.push_back(Arg); - - SmallString<128> ForeachPath(C.getDriver().Dir); - llvm::sys::path::append(ForeachPath, "llvm-foreach"); - const char *Foreach = C.getArgs().MakeArgString(ForeachPath); - C.addCommand( - std::make_unique(JA, *this, Foreach, ForeachArgs, None)); - - // Construct llvm-link command. - SmallString<128> InOpt("@"); - InOpt += ForeachOutName; - ArgStringList LLVMLinkArgs{C.getArgs().MakeArgString("-o"), - WrapperFileName, - C.getArgs().MakeArgString(InOpt)}; - SmallString<128> LLVMLinkPath(C.getDriver().Dir); - llvm::sys::path::append(LLVMLinkPath, "llvm-link"); - const char *LLVMLink = C.getArgs().MakeArgString(LLVMLinkPath); - C.addCommand( - std::make_unique(JA, *this, LLVMLink, LLVMLinkArgs, None)); - } else - C.addCommand(std::move(Cmd)); + C.addCommand(std::move(Cmd)); // Construct llc command. // The output is an object file @@ -7512,7 +7466,7 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, const char *Llc = C.getArgs().MakeArgString(LlcPath); C.addCommand(std::make_unique(JA, *this, Llc, LlcArgs, None)); return; - } + } // end of SYCL flavor of offload wrapper command creation ArgStringList CmdArgs; @@ -7661,6 +7615,19 @@ void SPIRCheck::ConstructJob(Compilation &C, const JobAction &JA, C.addCommand(std::move(Cmd)); } +static void addArgs(ArgStringList &DstArgs, const llvm::opt::ArgList &Alloc, + ArrayRef SrcArgs) { + for (const auto Arg : SrcArgs) { + DstArgs.push_back(Alloc.MakeArgString(Arg)); + } +} + +// sycl-post-link tool normally outputs a file table (see the tool sources for +// format description) which lists all the other output files associated with +// the device LLVMIR bitcode. This is basically a triple of bitcode, symbols +// and specialization constant files. Single LLVM IR output can be generated as +// well under an option. +// void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, @@ -7668,39 +7635,115 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, const char *LinkingOutput) const { // Construct sycl-post-link command. assert(isa(JA) && "Expecting SYCL post link job!"); + ArgStringList CmdArgs; - // Variants of split command look like this: - // sycl-post-link input_file.bc -ir-files-list=ir.txt -o base_output - for - // IR files generation. - // sycl-post-link input_file.bc -txt-files-list=files.txt -o base_output - for - // entries files generation. + // See if device code splitting is requested + if (Arg *A = TCArgs.getLastArg(options::OPT_fsycl_device_code_split_EQ)) { + if (StringRef(A->getValue()) == "per_kernel") + addArgs(CmdArgs, TCArgs, {"-split=kernel"}); + else if (StringRef(A->getValue()) == "per_source") + addArgs(CmdArgs, TCArgs, {"-split=source"}); + else + // split must be off + assert(StringRef(A->getValue()) == "off"); + } + // OPT_fsycl_device_code_split is not checked as it is an alias to + // -fsycl-device-code-split=per_source - ArgStringList CmdArgs; - InputInfo Input = Inputs.front(); - const char *InputFileName = Input.getFilename(); + if (JA.getType() == types::TY_LLVM_BC) { + // single file output requested - this means only perform necessary IR + // transformations (like specialization constant intrinsic lowering) and + // output LLVMIR + addArgs(CmdArgs, TCArgs, {"-ir-output-only"}); + } else { + assert(JA.getType() == types::TY_Tempfiletable); + // Symbol file and specialization constant info generation is mandatory - + // add options unconditionally + addArgs(CmdArgs, TCArgs, {"-symbols"}); + } + // specialization constants processing is mandatory + if (llvm::dyn_cast(&JA)->getRTSetsSpecConstants()) + addArgs(CmdArgs, TCArgs, {"-spec-const=rt"}); + else + addArgs(CmdArgs, TCArgs, {"-spec-const=default"}); - CmdArgs.push_back(InputFileName); - std::string OutputFileName(Output.getFilename()); - if (Output.getType() == types::TY_Tempfilelist) - CmdArgs.push_back(TCArgs.MakeArgString("-ir-files-list=" + OutputFileName)); - else if (Output.getType() == types::TY_TempEntriesfilelist) - CmdArgs.push_back( - TCArgs.MakeArgString("-txt-files-list=" + OutputFileName)); - SmallString<128> TmpName; - llvm::sys::fs::createUniquePath("split-%%%%%%", TmpName, - /*MakeAbsolute*/ true); - CmdArgs.push_back(TCArgs.MakeArgString("-o")); - CmdArgs.push_back(TCArgs.MakeArgString(TmpName)); + // Add output file table file option + assert(Output.isFilename() && "output must be a filename"); + addArgs(CmdArgs, TCArgs, {"-o", Output.getFilename()}); - if (Arg *A = TCArgs.getLastArg(options::OPT_fsycl_device_code_split_EQ)) - if (A->getValue() == StringRef("per_kernel")) - CmdArgs.push_back("-one-kernel"); + // Add input file + assert(Inputs.size() == 1 && Inputs.front().isFilename() && + "single input file expected"); + addArgs(CmdArgs, TCArgs, {Inputs.front().getFilename()}); + std::string OutputFileName(Output.getFilename()); // All the inputs are encoded as commands. C.addCommand(std::make_unique( JA, *this, TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), - CmdArgs, None)); + CmdArgs, Inputs)); +} + +// Transforms the abstract representation (JA + Inputs + Outputs) of a file +// table transformation action to concrete command line (job) with actual +// inputs/outputs/options, and adds it to given compilation object. +void FileTableTform::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const { + + const auto &TformJob = *llvm::dyn_cast(&JA); + ArgStringList CmdArgs; + + // don't try to assert here whether the number of inputs is OK, argumnets are + // OK, etc. - better invoke the tool and see good error diagnostics + + // 1) add transformations + for (const auto &Tf : TformJob.getTforms()) { + switch (Tf.TheKind) { + case FileTableTformJobAction::Tform::EXTRACT: + case FileTableTformJobAction::Tform::EXTRACT_DROP_TITLE: { + SmallString<128> Arg("-extract="); + Arg += Tf.TheArgs[0]; + + for (unsigned I = 1; I < Tf.TheArgs.size(); ++I) { + Arg += ","; + Arg += Tf.TheArgs[I]; + } + addArgs(CmdArgs, TCArgs, {Arg}); + + if (Tf.TheKind == FileTableTformJobAction::Tform::EXTRACT_DROP_TITLE) + addArgs(CmdArgs, TCArgs, {"-drop_titles"}); + break; + } + case FileTableTformJobAction::Tform::REPLACE: { + assert(Tf.TheArgs.size() == 2 && "from/to column names expected"); + SmallString<128> Arg("-replace="); + Arg += Tf.TheArgs[0]; + Arg += ","; + Arg += Tf.TheArgs[1]; + addArgs(CmdArgs, TCArgs, {Arg}); + break; + } + default: + llvm_unreachable("unknown file table transformation kind"); + } + } + // 2) add output option + assert(Output.isFilename() && "table tform output must be a file"); + addArgs(CmdArgs, TCArgs, {"-o", Output.getFilename()}); + + // 3) add inputs + for (const auto &Input : Inputs) { + assert(Input.isFilename() && "table tform input must be a file"); + addArgs(CmdArgs, TCArgs, {Input.getFilename()}); + } + // 4) finally construct and add a command to the compilation + C.addCommand(std::make_unique( + JA, *this, + TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), + CmdArgs, Inputs)); } // For Linux, we have initial support for fat archives (archives which diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h index f1467e40746e..1552515c1461 100644 --- a/clang/lib/Driver/ToolChains/Clang.h +++ b/clang/lib/Driver/ToolChains/Clang.h @@ -206,6 +206,20 @@ class LLVM_LIBRARY_VISIBILITY SYCLPostLink final : public Tool { const char *LinkingOutput) const override; }; +/// File table transformation tool. +class LLVM_LIBRARY_VISIBILITY FileTableTform final : public Tool { +public: + FileTableTform(const ToolChain &TC) + : Tool("File table transformation", "file-table-tform", TC) {} + + bool hasIntegratedCPP() const override { return false; } + bool hasGoodDiagnostics() const override { return true; } + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; + /// Partially link objects and archives. class LLVM_LIBRARY_VISIBILITY PartialLink final : public Tool { public: @@ -218,6 +232,7 @@ class LLVM_LIBRARY_VISIBILITY PartialLink final : public Tool { const llvm::opt::ArgList &TCArgs, const char *LinkingOutput) const override; }; + } // end namespace tools } // end namespace driver diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 3b73a59c6292..cd2f6e33378c 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -232,20 +232,37 @@ void SYCL::fpga::BackendCompiler::ConstructJob(Compilation &C, ForeachExt = "aocr"; } + StringRef createdReportName; for (auto *A : Args) { - // Any input file is assumed to have a dependency file associated - if (A->getOption().getKind() == Option::InputClass) { - SmallString<128> FN(A->getSpelling()); - StringRef Ext(llvm::sys::path::extension(FN)); - if (!Ext.empty()) { - types::ID Ty = getToolChain().LookupTypeForExtension(Ext.drop_front()); - if (Ty == types::TY_INVALID) - continue; - if (types::isSrcFile(Ty)) { - llvm::sys::path::replace_extension(FN, "d"); - FPGADepFiles.push_back(InputInfo(types::TY_Dependencies, - Args.MakeArgString(FN), Args.MakeArgString(FN))); - } + // Any input file is assumed to have a dependency file associated and + // the report folder can also be named based on the first input. + if (A->getOption().getKind() != Option::InputClass) + continue; + SmallString<128> ArgName(A->getSpelling()); + StringRef Ext(llvm::sys::path::extension(ArgName)); + if (Ext.empty()) + continue; + types::ID Ty = getToolChain().LookupTypeForExtension(Ext.drop_front()); + if (Ty == types::TY_INVALID) + continue; + if (types::isSrcFile(Ty) || Ty == types::TY_Object) { + // Dependency files and the project report are created in CWD, so strip + // off any directory information if provided with the input file. + // TODO - Use temporary files for dependency file creation and + // usage with -fintelfpga. + ArgName = llvm::sys::path::filename(ArgName); + if (types::isSrcFile(Ty)) { + SmallString<128> DepName(ArgName); + llvm::sys::path::replace_extension(DepName, "d"); + FPGADepFiles.push_back(InputInfo(types::TY_Dependencies, + Args.MakeArgString(DepName), + Args.MakeArgString(DepName))); + } + if (createdReportName.empty()) { + // Project report should be saved into CWD, so strip off any + // directory information if provided with the input file. + llvm::sys::path::replace_extension(ArgName, "prj"); + createdReportName = Args.MakeArgString(ArgName); } } } @@ -270,26 +287,10 @@ void SYCL::fpga::BackendCompiler::ConstructJob(Compilation &C, const char * FolderName = Args.MakeArgString(FN); ReportOptArg += FolderName; } else { - // Output directory is based off of the first object name - for (Arg * Cur : Args) { - if (Cur->getOption().getKind() != Option::InputClass) - continue; - SmallString<128> ArgName = Cur->getSpelling(); - StringRef Ext(llvm::sys::path::extension(ArgName)); - if (Ext.empty()) - continue; - types::ID Ty = getToolChain().LookupTypeForExtension(Ext.drop_front()); - if (Ty == types::TY_INVALID) - continue; - if (types::isSrcFile(Ty) || Ty == types::TY_Object) { - // Project report should be saved into CWD, so strip off any - // directory information if provided with the input file. - ArgName = llvm::sys::path::filename(ArgName); - llvm::sys::path::replace_extension(ArgName, "prj"); - ReportOptArg += Args.MakeArgString(ArgName); - break; - } - } + // Output directory is based off of the first object name as captured + // above. + if (!createdReportName.empty()) + ReportOptArg += createdReportName; } if (!ReportOptArg.empty()) CmdArgs.push_back(C.getArgs().MakeArgString( diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 373d6dae9d6d..0181002543f9 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -780,7 +780,10 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Args.getLastArg(OPT_emit_llvm_uselists, OPT_no_emit_llvm_uselists)) Opts.EmitLLVMUseLists = A->getOption().getID() == OPT_emit_llvm_uselists; - Opts.DisableLLVMPasses = Args.hasArg(OPT_disable_llvm_passes); + Opts.DisableLLVMPasses = + Args.hasArg(OPT_disable_llvm_passes) || + (Args.hasArg(OPT_fsycl_is_device) && Triple.isSPIR() && + !Args.hasArg(OPT_fsycl_enable_optimizations)); Opts.DisableLifetimeMarkers = Args.hasArg(OPT_disable_lifetimemarkers); const llvm::Triple::ArchType DebugEntryValueArchs[] = { diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index c3de1f022cf5..8e9676441155 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -467,6 +467,10 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, Builder.defineMacro("CL_SYCL_LANGUAGE_VERSION", "121"); } + if (LangOpts.DeclareSPIRVBuiltins) { + Builder.defineMacro("__SPIRV_BUILTIN_DECLARATIONS__"); + } + // Not "standard" per se, but available even with the -undef flag. if (LangOpts.AsmPreprocessor) Builder.defineMacro("__ASSEMBLER__"); diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index cc278410d6e4..bfeab9df9bcf 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -2519,6 +2519,12 @@ bool Parser::ParseSYCLLoopAttributes(ParsedAttributes &Attrs) { if (Attrs.begin()->getKind() != ParsedAttr::AT_SYCLIntelFPGAIVDep && Attrs.begin()->getKind() != ParsedAttr::AT_SYCLIntelFPGAII && Attrs.begin()->getKind() != ParsedAttr::AT_SYCLIntelFPGAMaxConcurrency && + Attrs.begin()->getKind() != ParsedAttr::AT_SYCLIntelFPGALoopCoalesce && + Attrs.begin()->getKind() != + ParsedAttr::AT_SYCLIntelFPGADisableLoopPipelining && + Attrs.begin()->getKind() != ParsedAttr::AT_SYCLIntelFPGAMaxInterleaving && + Attrs.begin()->getKind() != + ParsedAttr::AT_SYCLIntelFPGASpeculatedIterations && Attrs.begin()->getKind() != ParsedAttr::AT_LoopUnrollHint) return true; diff --git a/clang/lib/Sema/SPIRVBuiltins.td b/clang/lib/Sema/SPIRVBuiltins.td index f11f97685e0f..69c8f7b82c54 100644 --- a/clang/lib/Sema/SPIRVBuiltins.td +++ b/clang/lib/Sema/SPIRVBuiltins.td @@ -30,6 +30,10 @@ def SPIRVAll : Version< 0>; class AddressSpace { string Name = _AS; } +// Default is important for the frontend as there is not necessarily +// an automatic conversion from this address space to +// the one it will be lowered to. +// This file assumes it will get lowered to generic or private. def DefaultAS : AddressSpace<"clang::LangAS::Default">; def PrivateAS : AddressSpace<"clang::LangAS::sycl_private">; def GlobalAS : AddressSpace<"clang::LangAS::sycl_global">; @@ -267,19 +271,40 @@ class Builtin _Signature, list _Attributes = Attr. // Helper to declare SPIR-V Core builtins. class SPVBuiltin _Signature, list _Attributes = Attr.None> : -Builtin<"__spirv_" # _Name, _Signature, _Attributes> {} + Builtin<"__spirv_" # _Name, _Signature, _Attributes> {} // Helper to declare OpenCL SPIR-V extended set builtins. class OCLSPVBuiltin _Signature, list _Attributes = Attr.None> : -SPVBuiltin<"ocl_" # _Name, _Signature, _Attributes> {} + SPVBuiltin<"ocl_" # _Name, _Signature, _Attributes> {} + +class ConstOCLSPVBuiltin _Signature> : + OCLSPVBuiltin<_Name, _Signature, Attr.Const> {} //===----------------------------------------------------------------------===// // Definitions of types //===----------------------------------------------------------------------===// +// OpenCL v1.0/1.2/2.0 s6.1.1: Built-in Scalar Data Types. +def Bool : IntType<"bool", QualType<"BoolTy">, 1>; +def TrueChar : IntType<"char", QualType<"CharTy", 0, 1>, 8>; +def Char : IntType<"schar", QualType<"SignedCharTy", 0, 1>, 8>; +def SChar : IntType<"schar", QualType<"SignedCharTy", 0, 1>, 8>; +def UChar : UIntType<"uchar", QualType<"UnsignedCharTy">, 8>; +def Short : IntType<"short", QualType<"ShortTy", 0, 1>, 16>; +def UShort : UIntType<"ushort", QualType<"UnsignedShortTy">, 16>; +def Int : IntType<"int", QualType<"IntTy", 0, 1>, 32>; +def UInt : UIntType<"uint", QualType<"UnsignedIntTy">, 32>; +def Long : IntType<"long", QualType<"getIntTypeForBitwidth(64, true)", 0, 1>, 64>; +def ULong : UIntType<"ulong", QualType<"getIntTypeForBitwidth(64, false)">, 64>; def Float : FPType<"float", QualType<"FloatTy">, 32>; def Double : FPType<"double", QualType<"DoubleTy">, 64>; def Half : FPType<"half", QualType<"Float16Ty">, 16>; +def Void : Type<"void", QualType<"VoidTy">>; +// FIXME: ensure this is portable... +def Size : Type<"size_t", QualType<"getSizeType()">>; + +def Sampler : Type<"sampler_t", QualType<"OCLSamplerTy">>; +def Event : Type<"event_t", QualType<"OCLEventTy">>; //===----------------------------------------------------------------------===// // Definitions of gentype variants @@ -287,14 +312,77 @@ def Half : FPType<"half", QualType<"Float16Ty">, 16>; // Vector width lists. def VecAndScalar: IntList<"VecAndScalar", [1, 2, 3, 4, 8, 16]>; +def VecNoScalar : IntList<"VecNoScalar", [2, 3, 4, 8, 16]>; +def Vec1 : IntList<"Vec1", [1]>; +def Vec2 : IntList<"Vec2", [2]>; +def Vec4 : IntList<"Vec4", [4]>; +def Vec8 : IntList<"Vec8", [8]>; +def Vec16 : IntList<"Vec16", [16]>; +def Vec1234 : IntList<"Vec1234", [1, 2, 3, 4]>; // Type lists. +def TLAll : TypeList<[Char, UChar, Short, UShort, Int, UInt, Long, ULong, Float, Double, Half]>; +def TLAllUnsigned : TypeList<[UChar, UChar, UShort, UShort, UInt, UInt, ULong, ULong, UInt, ULong, UShort]>; def TLFloat : TypeList<[Float, Double, Half]>; - +// FIXME: handle properly char (signed or unsigned depending on host) +def TLSignedInts : TypeList<[Char, Short, Int, Long]>; +def TLUnsignedInts : TypeList<[UChar, UShort, UInt, ULong]>; + +// Signed to Unsigned conversion +// FIXME: handle properly char (signed or unsigned depending on host) +def TLSToUSignedInts : TypeList<[Char, Short, Int, Long]>; +def TLSToUUnsignedInts : TypeList<[UChar, UShort, UInt, ULong]>; + +def TLIntLongFloats : TypeList<[Int, UInt, Long, ULong, Float, Double, Half]>; + +// All unsigned integer types twice, to facilitate unsigned return types for e.g. +// uchar abs(char) and +// uchar abs(uchar). +def TLAllUIntsTwice : TypeList<[UChar, UChar, UChar, UShort, UShort, UInt, UInt, ULong, ULong]>; + +def TLAllInts : TypeList<[Char, UChar, Short, UShort, Int, UInt, Long, ULong]>; + +// GenType definitions for multiple base types (e.g. all floating point types, +// or all integer types). +// All types +def AGenType1 : GenericType<"AGenType1", TLAll, Vec1>; +def AGenTypeN : GenericType<"AGenTypeN", TLAll, VecAndScalar>; +def AGenTypeNNoScalar : GenericType<"AGenTypeNNoScalar", TLAll, VecNoScalar>; +// All integer +def AIGenType1 : GenericType<"AIGenType1", TLAllInts, Vec1>; +def AIGenTypeN : GenericType<"AIGenTypeN", TLAllInts, VecAndScalar>; +def AUIGenTypeN : GenericType<"AUIGenTypeN", TLUnsignedInts, VecAndScalar>; +def ASIGenTypeN : GenericType<"ASIGenTypeN", TLSignedInts, VecAndScalar>; +def AIGenTypeNNoScalar : GenericType<"AIGenTypeNNoScalar", TLAllInts, VecNoScalar>; +// All integer to unsigned +def AI2UGenTypeN : GenericType<"AI2UGenTypeN", TLAllUIntsTwice, VecAndScalar>; +// Signed integer +def SGenTypeN : GenericType<"SGenTypeN", TLSignedInts, VecAndScalar>; +// Unsigned integer +def UGenTypeN : GenericType<"UGenTypeN", TLUnsignedInts, VecAndScalar>; // Float def FGenTypeN : GenericType<"FGenTypeN", TLFloat, VecAndScalar>; +// (u)int, (u)long, and all floats +def IntLongFloatGenType1 : GenericType<"IntLongFloatGenType1", TLIntLongFloats, Vec1>; + +// GenType definitions for every single base type (e.g. fp32 only). +// Names are like: GenTypeFloatVecAndScalar. +foreach Type = [Bool, Char, UChar, Short, UShort, + Int, UInt, Long, ULong, + Float, Double, Half] in { + foreach VecSizes = [VecAndScalar, VecNoScalar] in { + def "GenType" # Type # VecSizes : + GenericType<"GenType" # Type # VecSizes, + TypeList<[Type]>, VecSizes>; + } +} - +// GenType definitions for vec1234. +foreach Type = [Float, Double, Half] in { + def "GenType" # Type # Vec1234 : + GenericType<"GenType" # Type # Vec1234, + TypeList<[Type]>, Vec1234>; +} //===----------------------------------------------------------------------===// // Definitions of builtins @@ -303,5 +391,513 @@ def FGenTypeN : GenericType<"FGenTypeN", TLFloat, VecAndScalar>; // 2.1. Math extended instructions -def : OCLSPVBuiltin<"acos", [FGenTypeN, FGenTypeN], Attr.Const>; +foreach name = ["acos", "acosh", "acospi", + "asin", "asinh", "asinpi", + "atan", "atanh", "atanpi", + "cbrt", "ceil", "cos", + "cosh", "cospi", + "erfc", "erf", + "exp", "exp2", "exp10", + "expm1", "fabs", "floor", "lgamma", + "log", "log2", "log10", "log1p", "logb", + "rint", "round", "rsqrt", + "sin", "sinh", "sinpi", + "sqrt", + "tan", "tanh", "tanpi", + "tgamma", "trunc"] in { + def : ConstOCLSPVBuiltin; +} + +foreach name = ["fmax", "fmin", "fmod", + "atan2", "atan2pi", + "copysign", "fdim", "hypot", + "maxmag", "minmag", "nextafter", + "pow", "powr", "remainder"] in { + def : ConstOCLSPVBuiltin; +} + +foreach name = ["fma", "mad"] in { + def : ConstOCLSPVBuiltin; +} + +foreach AS = [GlobalAS, LocalAS, PrivateAS, GenericAS, DefaultAS] in { + foreach name = ["fract", "modf"] in { + def : OCLSPVBuiltin]>; + } + + foreach name = ["frexp", "lgamma_r"] in { + foreach Type = [GenTypeFloatVecAndScalar, GenTypeDoubleVecAndScalar, GenTypeHalfVecAndScalar] in { + def : OCLSPVBuiltin]>; + } + } +} + +foreach name = ["ilogb"] in { + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; +} + +foreach name = ["ldexp"] in { + foreach Type = [GenTypeFloatVecAndScalar, GenTypeDoubleVecAndScalar, GenTypeHalfVecAndScalar] in { + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + } +} + +foreach name = ["nan"] in { + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; +} + +foreach name = ["pown"] in { + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; +} + +foreach AS = [GlobalAS, LocalAS, PrivateAS, GenericAS, DefaultAS] in { + foreach name = ["remquo"] in { + foreach Type = [GenTypeFloatVecAndScalar, GenTypeDoubleVecAndScalar, GenTypeHalfVecAndScalar] in { + def : OCLSPVBuiltin]>; + } + } +} + +foreach name = ["rootn"] in { + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; +} + +foreach AS = [GlobalAS, LocalAS, PrivateAS, GenericAS, DefaultAS] in { + foreach name = ["sincos"] in { + def : OCLSPVBuiltin]>; + } +} + +foreach name = ["half_cos", + "half_exp", "half_exp2", "half_exp10", + "half_log", "half_log2", "half_log10", + "half_recip", "half_rsqrt", + "half_sin", "half_sqrt", "half_tan"] in { + def : ConstOCLSPVBuiltin; +} + +foreach name = ["half_divide", "half_powr"] in { + def : ConstOCLSPVBuiltin; +} + +foreach name = ["native_cos", "native_exp", "native_exp2", "native_exp10", + "native_log", "native_log2", "native_log10", + "native_recip", "native_rsqrt", + "native_sin", "native_sqrt", "native_tan"] in { + def : ConstOCLSPVBuiltin; +} + +foreach name = ["native_divide", "native_powr"] in { + def : ConstOCLSPVBuiltin; +} + +// 2.2. Integer instructions + +foreach name = ["clz", "ctz", "popcount"] in { + def : ConstOCLSPVBuiltin; +} + +def : ConstOCLSPVBuiltin<"rotate", [AIGenTypeN, AIGenTypeN, AIGenTypeN]>; + +def : ConstOCLSPVBuiltin<"s_abs", [AUIGenTypeN, ASIGenTypeN]>; + +def : ConstOCLSPVBuiltin<"s_abs_diff", [AUIGenTypeN, ASIGenTypeN, ASIGenTypeN]>; + +foreach name = ["s_add_sat", + "s_hadd", "s_rhadd", + "s_max", "s_min", + "s_mul_hi", "s_sub_sat"] in { + def : ConstOCLSPVBuiltin; +} + +foreach name = ["s_clamp", "s_mad_hi", "s_mad_sat"] in { + def : ConstOCLSPVBuiltin; +} + +foreach name = ["s_upsample"] in { + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; +} + +def : ConstOCLSPVBuiltin<"s_mad24", [GenTypeIntVecAndScalar, GenTypeIntVecAndScalar, GenTypeIntVecAndScalar, GenTypeIntVecAndScalar]>; + +def : ConstOCLSPVBuiltin<"s_mul24", [GenTypeIntVecAndScalar, GenTypeIntVecAndScalar, GenTypeIntVecAndScalar]>; + +foreach name = ["u_add_sat", "u_hadd", + "u_rhadd", + "u_max", "u_min", "u_sub_sat", + "u_abs_diff", "u_mul_hi"] in { + def : ConstOCLSPVBuiltin; +} + +foreach name = ["u_clamp", "u_mad_sat", "u_mad_hi"] in { + def : ConstOCLSPVBuiltin; +} + +foreach name = ["u_upsample"] in { + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; +} + +def : ConstOCLSPVBuiltin<"u_mad24", [GenTypeUIntVecAndScalar, GenTypeUIntVecAndScalar, GenTypeUIntVecAndScalar, GenTypeUIntVecAndScalar]>; + +def : ConstOCLSPVBuiltin<"u_mul24", [GenTypeUIntVecAndScalar, GenTypeUIntVecAndScalar, GenTypeUIntVecAndScalar]>; + +def : ConstOCLSPVBuiltin<"u_abs", [AUIGenTypeN, AUIGenTypeN]>; + +// 2.3. Common instructions + +foreach name = ["degrees", "radians", "sign"] in { + def : ConstOCLSPVBuiltin; +} + +foreach name = ["fmax_common", "fmin_common", "step"] in { + def : ConstOCLSPVBuiltin; +} + +foreach name = ["fclamp", "mix", "smoothstep"] in { + def : ConstOCLSPVBuiltin; +} + +// 2.4. Geometric instructions + +foreach name = ["cross"] in { + foreach VSize = [3, 4] in { + def : ConstOCLSPVBuiltin, VectorType, VectorType]>; + def : ConstOCLSPVBuiltin, VectorType, VectorType]>; + def : ConstOCLSPVBuiltin, VectorType, VectorType]>; + } +} + +foreach name = ["distance"] in { + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; +} + +foreach name = ["length"] in { + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; +} + +foreach name = ["normalize"] in { + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; +} + +def : ConstOCLSPVBuiltin<"fast_distance", [Float, GenTypeFloatVec1234, GenTypeFloatVec1234]>; + +def : ConstOCLSPVBuiltin<"fast_length", [Float, GenTypeFloatVec1234]>; + +def : ConstOCLSPVBuiltin<"fast_normalize", [GenTypeFloatVec1234, GenTypeFloatVec1234]>; + +// 2.5. Relational instructions + +def : ConstOCLSPVBuiltin<"bitselect", [AGenTypeN, AGenTypeN, AGenTypeN, AGenTypeN]>; + +foreach name = ["select"] in { + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; + def : ConstOCLSPVBuiltin; +} + +// 2.6. Vector Data Load and Store instructions + +foreach VSize = [2, 3, 4, 8, 16] in { + foreach AS = [GlobalAS, LocalAS, PrivateAS, ConstantAS, GenericAS, DefaultAS] in { + foreach Ty = TLAll.List in { + foreach name = ["vloadn"] in { + def : OCLSPVBuiltin, Size, PointerType, AS>]>; + } + } + foreach name = ["vloada_halfn", "vload_halfn"] in { + def : OCLSPVBuiltin, Size, PointerType, AS>]>; + } + } + foreach AS = [GlobalAS, LocalAS, PrivateAS, GenericAS, DefaultAS] in { + foreach Ty = TLAll.List in { + foreach name = ["vstoren"] in { + def : OCLSPVBuiltin, Size, PointerType, AS>]>; + } + } + foreach rnd = ["", "_rte", "_rtz", "_rtp", "_rtn"] in { + foreach name = ["vstore_halfn" # rnd, "vstorea_halfn" # rnd] in { + def : OCLSPVBuiltin, Size, PointerType]>; + def : OCLSPVBuiltin, Size, PointerType]>; + } + } + } +} + +foreach AS = [GlobalAS, LocalAS, PrivateAS, ConstantAS, GenericAS, DefaultAS] in { + foreach name = ["vload_half"] in { + def : OCLSPVBuiltin, AS>]>; + } +} + +foreach AS = [GlobalAS, LocalAS, PrivateAS, GenericAS, DefaultAS] in { + foreach rnd = ["", "_rte", "_rtz", "_rtp", "_rtn"] in { + foreach name = ["vstore_half" # rnd] in { + def : OCLSPVBuiltin]>; + def : OCLSPVBuiltin]>; + } + } +} + +// 2.7. Miscellaneous Vector instructions + +foreach VSize1 = [Vec2, Vec4, Vec8, Vec16] in { + foreach VSize2 = [Vec2, Vec4, Vec8, Vec16] in { + def : OCLSPVBuiltin<"shuffle", [GenericType<"TLAll" # VSize1.Name, TLAll, VSize1>, + GenericType<"TLAll" # VSize2.Name, TLAll, VSize2>, + GenericType<"TLAllUnsigned" # VSize1.Name, TLAllUnsigned, VSize1>], + Attr.Const>; + } +} +foreach VSize1 = [Vec2, Vec4, Vec8, Vec16] in { + foreach VSize2 = [Vec2, Vec4, Vec8, Vec16] in { + def : OCLSPVBuiltin<"shuffle2", [GenericType<"TLAll" # VSize1.Name, TLAll, VSize1>, + GenericType<"TLAll" # VSize2.Name, TLAll, VSize2>, + GenericType<"TLAll" # VSize2.Name, TLAll, VSize2>, + GenericType<"TLAllUnsigned" # VSize1.Name, TLAllUnsigned, VSize1>], + Attr.Const>; + } +} + +// 2.8. Misc instructions + +let IsVariadic = 1 in { + foreach name = ["printf"] in { + def : OCLSPVBuiltin, ConstantAS>]>; + } +} + +foreach name = ["prefetch"] in { + def : OCLSPVBuiltin, GlobalAS>, Size]>; +} + + +// Core builtins + +// 3.32.8. Memory Instructions + +foreach name = ["GenericPtrMemSemantics"] in { + def : SPVBuiltin, GenericAS>], Attr.Const>; +} + +// 3.32.11. Conversion Instructions + +foreach IType = [UChar, UShort, UInt, ULong] in { + foreach FType = [Float, Double, Half] in { + def : SPVBuiltin<"ConvertFToU_R" # IType.Name, [IType, FType], Attr.Const>; + def : SPVBuiltin<"ConvertUToF_R" # FType.Name, [FType, IType], Attr.Const>; + foreach v = [2, 3, 4, 8, 16] in { + def : SPVBuiltin<"ConvertFToU_R" # IType.Name # v, + [VectorType, VectorType], + Attr.Const>; + def : SPVBuiltin<"ConvertUToF_R" # FType.Name # v, + [VectorType, VectorType], + Attr.Const>; + } + } +} + +foreach IType = [Char, Short, Int, Long] in { + foreach FType = [Float, Double, Half] in { + def : SPVBuiltin<"ConvertFToS_R" # IType.Name, [IType, FType], Attr.Const>; + def : SPVBuiltin<"ConvertSToF_R" # FType.Name, [FType, IType], Attr.Const>; + foreach v = [2, 3, 4, 8, 16] in { + def : SPVBuiltin<"ConvertFToS_R" # IType.Name # v, + [VectorType, VectorType], + Attr.Const>; + def : SPVBuiltin<"ConvertSToF_R" # FType.Name # v, + [VectorType, VectorType], + Attr.Const>; + } + } +} + +foreach InType = TLAll.List in { + foreach OutType = TLUnsignedInts.List in { + if !ne(OutType.ElementSize, InType.ElementSize) then { + def : SPVBuiltin<"UConvert_R" # OutType.Name, [OutType, InType], Attr.Const>; + foreach v = [2, 3, 4, 8, 16] in { + def : SPVBuiltin<"UConvert_R" # OutType.Name # v, + [VectorType, VectorType], + Attr.Const>; + } + } + } + foreach OutType = TLSignedInts.List in { + if !ne(OutType.ElementSize, InType.ElementSize) then { + def : SPVBuiltin<"SConvert_R" # OutType.Name, [OutType, InType], Attr.Const>; + foreach v = [2, 3, 4, 8, 16] in { + def : SPVBuiltin<"SConvert_R" # OutType.Name # v, + [VectorType, VectorType], + Attr.Const>; + } + } + } +} + +foreach InType = TLFloat.List in { + foreach OutType = TLFloat.List in { + if !ne(OutType.ElementSize, InType.ElementSize) then { + def : SPVBuiltin<"FConvert_R" # OutType.Name, [OutType, InType], Attr.Const>; + foreach v = [2, 3, 4, 8, 16] in { + def : SPVBuiltin<"FConvert_R" # OutType.Name # v, + [VectorType, VectorType], + Attr.Const>; + } + } + } +} + +foreach InType = TLSignedInts.List in { + foreach OutType = TLUnsignedInts.List in { + def : SPVBuiltin<"SatConvertSToU_R" # OutType.Name, [OutType, InType], Attr.Const>; + foreach v = [2, 3, 4, 8, 16] in { + def : SPVBuiltin<"SatConvertSToU_R" # OutType.Name # v, + [VectorType, VectorType], + Attr.Const>; + } + } +} + +foreach InType = TLUnsignedInts.List in { + foreach OutType = TLSignedInts.List in { + def : SPVBuiltin<"SatConvertUToS_R" # OutType.Name, [OutType, InType], Attr.Const>; + foreach v = [2, 3, 4, 8, 16] in { + def : SPVBuiltin<"SatConvertUToS_R" # OutType.Name # v, + [VectorType, VectorType], + Attr.Const>; + } + } +} + +foreach AS = [GlobalAS, LocalAS, PrivateAS] in { + def : SPVBuiltin<"GenericCastToPtrExplicit", [PointerType, PointerType], Attr.Const>; +} + +foreach Type = TLFloat.List in { + foreach v = [2, 3, 4, 8, 16] in { + def : SPVBuiltin<"VectorTimesScalar", [VectorType, VectorType, Type]>; + } +} + +foreach name = ["Dot"] in { + def : SPVBuiltin; + def : SPVBuiltin; + def : SPVBuiltin; +} + +foreach name = ["Any", "All"] in { + def : SPVBuiltin; +} + +foreach name = ["IsNan", "IsInf", "IsFinite", "IsNormal", "SignBitSet"] in { + def : SPVBuiltin; + def : SPVBuiltin; + def : SPVBuiltin; +} + +foreach name = ["LessOrGreater", + "Ordered", "Unordered", + "FOrdEqual", "FUnordEqual", + "FOrdNotEqual", "FUnordNotEqual", + "FOrdLessThan", "FUnordLessThan", + "FOrdGreaterThan", "FUnordGreaterThan", + "FOrdLessThanEqual", "FUnordLessThanEqual", + "FOrdGreaterThanEqual", "FUnordGreaterThanEqual"] in { + def : SPVBuiltin; + def : SPVBuiltin; + def : SPVBuiltin; +} + +foreach name = ["BitCount"] in { + def : SPVBuiltin; +} + +// 3.32.20. Barrier Instructions + +foreach name = ["ControlBarrier"] in { + // TODO: Allow enum flags instead of UInt ? + // TODO: We should enforce that the UInt must be a literal. + def : SPVBuiltin; +} + +foreach name = ["MemoryBarrier"] in { + // TODO: Allow enum flags instead of UInt ? + // TODO: We should enforce that the UInt must be a literal. + def : SPVBuiltin; +} + +// 3.32.21. Group and Subgroup Instructions + +foreach name = ["GroupAsyncCopy"] in { + // TODO: Allow enum flags instead of UInt ? + // TODO: We should enforce that the UInt must be a literal. + def : SPVBuiltin, PointerType, GlobalAS>, Size, Size, Event], Attr.Convergent>; + def : SPVBuiltin, PointerType, LocalAS>, Size, Size, Event], Attr.Convergent>; +} + +foreach name = ["GroupWaitEvents"] in { + def : SPVBuiltin], Attr.Convergent>; + def : SPVBuiltin], Attr.Convergent>; + def : SPVBuiltin], Attr.Convergent>; +} + +foreach name = ["GroupAll", "GroupAny"] in { + def : SPVBuiltin; +} + +foreach name = ["GroupBroadcast"] in { + foreach IDType = TLAllInts.List in { + def : SPVBuiltin; + def : SPVBuiltin], Attr.Convergent>; + def : SPVBuiltin], Attr.Convergent>; + def : SPVBuiltin; + def : SPVBuiltin], Attr.Convergent>; + def : SPVBuiltin], Attr.Convergent>; + } +} + +foreach name = ["GroupIAdd"] in { + def : SPVBuiltin; +} + +foreach name = ["GroupFAdd", "GroupFMin", "GroupFMax"] in { + def : SPVBuiltin; +} + +foreach name = ["GroupUMin", "GroupUMax"] in { + def : SPVBuiltin; +} + +foreach name = ["GroupSMin", "GroupSMax"] in { + def : SPVBuiltin; +} diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index e1f7a19201a5..ff11e97c5783 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1659,9 +1659,10 @@ Sema::DeviceDiagBuilder Sema::targetDiag(SourceLocation Loc, unsigned DiagID) { if (getLangOpts().CUDA) return getLangOpts().CUDAIsDevice ? CUDADiagIfDeviceCode(Loc, DiagID) : CUDADiagIfHostCode(Loc, DiagID); - // TODO: analyze which usages of targetDiag could be reused for SYCL. - // if (getLangOpts().SYCLIsDevice) - // return SYCLDiagIfDeviceCode(Loc, DiagID); + + if (getLangOpts().SYCLIsDevice) + return SYCLDiagIfDeviceCode(Loc, DiagID); + return DeviceDiagBuilder(DeviceDiagBuilder::K_Immediate, Loc, DiagID, getCurFunctionDecl(), *this); } diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 703b215a14a6..80b43ffdaeb5 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -2922,14 +2922,22 @@ static void handleWorkGroupSize(Sema &S, Decl *D, const ParsedAttr &AL) { return; uint32_t WGSize[3]; + if (AL.getKind() == ParsedAttr::AT_ReqdWorkGroupSize && + AL.getAttributeSpellingListIndex() == + ReqdWorkGroupSizeAttr::CXX11_intel_reqd_work_group_size) { + WGSize[1] = ReqdWorkGroupSizeAttr::DefaultYDim; + WGSize[2] = ReqdWorkGroupSizeAttr::DefaultZDim; + } else if (!checkAttributeNumArgs(S, AL, 3)) + return; + for (unsigned i = 0; i < 3; ++i) { - const Expr *E = AL.getArgAsExpr(i); - if (!checkUInt32Argument(S, AL, E, WGSize[i], i, + if (i < AL.getNumArgs() && + !checkUInt32Argument(S, AL, AL.getArgAsExpr(i), WGSize[i], i, /*StrictlyUnsigned=*/true)) return; if (WGSize[i] == 0) { S.Diag(AL.getLoc(), diag::err_attribute_argument_is_zero) - << AL << E->getSourceRange(); + << AL << AL.getArgAsExpr(i)->getSourceRange(); return; } } diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 3fcd4a66381a..9d1478c33e1b 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -78,6 +78,10 @@ class Util { /// \param Tmpl whether the class is template instantiation or simple record static bool isSyclType(const QualType &Ty, StringRef Name, bool Tmpl = false); + /// Checks whether given clang type is a full specialization of the SYCL + /// specialization constant class. + static bool isSyclSpecConstantType(const QualType &Ty); + /// Checks whether given clang type is declared in the given hierarchy of /// declaration contexts. /// \param Ty the clang type being checked @@ -773,6 +777,14 @@ static CompoundStmt *CreateOpenCLKernelBody(Sema &S, getExprForSpecialSYCLObj(FldType, WrapperFld, WrapperFldCRD, Base, InitMethodName, BodyStmts); + } else if (Util::isSyclSpecConstantType(FldType)) { + // Specialization constants are "invisible" to the + // kernel argument creation and device-side SYCL object + // materialization infrastructure in this source. + // It is OK not to really materialize them on the kernel + // side, because their only use can be via + // 'spec_const_obj.get()' method, which is translated to + // an intrinsic and 'this' is really never used. } else { // Field is a structure or class so change the wrapper // object and recursively search for accessor field. @@ -816,6 +828,8 @@ static CompoundStmt *CreateOpenCLKernelBody(Sema &S, InitExprs.push_back(MemberInit.get()); getExprForSpecialSYCLObj(FieldType, Field, CRD, KernelObjCloneRef, InitMethodName, BodyStmts); + } else if (Util::isSyclSpecConstantType(FieldType)) { + // Just skip specialization constants - not part of signature. } else if (CRD || FieldType->isScalarType()) { // If field has built-in or a structure/class type just initialize // this field with corresponding kernel argument using copy @@ -959,11 +973,13 @@ static bool buildArgTys(ASTContext &Context, CXXRecordDecl *KernelObj, QualType FldType = WrapperFld->getType(); if (FldType->isStructureOrClassType()) { if (Util::isSyclAccessorType(FldType)) { - // accessor field is found - create descriptor + // Accessor field is found - create descriptor. createSpecialSYCLObjParamDesc(WrapperFld, FldType); + } else if (Util::isSyclSpecConstantType(FldType)) { + // Don't try recursive search below. } else { - // field is some class or struct - recursively check for - // accessor fields + // Field is some class or struct - recursively check for + // accessor fields. createParamDescForWrappedAccessors(WrapperFld, FldType); } } @@ -985,6 +1001,8 @@ static bool buildArgTys(ASTContext &Context, CXXRecordDecl *KernelObj, QualType ArgTy = Fld->getType(); if (Util::isSyclAccessorType(ArgTy) || Util::isSyclSamplerType(ArgTy)) { createSpecialSYCLObjParamDesc(Fld, ArgTy); + } else if (Util::isSyclSpecConstantType(ArgTy)) { + // Specialization constants are not added as arguments. } else if (ArgTy->isStructureOrClassType()) { if (Context.getLangOpts().SYCLStdLayoutKernelParams) { if (!ArgTy->isStandardLayoutType()) { @@ -1056,7 +1074,7 @@ static void populateIntHeader(SYCLIntegrationHeader &H, const StringRef Name, const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(KernelObjTy); const std::string StableName = PredefinedExpr::ComputeName( Ctx, PredefinedExpr::UniqueStableNameExpr, NameType); - H.startKernel(Name, NameType, StableName); + H.startKernel(Name, NameType, StableName, KernelObjTy->getLocation()); auto populateHeaderForAccessor = [&](const QualType &ArgTy, uint64_t Offset) { // The parameter is a SYCL accessor object. @@ -1127,6 +1145,21 @@ static void populateIntHeader(SYCLIntegrationHeader &H, const StringRef Name, uint64_t Sz = Ctx.getTypeSizeInChars(Fld->getType()).getQuantity(); H.addParamDesc(SYCLIntegrationHeader::kind_pointer, static_cast(Sz), static_cast(Offset)); + } else if (Util::isSyclSpecConstantType(ArgTy)) { + // Add specialization constant ID to the header. + auto *TmplSpec = + cast(ArgTy->getAsCXXRecordDecl()); + const TemplateArgumentList *TemplateArgs = + &TmplSpec->getTemplateInstantiationArgs(); + // Get specialization constant ID type, which is the second template + // argument. + QualType SpecConstIDTy = TypeName::getFullyQualifiedType( + TemplateArgs->get(1).getAsType(), Ctx, true) + .getCanonicalType(); + const std::string SpecConstName = PredefinedExpr::ComputeName( + Ctx, PredefinedExpr::UniqueStableNameExpr, SpecConstIDTy); + H.addSpecConstant(SpecConstName, SpecConstIDTy); + // Spec constant lambda capture does not become a kernel argument. } else if (ArgTy->isStructureOrClassType() || ArgTy->isScalarType()) { // the parameter is an object of standard layout type or scalar; // the check for standard layout is done elsewhere @@ -1457,7 +1490,8 @@ static std::string eraseAnonNamespace(std::string S) { } // Emits a forward declaration -void SYCLIntegrationHeader::emitFwdDecl(raw_ostream &O, const Decl *D) { +void SYCLIntegrationHeader::emitFwdDecl(raw_ostream &O, const Decl *D, + SourceLocation KernelLocation) { // wrap the declaration into namespaces if needed unsigned NamespaceCnt = 0; std::string NSStr = ""; @@ -1475,8 +1509,18 @@ void SYCLIntegrationHeader::emitFwdDecl(raw_ostream &O, const Decl *D) { if (TD && TD->isCompleteDefinition() && !UnnamedLambdaSupport) { // defined class constituting the kernel name is not globally // accessible - contradicts the spec - Diag.Report(D->getSourceRange().getBegin(), - diag::err_sycl_kernel_name_class_not_top_level); + const bool KernelNameIsMissing = TD->getName().empty(); + if (KernelNameIsMissing) { + Diag.Report(KernelLocation, diag::err_sycl_kernel_incorrectly_named) + << /* kernel name is missing */ 0; + // Don't emit note if kernel name was completely omitted + } else { + Diag.Report(KernelLocation, diag::err_sycl_kernel_incorrectly_named) + << /* kernel name is not globally-visible */ 1; + Diag.Report(D->getSourceRange().getBegin(), + diag::note_previous_decl) + << TD->getName(); + } } } break; @@ -1543,7 +1587,8 @@ void SYCLIntegrationHeader::emitFwdDecl(raw_ostream &O, const Decl *D) { // template class SimpleVadd; // void SYCLIntegrationHeader::emitForwardClassDecls( - raw_ostream &O, QualType T, llvm::SmallPtrSetImpl &Printed) { + raw_ostream &O, QualType T, SourceLocation KernelLocation, + llvm::SmallPtrSetImpl &Printed) { // peel off the pointer types and get the class/struct type: for (; T->isPointerType(); T = T->getPointeeType()) @@ -1565,14 +1610,14 @@ void SYCLIntegrationHeader::emitForwardClassDecls( switch (Arg.getKind()) { case TemplateArgument::ArgKind::Type: - emitForwardClassDecls(O, Arg.getAsType(), Printed); + emitForwardClassDecls(O, Arg.getAsType(), KernelLocation, Printed); break; case TemplateArgument::ArgKind::Pack: { ArrayRef Pack = Arg.getPackAsArray(); for (const auto &T : Pack) { if (T.getKind() == TemplateArgument::ArgKind::Type) { - emitForwardClassDecls(O, T.getAsType(), Printed); + emitForwardClassDecls(O, T.getAsType(), KernelLocation, Printed); } } break; @@ -1595,7 +1640,7 @@ void SYCLIntegrationHeader::emitForwardClassDecls( // class template