Skip to content

Commit

Permalink
Merge branch 'sycl' into array_sub_region
Browse files Browse the repository at this point in the history
  • Loading branch information
isaacault committed Sep 12, 2024
2 parents e40db8a + 404fb8a commit b3139da
Show file tree
Hide file tree
Showing 89 changed files with 1,128 additions and 250 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/sycl-linux-precommit-aws.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ jobs:
with:
name: CUDA E2E
runner: '["aws_cuda-${{ github.event.workflow_run.id }}-${{ github.event.workflow_run.run_attempt }}"]'
image: ghcr.io/intel/llvm/ubuntu2204_build:latest-0300ac924620a51f76c4929794637b82790f12ab
image: ghcr.io/intel/llvm/ubuntu2204_build:latest
image_options: -u 1001 --gpus all --cap-add SYS_ADMIN --env NVIDIA_DISABLE_REQUIRE=1
target_devices: ext_oneapi_cuda:gpu
# No idea why but that seems to work and be in sync with the main
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/sycl-linux-precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ jobs:
build_artifact_suffix: "default"
build_cache_suffix: "default"
changes: ${{ needs.detect_changes.outputs.filters }}
build_image: "ghcr.io/intel/llvm/ubuntu2204_build:latest-0300ac924620a51f76c4929794637b82790f12ab"

determine_arc_tests:
name: Decide which Arc tests to run
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/sycl-post-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
reset_intel_gpu: true
- name: AMD/HIP
runner: '["Linux", "amdgpu"]'
image: ghcr.io/intel/llvm/ubuntu2204_build:latest-0300ac924620a51f76c4929794637b82790f12ab
image: ghcr.io/intel/llvm/ubuntu2204_build:latest
image_options: -u 1001 --device=/dev/dri --device=/dev/kfd
target_devices: ext_oneapi_hip:gpu
reset_intel_gpu: false
Expand Down
139 changes: 137 additions & 2 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,8 @@ Expected<OffloadFile> getInputBitcodeLibrary(StringRef Input) {
Image.StringData["arch"] = Arch;
Image.Image = std::move(*ImageOrError);

std::unique_ptr<MemoryBuffer> Binary =
MemoryBuffer::getMemBufferCopy(OffloadBinary::write(Image));
std::unique_ptr<MemoryBuffer> Binary = MemoryBuffer::getMemBufferCopy(
OffloadBinary::write(Image), Image.Image->getBufferIdentifier());
auto NewBinaryOrErr = OffloadBinary::create(*Binary);
if (!NewBinaryOrErr)
return NewBinaryOrErr.takeError();
Expand Down Expand Up @@ -1358,6 +1358,135 @@ static Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
return *DeviceLinkedFile;
}

static bool isStaticArchiveFile(const StringRef Filename) {
if (!llvm::sys::path::has_extension(Filename))
// Any file with no extension should not be considered an Archive.
return false;
llvm::file_magic Magic;
llvm::identify_magic(Filename, Magic);
// Only archive files are to be considered.
// TODO: .lib check to be added
return (Magic == llvm::file_magic::archive);
}

static Expected<StringRef> listSection(StringRef Filename,
const ArgList &Args) {
Expected<std::string> OffloadBundlerPath = findProgram(
"clang-offload-bundler", {getMainExecutable("clang-offload-bundler")});
if (!OffloadBundlerPath)
return OffloadBundlerPath.takeError();
BumpPtrAllocator Alloc;
StringSaver Saver(Alloc);

SmallVector<StringRef, 8> CmdArgs;
CmdArgs.push_back(*OffloadBundlerPath);
bool IsArchive = isStaticArchiveFile(Filename);
CmdArgs.push_back(IsArchive ? "-type=aoo" : "-type=o");
CmdArgs.push_back(Saver.save("-input=" + Filename));
CmdArgs.push_back("-list");
auto Output = createOutputFile("bundled-targets", "list");
if (!Output)
return Output.takeError();
SmallVector<std::optional<StringRef>> Redirects{std::nullopt, *Output,
std::nullopt};
int ErrCode = llvm::sys::ExecuteAndWait(*OffloadBundlerPath, CmdArgs,
std::nullopt, Redirects);
if (ErrCode != 0)
return createStringError(inconvertibleErrorCode(),
"Failed to list targets");
return *Output;
}

// This routine is used to run the clang-offload-bundler tool and unbundle
// device inputs that have been created with an older compiler where the
// device object is bundled into a host object.
static Expected<StringRef> unbundle(StringRef Filename, const ArgList &Args,
llvm::Triple Triple) {
Expected<std::string> OffloadBundlerPath = findProgram(
"clang-offload-bundler", {getMainExecutable("clang-offload-bundler")});
if (!OffloadBundlerPath)
return OffloadBundlerPath.takeError();

// Create a new file to write the unbundled file to.
auto TempFileOrErr =
createOutputFile(sys::path::filename(ExecutableName), "ir");
if (!TempFileOrErr)
return TempFileOrErr.takeError();

BumpPtrAllocator Alloc;
StringSaver Saver(Alloc);

SmallVector<StringRef, 8> CmdArgs;
CmdArgs.push_back(*OffloadBundlerPath);
bool IsArchive = isStaticArchiveFile(Filename);
CmdArgs.push_back(IsArchive ? "-type=aoo" : "-type=o");
auto *Target = Args.MakeArgString(Twine("-targets=sycl-") + Triple.str());
CmdArgs.push_back(Target);
CmdArgs.push_back(Saver.save("-input=" + Filename));
CmdArgs.push_back(Saver.save("-output=" + *TempFileOrErr));
CmdArgs.push_back("-unbundle");
CmdArgs.push_back("-allow-missing-bundles");
if (Error Err = executeCommands(*OffloadBundlerPath, CmdArgs))
return std::move(Err);
return *TempFileOrErr;
}

Error extractBundledObjects(StringRef Filename, const ArgList &Args,
SmallVector<OffloadFile> &Binaries) {
auto List = listSection(Filename, Args);
if (!List)
return List.takeError();
SmallVector<StringRef> TriplesInFile;
llvm::ErrorOr<std::unique_ptr<MemoryBuffer>> TripleList =
llvm::MemoryBuffer::getFileOrSTDIN(*List, /*isText=*/true);
if (std::error_code EC = TripleList.getError())
return createFileError(*List, EC);
(*TripleList)
->getBuffer()
.split(TriplesInFile, '\n', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
for (StringRef TripleStr : TriplesInFile) {
StringRef SYCLPrefix = "sycl-";
if (!TripleStr.starts_with(SYCLPrefix))
continue;
llvm::Triple Triple(TripleStr.substr(SYCLPrefix.size()));
auto UnbundledFile = unbundle(Filename, Args, Triple);
if (!UnbundledFile)
return UnbundledFile.takeError();
if (*UnbundledFile == Filename)
continue;

SmallVector<StringRef> ObjectFilePaths;
if (sycl::isStaticArchiveFile(Filename)) {
llvm::ErrorOr<std::unique_ptr<MemoryBuffer>> ObjList =
llvm::MemoryBuffer::getFileOrSTDIN(*UnbundledFile, /*isText=*/true);
if (std::error_code EC = ObjList.getError())
return createFileError(*UnbundledFile, EC);
(*ObjList)->getBuffer().split(ObjectFilePaths, '\n', /*MaxSplit=*/-1,
/*KeepEmpty=*/false);
} else {
ObjectFilePaths.push_back(*UnbundledFile);
}
for (StringRef ObjectFilePath : ObjectFilePaths) {
llvm::file_magic Magic;
llvm::identify_magic(ObjectFilePath, Magic);
if (Magic == file_magic::spirv_object)
return createStringError(
"SPIR-V fat objects must be generated with --offload-new-driver");
auto Arg = Args.MakeArgString(
"sycl-" +
(Triple.isSPIROrSPIRV() ? Triple.str() + "-" : Triple.str()) + "=" +
ObjectFilePath);
auto Binary = getInputBitcodeLibrary(Arg);

if (!Binary)
return Binary.takeError();

Binaries.push_back(std::move(*Binary));
}
}
return Error::success();
}

} // namespace sycl

namespace generic {
Expand Down Expand Up @@ -2634,8 +2763,14 @@ getDeviceInput(const ArgList &Args) {
if (identify_magic(Buffer.getBuffer()) == file_magic::elf_shared_object)
continue;
SmallVector<OffloadFile> Binaries;
size_t OldSize = Binaries.size();
if (Error Err = extractOffloadBinaries(Buffer, Binaries))
return std::move(Err);
if (Binaries.size() == OldSize) {
if (Error Err = sycl::extractBundledObjects(*Filename, Args, Binaries))
return std::move(Err);
}

for (auto &OffloadFile : Binaries) {
if (identify_magic(Buffer.getBuffer()) == file_magic::archive &&
!WholeArchive)
Expand Down
6 changes: 3 additions & 3 deletions devops/dependencies.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
},
"level_zero": {
"github_tag": "v1.17.39",
"version": "v1.17.39",
"url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.17.39",
"github_tag": "v1.17.42",
"version": "v1.17.42",
"url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.17.42",
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
},
"tbb": {
Expand Down
4 changes: 1 addition & 3 deletions llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,7 @@ using EntryPointSet = SetVector<Function *>;

PropSetRegTy computeModuleProperties(const Module &M,
const EntryPointSet &EntryPoints,
const GlobalBinImageProps &GlobProps,
bool SpecConstsMet,
bool IsSpecConstantDefault);
const GlobalBinImageProps &GlobProps);

std::string computeModuleSymbolTable(const Module &M,
const EntryPointSet &EntryPoints);
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/SpecConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ class SpecConstantsPass : public PassInfoMixin<SpecConstantsPass> {
collectSpecConstantDefaultValuesMetadata(const Module &M,
std::vector<char> &DefaultValues);

// Name of the metadata which indicates this module was proccessed with the
// default values handing mode.
static constexpr char SPEC_CONST_DEFAULT_VAL_MODULE_MD_STRING[] =
"sycl.specialization-constants-default-values-module";

private:
HandlingMode Mode;
};
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Analysis/BasicAliasAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,9 @@ struct CastedValue {
}

bool hasSameCastsAs(const CastedValue &Other) const {
if (V->getType() != Other.V->getType())
return false;

if (ZExtBits == Other.ZExtBits && SExtBits == Other.SExtBits &&
TruncBits == Other.TruncBits)
return true;
Expand Down
18 changes: 10 additions & 8 deletions llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,7 @@ uint32_t getKernelWorkGroupNumDim(const Function &Func) {

PropSetRegTy computeModuleProperties(const Module &M,
const EntryPointSet &EntryPoints,
const GlobalBinImageProps &GlobProps,
bool SpecConstsMet,
bool IsSpecConstantDefault) {
const GlobalBinImageProps &GlobProps) {

PropSetRegTy PropSet;
{
Expand All @@ -152,10 +150,11 @@ PropSetRegTy computeModuleProperties(const Module &M,
PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS,
computeDeviceRequirements(M, EntryPoints).asMap());
}
if (SpecConstsMet) {
// extract spec constant maps per each module
SpecIDMapTy TmpSpecIDMap;
SpecConstantsPass::collectSpecConstantMetadata(M, TmpSpecIDMap);

// extract spec constant maps per each module
SpecIDMapTy TmpSpecIDMap;
SpecConstantsPass::collectSpecConstantMetadata(M, TmpSpecIDMap);
if (!TmpSpecIDMap.empty()) {
PropSet.add(PropSetRegTy::SYCL_SPECIALIZATION_CONSTANTS, TmpSpecIDMap);

// Add property with the default values of spec constants
Expand Down Expand Up @@ -369,7 +368,10 @@ PropSetRegTy computeModuleProperties(const Module &M,
if (!HostPipePropertyMap.empty()) {
PropSet.add(PropSetRegTy::SYCL_HOST_PIPES, HostPipePropertyMap);
}

bool IsSpecConstantDefault =
M.getNamedMetadata(
SpecConstantsPass::SPEC_CONST_DEFAULT_VAL_MODULE_MD_STRING) !=
nullptr;
if (IsSpecConstantDefault)
PropSet.add(PropSetRegTy::SYCL_MISC_PROP, "specConstsReplacedWithDefault",
1);
Expand Down
18 changes: 18 additions & 0 deletions llvm/test/Analysis/BasicAA/issue103500.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
; RUN: opt -aa-pipeline=basic-aa -passes=aa-eval -print-all-alias-modref-info -disable-output %s 2>&1 | FileCheck %s

target datalayout = "p0:64:64-p5:32:32"

; CHECK: Function: indexing_different_sized_addrspace: 2 pointers, 0 call sites
; CHECK: MayAlias: i32* %gep.in.0, i32 addrspace(5)* %gep.in.5.1

define i1 @indexing_different_sized_addrspace(ptr addrspace(5) %arg, i64 %arg1, i32 %arg2) {
bb:
%arg.addrspacecast = addrspacecast ptr addrspace(5) %arg to ptr
%gep.in.5 = getelementptr i8, ptr addrspace(5) %arg, i32 16
%gep.in.0 = getelementptr i8, ptr %arg.addrspacecast, i64 %arg1
%gep.in.5.1 = getelementptr i8, ptr addrspace(5) %gep.in.5, i32 %arg2
%load.0 = load i32, ptr %gep.in.0, align 4
%load.1 = load i32, ptr addrspace(5) %gep.in.5.1, align 4
%cmp = icmp slt i32 %load.0, %load.1
ret i1 %cmp
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

; CHECK: %bool1 = trunc i8 1 to i1
; CHECK: %frombool = zext i1 %bool1 to i8
; CHECK: !sycl.specialization-constants-default-values-module = !{}
; CHECK-LOG: sycl.specialization-constants
; CHECK-LOG:[[UNIQUE_PREFIX:[0-9a-zA-Z]+]]={0, 0, 1}
; CHECK-LOG: sycl.specialization-constants-default-values
Expand Down
9 changes: 6 additions & 3 deletions llvm/tools/sycl-post-link/sycl-post-link.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,9 +307,8 @@ std::string saveModuleIR(Module &M, int I, StringRef Suff) {
std::string saveModuleProperties(module_split::ModuleDesc &MD,
const GlobalBinImageProps &GlobProps, int I,
StringRef Suff, StringRef Target = "") {
auto PropSet = computeModuleProperties(MD.getModule(), MD.entries(),
GlobProps, MD.Props.SpecConstsMet,
MD.isSpecConstantDefault());
auto PropSet =
computeModuleProperties(MD.getModule(), MD.entries(), GlobProps);

std::string NewSuff = Suff.str();
if (!Target.empty()) {
Expand Down Expand Up @@ -510,6 +509,10 @@ processSpecConstantsWithDefaultValues(const module_split::ModuleDesc &MD) {
assert(NewModuleDesc->Props.SpecConstsMet &&
"This property should be true since the presence of SpecConsts "
"has been checked before the run of the pass");
// Add metadata to the module so we can identify it as the default value split
// later.
NewModuleDesc->getModule().getOrInsertNamedMetadata(
SpecConstantsPass::SPEC_CONST_DEFAULT_VAL_MODULE_MD_STRING);
NewModuleDesc->rebuildEntryPoints();
return NewModuleDesc;
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/utils/git/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ charset-normalizer==3.3.2 \
--hash=sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519 \
--hash=sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561
# via requests
cryptography==42.0.5 \
cryptography==43.0.1 \
--hash=sha256:0270572b8bd2c833c3981724b8ee9747b3ec96f699a9665470018594301439ee \
--hash=sha256:111a0d8553afcf8eb02a4fea6ca4f59d48ddb34497aa8706a6cf536f1a5ec576 \
--hash=sha256:16a48c23a62a2f4a285699dba2e4ff2d1cff3115b9df052cdd976a18856d8e3d \
Expand Down
13 changes: 7 additions & 6 deletions sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,14 @@ if(SYCL_UR_USE_FETCH_CONTENT)
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/isaacault/unified-runtime.git")
# commit 6ded47d44cf650b7fc5d022f27b3414f6c18312b
# Author: Isaac Ault <isaac.ault@codeplay.com>
# Date: Wed Jul 24 08:57:49 2024 +0100
set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit 2bbe952669861579ea84fa30f14e1ed27ead0692
# Merge: d357964a 6b353545
# Author: Omar Ahmed <omar.ahmed@codeplay.com>
# Date: Thu Sep 12 11:36:11 2024 +0100
# Merge pull request #1928 from isaacault/iault/image_array_copy
# [Bindless][Exp] Image Array Sub-Region Copies
# * Add support for sub-region copies.
set(UNIFIED_RUNTIME_TAG 6b353545ab9ee05f4b3049e68054f40b438489e6 )
set(UNIFIED_RUNTIME_TAG 2bbe952669861579ea84fa30f14e1ed27ead0692)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need
Expand Down
2 changes: 1 addition & 1 deletion sycl/doc/design/CompilerAndRuntimeDesign.md
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,7 @@ Note: Kernel naming is not fully stable for now.
##### Kernel Fusion Support

The [experimental kernel fusion
extension](../extensions/experimental/sycl_ext_codeplay_kernel_fusion.asciidoc)
extension](../extensions/removed/sycl_ext_codeplay_kernel_fusion.asciidoc)
also supports the CUDA and HIP backends. However, as the CUBIN, PTX and AMD assembly
are not suitable input formats for the [kernel fusion JIT compiler](KernelFusionJIT.md), a
suitable IR has to be added as an additional device binary.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,11 @@ otherwise it is 0.
|Returns the maximum number of work-groups, when the kernel is submitted to the
specified queue with the specified work-group size and the specified amount of
dynamic work-group local memory (in bytes), accounting for any kernel
properties or features. If the kernel can be submitted to the specified queue
without an error, the minimum value returned by this query is 1, otherwise it
is 0.
properties or features. If the specified work-group size is 0, which is
invalid, then the implementation will throw a synchronous exception with the
`errc::invalid` error code. If the kernel can be submitted to the specified
queue without an error, the minimum value returned by this query is 1,
otherwise it is 0.

|===

Expand Down
Loading

0 comments on commit b3139da

Please sign in to comment.