Skip to content

Commit

Permalink
Merge branch 'sycl' into array_sub_region
Browse files Browse the repository at this point in the history
  • Loading branch information
isaacault committed Sep 10, 2024
2 parents f2639fd + ed2128d commit e40db8a
Show file tree
Hide file tree
Showing 25 changed files with 314 additions and 264 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/sycl-macos-build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
CCACHE_MAXSIZE: ${{ inputs.build_cache_size }}
steps:
- name: Install dependencies
run: brew install ccache ninja hwloc
run: brew install ccache ninja hwloc zstd
- uses: actions/checkout@v4
with:
ref: ${{ inputs.build_ref }}
Expand Down
5 changes: 5 additions & 0 deletions devops/containers/ubuntu2204_base.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ RUN groupadd -g 1001 sycl && useradd sycl -u 1001 -g 1001 -m -s /bin/bash
# Add sycl user to video/irc groups so that it can access GPU
RUN usermod -aG video sycl
RUN usermod -aG irc sycl

# group 109 is required for sycl user to access PVC card.
RUN groupadd -g 109 render
RUN usermod -aG render sycl

# Allow sycl user to run as sudo
RUN echo "sycl ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers

Expand Down
3 changes: 2 additions & 1 deletion devops/scripts/install_build_tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ apt update && apt install -yqq \
unzip \
jq \
curl \
libhwloc-dev
libhwloc-dev \
libzstd-dev

pip3 install psutil

4 changes: 4 additions & 0 deletions llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
#include "compiler/utils/builtin_info.h"
#include "compiler/utils/define_mux_builtins_pass.h"
#include "compiler/utils/device_info.h"
#include "compiler/utils/encode_kernel_metadata_pass.h"
#include "compiler/utils/prepare_barriers_pass.h"
#include "compiler/utils/replace_local_module_scope_variables_pass.h"
#include "compiler/utils/sub_group_analysis.h"
#include "compiler/utils/work_item_loops_pass.h"
#include "vecz/pass.h"
Expand Down Expand Up @@ -60,6 +62,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
OptimizationLevel OptLevel) {
MPM.addPass(ConvertToMuxBuiltinsSYCLNativeCPUPass());
#ifdef NATIVECPU_USE_OCK
MPM.addPass(compiler::utils::TransferKernelMetadataPass());
// Always enable vectorizer, unless explictly disabled or -O0 is set.
if (OptLevel != OptimizationLevel::O0 && !SYCLNativeCPUNoVecz) {
MAM.registerPass([] { return vecz::TargetInfoAnalysis(); });
Expand Down Expand Up @@ -87,6 +90,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
MAM.registerPass([] { return compiler::utils::SubgroupAnalysis(); });
MPM.addPass(compiler::utils::PrepareBarriersPass());
MPM.addPass(compiler::utils::WorkItemLoopsPass(Opts));
MPM.addPass(compiler::utils::ReplaceLocalModuleScopeVariablesPass());
MPM.addPass(AlwaysInlinerPass());
#endif
MPM.addPass(PrepareSYCLNativeCPUPass());
Expand Down
48 changes: 11 additions & 37 deletions llvm/lib/SYCLNativeCPUUtils/PrepareSYCLNativeCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,47 +338,21 @@ PreservedAnalyses PrepareSYCLNativeCPUPass::run(Module &M,
SmallSet<Function *, 5> RemovableFuncs;
SmallVector<Function *, 5> WrapperFuncs;

// Retrieve the wrapper functions created by the WorkItemLoop pass.
for (auto &OldF : OldKernels) {
std::optional<compiler::utils::LinkMetadataResult> VeczR =
compiler::utils::parseVeczToOrigFnLinkMetadata(*OldF);
if (VeczR && VeczR.value().first) {
WrapperFuncs.push_back(OldF);
} else {
auto Name = compiler::utils::getBaseFnNameOrFnName(*OldF);
if (Name != OldF->getName()) {
WrapperFuncs.push_back(OldF);
}
}
}

for (auto &OldF : WrapperFuncs) {
// If vectorization occurred, at this point we have a wrapper function
// that runs the vectorized kernel and peels using the scalar kernel. We
// make it so this wrapper steals the original kernel name.
std::optional<compiler::utils::LinkMetadataResult> VeczR =
compiler::utils::parseVeczToOrigFnLinkMetadata(*OldF);
if (VeczR && VeczR.value().first) {
auto ScalarF = VeczR.value().first;
OldF->takeName(ScalarF);
if (ScalarF->use_empty())
RemovableFuncs.insert(ScalarF);
} else {
// The WorkItemLoops pass created a wrapper function for the original
// kernel. If we have a kernel named foo(), the wrapper will be called
// foo-wrapper(), and will have the original kernel name retrieved by
// getBaseFnNameOrFnName. We set the name of the wrapper function
// to the original kernel name and add the original kernel to the
// list of functions that can be removed from the module.
auto Name = compiler::utils::getBaseFnNameOrFnName(*OldF);
Function *OrigF = M.getFunction(Name);
// that runs the vectorized kernel and peels using the scalar kernel.
// There may also be a wrapper for local variables replacement. We make it
// so this wrapper steals the original kernel name. Otherwise we will have
// a wrapper function from the work item loops. In this case we also steal
// the original kernel name.
auto Name = compiler::utils::getOrigFnName(*OldF);
Function *OrigF = M.getFunction(Name);
if (Name != OldF->getName()) {
if (OrigF != nullptr) {
// The original kernel is inlined by the WorkItemLoops
// pass if it contained barriers or group collectives, otherwise
// we don't want to (and can't) remove it.
if (OrigF->use_empty())
RemovableFuncs.insert(OrigF);
OldF->takeName(OrigF);
if (OrigF->use_empty()) {
RemovableFuncs.insert(OrigF);
}
} else {
OldF->setName(Name);
}
Expand Down
2 changes: 1 addition & 1 deletion sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
# Date: Wed Jul 24 08:57:49 2024 +0100
# [Bindless][Exp] Image Array Sub-Region Copies
# * Add support for sub-region copies.
set(UNIFIED_RUNTIME_TAG 005fe3921349d6d56621a74b47004ac98679a0e1 )
set(UNIFIED_RUNTIME_TAG 6b353545ab9ee05f4b3049e68054f40b438489e6 )

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need
Expand Down
104 changes: 103 additions & 1 deletion sycl/include/sycl/detail/kernel_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,26 @@

// FIXME: include export.hpp because integration header emitted by the compiler
// uses the macro defined in this header, but it doesn't explicitly include it.
#include <sycl/detail/defines_elementary.hpp>
#include <sycl/detail/export.hpp>

// This header file must not include any standard C++ header files.

#ifndef __INTEL_SYCL_USE_INTEGRATION_HEADERS
#if __has_builtin(__builtin_sycl_kernel_name)
static_assert(__has_builtin(__builtin_sycl_kernel_param_count) &&
__has_builtin(__builtin_sycl_kernel_name) &&
__has_builtin(__builtin_sycl_kernel_param_access_target) &&
__has_builtin(__builtin_sycl_kernel_param_size) &&
__has_builtin(__builtin_sycl_kernel_param_offset) &&
__has_builtin(__builtin_sycl_kernel_file_name) &&
__has_builtin(__builtin_sycl_kernel_function_name) &&
__has_builtin(__builtin_sycl_kernel_line_number) &&
__has_builtin(__builtin_sycl_kernel_column_number));
#else
#define __INTEL_SYCL_USE_INTEGRATION_HEADERS 1
#endif
#endif

namespace sycl {
inline namespace _V1 {
namespace detail {
Expand Down Expand Up @@ -151,6 +167,92 @@ template <class KernelNameType> struct KernelInfo {
};
#endif //__SYCL_UNNAMED_LAMBDA__

// Built-ins accept an object due to lacking infrastructure support for
// accepting types. The kernel name type itself isn't used because it might be
// incomplete, cv-qualified, or not default constructible. Passing an object
// also allows future extension for SYCL kernels defined as free functions.
template <typename KNT> struct KernelIdentity {
using type = KNT;
};

template <typename KernelNameType> constexpr unsigned getKernelNumParams() {
#ifndef __INTEL_SYCL_USE_INTEGRATION_HEADERS
return __builtin_sycl_kernel_param_count(KernelIdentity<KernelNameType>());
#else
return KernelInfo<KernelNameType>::getNumParams();
#endif
}

template <typename KernelNameType>
kernel_param_desc_t getKernelParamDesc(int Idx) {
#ifndef __INTEL_SYCL_USE_INTEGRATION_HEADERS
kernel_param_desc_t ParamDesc;
ParamDesc.kind =
__builtin_sycl_kernel_param_kind(KernelIdentity<KernelNameType>(), Idx);
ParamDesc.info = ParamDesc.kind == kernel_param_kind_t::kind_accessor
? __builtin_sycl_kernel_param_access_target(
KernelIdentity<KernelNameType>(), Idx)
: __builtin_sycl_kernel_param_size(
KernelIdentity<KernelNameType>(), Idx);
ParamDesc.offset =
__builtin_sycl_kernel_param_offset(KernelIdentity<KernelNameType>(), Idx);
return ParamDesc;
#else
return KernelInfo<KernelNameType>::getParamDesc(Idx);
#endif
}

template <typename KernelNameType> constexpr const char *getKernelName() {
#ifndef __INTEL_SYCL_USE_INTEGRATION_HEADERS
return __builtin_sycl_kernel_name(KernelIdentity<KernelNameType>());
#else
return KernelInfo<KernelNameType>::getName();
#endif
}

template <typename KernelNameType> constexpr bool isKernelESIMD() {
// TODO Needs a builtin counterpart
return KernelInfo<KernelNameType>::isESIMD();
}

template <typename KernelNameType> constexpr const char *getKernelFileName() {
#ifndef __INTEL_SYCL_USE_INTEGRATION_HEADERS
return __builtin_sycl_kernel_file_name(KernelIdentity<KernelNameType>());
#else
return KernelInfo<KernelNameType>::getFileName();
#endif
}

template <typename KernelNameType>
constexpr const char *getKernelFunctionName() {
#ifndef __INTEL_SYCL_USE_INTEGRATION_HEADERS
return __builtin_sycl_kernel_function_name(KernelIdentity<KernelNameType>());
#else
return KernelInfo<KernelNameType>::getFunctionName();
#endif
}

template <typename KernelNameType> constexpr unsigned getKernelLineNumber() {
#ifndef __INTEL_SYCL_USE_INTEGRATION_HEADERS
return __builtin_sycl_kernel_line_number(KernelIdentity<KernelNameType>());
#else
return KernelInfo<KernelNameType>::getLineNumber();
#endif
}

template <typename KernelNameType> constexpr unsigned getKernelColumnNumber() {
#ifndef __INTEL_SYCL_USE_INTEGRATION_HEADERS
return __builtin_sycl_kernel_column_number(KernelIdentity<KernelNameType>());
#else
return KernelInfo<KernelNameType>::getColumnNumber();
#endif
}

template <typename KernelNameType> constexpr int64_t getKernelSize() {
// TODO needs a builtin counterpart, but is currently only used for checking
// cases with external host compiler, which use integration headers.
return KernelInfo<KernelNameType>::getKernelSize();
}
} // namespace detail
} // namespace _V1
} // namespace sycl
Loading

0 comments on commit e40db8a

Please sign in to comment.