From 014a1c9092a41b9ea0aa4fdd4818a6d075c56123 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Thu, 4 Jul 2024 14:46:12 +0200 Subject: [PATCH 1/2] Add support for static linking of the L0 adapter This patch adds a CMake option that builds the L0 adapter as a static library and then embeds it in the loader. The mechanism itself is fairly generic and, when a need arises, the same can be done for other adapters. --- .github/workflows/build-hw-reusable.yml | 11 +- .github/workflows/cmake.yml | 13 +- CMakeLists.txt | 1 + scripts/generate_code.py | 30 + scripts/templates/ldrddi.cpp.mako | 4 + scripts/templates/queue_api.cpp.mako | 4 +- .../templates/ur_interface_loader.cpp.mako | 88 ++ .../templates/ur_interface_loader.hpp.mako | 38 + source/adapters/level_zero/CMakeLists.txt | 32 +- source/adapters/level_zero/adapter.cpp | 18 +- source/adapters/level_zero/command_buffer.cpp | 100 +- source/adapters/level_zero/common.hpp | 2 +- source/adapters/level_zero/context.cpp | 17 +- source/adapters/level_zero/context.hpp | 2 +- source/adapters/level_zero/device.cpp | 753 +++++++-------- source/adapters/level_zero/device.hpp | 2 +- source/adapters/level_zero/enqueue_native.cpp | 6 +- source/adapters/level_zero/event.cpp | 148 +-- source/adapters/level_zero/event.hpp | 2 +- source/adapters/level_zero/image.cpp | 78 +- source/adapters/level_zero/image.hpp | 2 +- source/adapters/level_zero/kernel.cpp | 125 +-- source/adapters/level_zero/memory.cpp | 225 ++--- source/adapters/level_zero/memory.hpp | 2 +- source/adapters/level_zero/physical_mem.cpp | 11 +- source/adapters/level_zero/platform.cpp | 97 +- source/adapters/level_zero/program.cpp | 103 ++- source/adapters/level_zero/queue.cpp | 88 +- source/adapters/level_zero/queue.hpp | 2 +- source/adapters/level_zero/sampler.cpp | 23 +- .../level_zero/ur_interface_loader.cpp | 867 ++++++++++-------- .../level_zero/ur_interface_loader.hpp | 706 ++++++++++++++ source/adapters/level_zero/ur_level_zero.hpp | 2 +- source/adapters/level_zero/usm.cpp | 217 ++--- source/adapters/level_zero/usm_p2p.cpp | 20 +- source/adapters/level_zero/v2/api.cpp | 390 ++++---- .../level_zero/v2/command_list_cache.hpp | 2 +- source/adapters/level_zero/v2/context.cpp | 25 +- source/adapters/level_zero/v2/event.cpp | 8 +- .../level_zero/v2/event_provider_normal.hpp | 5 +- source/adapters/level_zero/v2/kernel.cpp | 21 +- source/adapters/level_zero/v2/queue_api.cpp | 210 +++-- .../adapters/level_zero/v2/queue_create.cpp | 11 +- source/adapters/level_zero/v2/usm.cpp | 28 +- source/adapters/level_zero/virtual_mem.cpp | 42 +- source/common/CMakeLists.txt | 13 +- source/common/ur_util.hpp | 2 + source/loader/CMakeLists.txt | 8 + source/loader/ur_adapter_registry.hpp | 15 +- source/loader/ur_ldrddi.cpp | 105 +++ source/loader/ur_loader.cpp | 13 +- test/adapters/level_zero/CMakeLists.txt | 3 +- test/adapters/level_zero/v2/CMakeLists.txt | 4 + 53 files changed, 3023 insertions(+), 1721 deletions(-) create mode 100644 scripts/templates/ur_interface_loader.cpp.mako create mode 100644 scripts/templates/ur_interface_loader.hpp.mako create mode 100644 source/adapters/level_zero/ur_interface_loader.hpp diff --git a/.github/workflows/build-hw-reusable.yml b/.github/workflows/build-hw-reusable.yml index 88b0877c27..fa23c38248 100644 --- a/.github/workflows/build-hw-reusable.yml +++ b/.github/workflows/build-hw-reusable.yml @@ -18,6 +18,10 @@ on: required: false type: string default: OFF + static_adapter: + required: false + type: string + default: OFF permissions: contents: read @@ -36,7 +40,7 @@ jobs: 
strategy: matrix: adapter: [ - {name: "${{inputs.adapter_name}}", platform: "${{inputs.platform}}", static_Loader: "${{inputs.static_loader}}"}, + {name: "${{inputs.adapter_name}}", platform: "${{inputs.platform}}", static_Loader: "${{inputs.static_loader}}", static_adapter: "${{inputs.static_loader}}"}, ] build_type: [Debug, Release] compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}] @@ -49,6 +53,10 @@ jobs: build_type: Release - adapter: {static_Loader: ON} compiler: {c: clang, cxx: clang++} + - adapter: {static_adapter: ON} + build_type: Release + - adapter: {static_adapter: ON} + compiler: {c: clang, cxx: clang++} runs-on: ${{inputs.runner_name}} @@ -76,6 +84,7 @@ jobs: -DUR_BUILD_TESTS=ON -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON -DUR_STATIC_LOADER=${{matrix.adapter.static_Loader}} + -DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static_adapter}} -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ -DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib ${{ matrix.adapter.name == 'HIP' && '-DUR_CONFORMANCE_AMD_ARCH=gfx1030' || '' }} diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index cd5c91854c..f76326c1c4 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -155,6 +155,7 @@ jobs: adapter_name: L0 runner_name: L0 static_loader: ON + static_adapter: ON opencl: name: OpenCL @@ -216,7 +217,8 @@ jobs: os: ['windows-2019', 'windows-2022'] adapter: [ {name: None, var: ''}, {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'}, - {name: None, var: ''}, {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'} + {name: None, var: ''}, {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'}, + {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} ] # TODO: building level zero loader on windows-2019 and clang-cl is currently broken @@ -225,16 +227,25 @@ jobs: adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'} - os: 'windows-2019' adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'} + - os: 'windows-2019' + adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} - adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON'} compiler: {c: clang-cl, cxx: clang-cl} - adapter: {name: L0_V2, var: '-DUR_BUILD_ADAPTER_L0_V2=ON'} compiler: {c: clang-cl, cxx: clang-cl} + - adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} + compiler: {c: clang-cl, cxx: clang-cl} build_type: [Debug, Release] compiler: [{c: cl, cxx: cl}, {c: clang-cl, cxx: clang-cl}] include: - compiler: {c: clang-cl, cxx: clang-cl} toolset: "-T ClangCL" + - os: 'windows-2022' + adapter: {name: L0, var: '-DUR_BUILD_ADAPTER_L0=ON -DUR_STATIC_ADAPTER_L0=ON'} + build_type: 'Release' + compiler: {c: cl, cxx: cl} + runs-on: ${{matrix.os}} steps: diff --git a/CMakeLists.txt b/CMakeLists.txt index f54cbd1067..a908a22d80 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,7 @@ option(UR_BUILD_ADAPTER_HIP "Build the HIP adapter" OFF) option(UR_BUILD_ADAPTER_NATIVE_CPU "Build the Native-CPU adapter" OFF) option(UR_BUILD_ADAPTER_ALL "Build all currently supported adapters" OFF) option(UR_BUILD_ADAPTER_L0_V2 "Build the (experimental) Level-Zero v2 adapter" OFF) +option(UR_STATIC_ADAPTER_L0 "Build the Level-Zero adapter as static and embed in the loader" OFF) option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." 
OFF) option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF) option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF) diff --git a/scripts/generate_code.py b/scripts/generate_code.py index 0e58c7a43c..0c7476ab42 100644 --- a/scripts/generate_code.py +++ b/scripts/generate_code.py @@ -400,6 +400,32 @@ def generate_loader(path, section, namespace, tags, version, specs, meta): ) print("Generated %s lines of code.\n"%loc) +""" + generates c/c++ files from the specification documents +""" +def _mako_interface_loader_api(path, adapter, ext, namespace, tags, version, specs, meta): + dstpath = os.path.join(path, adapter) + os.makedirs(dstpath, exist_ok=True) + + template = f"ur_interface_loader.{ext}.mako" + fin = os.path.join(templates_dir, template) + + name = f"ur_interface_loader" + + filename = f"{name}.{ext}" + fout = os.path.join(dstpath, filename) + + print("Generating %s..."%fout) + return util.makoWrite( + fin, fout, + name=name, + adapter=adapter, + ver=version, + namespace=namespace, + tags=tags, + specs=specs, + meta=meta,) + """ Entry-point: generates adapter for unified_runtime @@ -416,6 +442,10 @@ def generate_adapters(path, section, namespace, tags, version, specs, meta): loc += _mako_linker_scripts( dstpath, "adapter", "def", namespace, tags, version, specs, meta ) + + loc += _mako_interface_loader_api(dstpath, "level_zero", "cpp", namespace, tags, version, specs, meta) + loc += _mako_interface_loader_api(dstpath, "level_zero", "hpp", namespace, tags, version, specs, meta) + print("Generated %s lines of code.\n"%loc) """ diff --git a/scripts/templates/ldrddi.cpp.mako b/scripts/templates/ldrddi.cpp.mako index 44631cc360..9c797a0ec3 100644 --- a/scripts/templates/ldrddi.cpp.mako +++ b/scripts/templates/ldrddi.cpp.mako @@ -365,6 +365,10 @@ ${tbl['export']['name']}( // Load the device-platform DDI tables for( auto& platform : ur_loader::getContext()->platforms ) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) + continue; + if(platform.initStatus != ${X}_RESULT_SUCCESS) continue; auto getTable = reinterpret_cast<${tbl['pfn']}>( diff --git a/scripts/templates/queue_api.cpp.mako b/scripts/templates/queue_api.cpp.mako index f941c7ba03..fcfa89d258 100644 --- a/scripts/templates/queue_api.cpp.mako +++ b/scripts/templates/queue_api.cpp.mako @@ -24,8 +24,9 @@ from templates import helper as th ur_queue_handle_t_::~ur_queue_handle_t_() {} ## FUNCTION ################################################################### +namespace ${x}::level_zero { %for obj in th.get_queue_related_functions(specs, n, tags): -${X}_APIEXPORT ${x}_result_t ${X}_APICALL +${x}_result_t ${th.make_func_name(n, tags, obj)}( %for line in th.make_param_lines(n, tags, obj, format=["name", "type", "delim"]): ${line} @@ -35,3 +36,4 @@ ${th.make_func_name(n, tags, obj)}( return ${obj['params'][0]['name']}->${th.transform_queue_related_function_name(n, tags, obj, format=["name"])}; } %endfor +} \ No newline at end of file diff --git a/scripts/templates/ur_interface_loader.cpp.mako b/scripts/templates/ur_interface_loader.cpp.mako new file mode 100644 index 0000000000..3298b5bcae --- /dev/null +++ b/scripts/templates/ur_interface_loader.cpp.mako @@ -0,0 +1,88 @@ +<%! 
+import re +from templates import helper as th +%><% + n=namespace + N=n.upper() + + x=tags['$x'] + X=x.upper() + Adapter=adapter.upper() +%>//===--------- ${n}_interface_loader.cpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include <${n}_api.h> +#include <${n}_ddi.h> + +#include "ur_interface_loader.hpp" + +static ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + // Pre 1.0 we enforce loader and adapter must have same version. + // Post 1.0 only major version match should be required. + if (version != UR_API_VERSION_CURRENT) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + return UR_RESULT_SUCCESS; +} + +#ifdef UR_STATIC_ADAPTER_${Adapter} +namespace ${n}::${adapter} { +#elif defined(__cplusplus) +extern "C" { +#endif + +%for tbl in th.get_pfntables(specs, meta, n, tags): +${X}_APIEXPORT ${x}_result_t ${X}_APICALL ${tbl['export']['name']}( + %for line in th.make_param_lines(n, tags, tbl['export'], format=["type", "name", "delim"]): + ${line} + %endfor + ) +{ + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + %for obj in tbl['functions']: + pDdiTable->${th.append_ws(th.make_pfn_name(n, tags, obj), 43)} = ${n}::${adapter}::${th.make_func_name(n, tags, obj)}; + %endfor + + return result; +} + +%endfor + +#ifdef UR_STATIC_ADAPTER_${Adapter} +} // namespace ur::${adapter} +#elif defined(__cplusplus) +} // extern "C" +#endif + +#ifdef UR_STATIC_ADAPTER_${Adapter} +namespace ur::${adapter} { +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { + if (ddi == nullptr) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + ur_result_t result; + +%for tbl in th.get_pfntables(specs, meta, n, tags): + result = ${n}::${adapter}::${tbl['export']['name']}( ${X}_API_VERSION_CURRENT, &ddi->${tbl['name']} ); + if (result != UR_RESULT_SUCCESS) + return result; +%endfor + + return result; +} +} +#endif diff --git a/scripts/templates/ur_interface_loader.hpp.mako b/scripts/templates/ur_interface_loader.hpp.mako new file mode 100644 index 0000000000..e2902f93c8 --- /dev/null +++ b/scripts/templates/ur_interface_loader.hpp.mako @@ -0,0 +1,38 @@ +<%! +import re +from templates import helper as th +%><% + n=namespace + N=n.upper() + + x=tags['$x'] + X=x.upper() + Adapter=adapter.upper() +%>//===--------- ${n}_interface_loader.hpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include <${n}_api.h> +#include <${n}_ddi.h> + +namespace ${n}::${adapter} { +%for s in specs: +%for obj in th.filter_items(s['objects'], 'type', 'function'): +%if not th.obj_traits.is_loader_only(obj): +${x}_result_t ${th.make_func_name(n, tags, obj)}( + %for line in th.make_param_lines(n, tags, obj, format=["type", "name", "delim"]): + ${line} + %endfor + ); +%endif +%endfor +%endfor +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); +#endif +} diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 161cc2119e..23382b919e 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -73,27 +73,33 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) ${level-zero-loader_SOURCE_DIR}/include CACHE PATH "Path to Level Zero Headers") endif() -add_library (LevelZeroLoader INTERFACE) +add_library(LevelZeroLoader INTERFACE) # The MSVC linker does not like / at the start of a path, so to work around this # we split it into a link library and a library path, where the path is allowed # to have leading /. get_filename_component(LEVEL_ZERO_LIBRARY_SRC "${LEVEL_ZERO_LIBRARY}" DIRECTORY) get_filename_component(LEVEL_ZERO_LIB_NAME "${LEVEL_ZERO_LIBRARY}" NAME) target_link_directories(LevelZeroLoader - INTERFACE "${LEVEL_ZERO_LIBRARY_SRC}" + INTERFACE "$" + "$" ) target_link_libraries(LevelZeroLoader INTERFACE "${LEVEL_ZERO_LIB_NAME}" ) -add_library (LevelZeroLoader-Headers INTERFACE) +add_library(LevelZeroLoader-Headers INTERFACE) target_include_directories(LevelZeroLoader-Headers - INTERFACE "${LEVEL_ZERO_INCLUDE_DIR}" + INTERFACE "$" + "$" ) if(UR_BUILD_ADAPTER_L0) - add_ur_adapter(ur_adapter_level_zero - SHARED + set(ADAPTER_LIB_TYPE SHARED) + if(UR_STATIC_ADAPTER_L0) + set(ADAPTER_LIB_TYPE STATIC) + endif() + + add_ur_adapter(ur_adapter_level_zero ${ADAPTER_LIB_TYPE} ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp @@ -133,6 +139,20 @@ if(UR_BUILD_ADAPTER_L0) ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp ) + if(UR_STATIC_ADAPTER_L0) + target_compile_definitions(ur_adapter_level_zero PUBLIC UR_STATIC_ADAPTER_LEVEL_ZERO) + + # 'utils' target from 'level-zero-loader' includes path which is prefixed + # in the source directory, this breaks the installation of 'utils' target. + set_target_properties(utils PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "") + install(TARGETS ur_adapter_level_zero ur_umf LevelZeroLoader LevelZeroLoader-Headers ze_loader utils + EXPORT ${PROJECT_NAME}-targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + endif() + if(NOT WIN32) target_sources(ur_adapter_level_zero PRIVATE diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index ed52254ec3..eaabb70a29 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -289,7 +289,8 @@ ur_result_t adapterStateTeardown() { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( +namespace ur::level_zero { +ur_result_t urAdapterGet( uint32_t NumEntries, ///< [in] the number of platforms to be added to ///< phAdapters. 
If phAdapters is not NULL, then ///< NumEntries should be greater than zero, otherwise @@ -330,7 +331,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { +ur_result_t urAdapterRelease(ur_adapter_handle_t) { // Check first if the Adapter pointer is valid if (GlobalAdapter) { std::lock_guard Lock{GlobalAdapter->Mutex}; @@ -342,7 +343,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { +ur_result_t urAdapterRetain(ur_adapter_handle_t) { if (GlobalAdapter) { std::lock_guard Lock{GlobalAdapter->Mutex}; GlobalAdapter->RefCount++; @@ -351,7 +352,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( +ur_result_t urAdapterGetLastError( ur_adapter_handle_t, ///< [in] handle of the platform instance const char **Message, ///< [out] pointer to a C string where the adapter ///< specific error message will be stored. @@ -364,11 +365,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( return ErrorMessageCode; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, - ur_adapter_info_t PropName, - size_t PropSize, - void *PropValue, - size_t *PropSizeRet) { +ur_result_t urAdapterGetInfo(ur_adapter_handle_t, ur_adapter_info_t PropName, + size_t PropSize, void *PropValue, + size_t *PropSizeRet) { UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); switch (PropName) { @@ -382,3 +381,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 8341d8f68e..1bf4f26716 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -10,6 +10,7 @@ #include "command_buffer.hpp" #include "helpers/kernel_helpers.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" /* L0 Command-buffer Extension Doc see: @@ -297,16 +298,16 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( IsUpdatable(Desc ? Desc->isUpdatable : false), IsProfilingEnabled(Desc ? 
Desc->enableProfiling : false), IsInOrderCmdList(IsInOrderCmdList) { - urContextRetain(Context); - urDeviceRetain(Device); + ur::level_zero::urContextRetain(Context); + ur::level_zero::urDeviceRetain(Device); } void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() { // Release the memory allocated to the Context stored in the command_buffer - urContextRelease(Context); + ur::level_zero::urContextRelease(Context); // Release the device - urDeviceRelease(Device); + ur::level_zero::urDeviceRelease(Device); // Release the memory allocated to the CommandList stored in the // command_buffer @@ -376,7 +377,7 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() { for (auto &AssociatedKernel : KernelsList) { ReleaseIndirectMem(AssociatedKernel); - urKernelRelease(AssociatedKernel); + ur::level_zero::urKernelRelease(AssociatedKernel); } } @@ -387,16 +388,16 @@ ur_exp_command_buffer_command_handle_t_:: ur_kernel_handle_t Kernel = nullptr) : CommandBuffer(CommandBuffer), CommandId(CommandId), WorkDim(WorkDim), UserDefinedLocalSize(UserDefinedLocalSize), Kernel(Kernel) { - urCommandBufferRetainExp(CommandBuffer); + ur::level_zero::urCommandBufferRetainExp(CommandBuffer); if (Kernel) - urKernelRetain(Kernel); + ur::level_zero::urKernelRetain(Kernel); } ur_exp_command_buffer_command_handle_t_:: ~ur_exp_command_buffer_command_handle_t_() { - urCommandBufferReleaseExp(CommandBuffer); + ur::level_zero::urCommandBufferReleaseExp(CommandBuffer); if (Kernel) - urKernelRelease(Kernel); + ur::level_zero::urKernelRelease(Kernel); } void ur_exp_command_buffer_handle_t_::registerSyncPoint( @@ -433,7 +434,7 @@ ur_result_t ur_exp_command_buffer_handle_t_::getFenceForQueue( return UR_RESULT_SUCCESS; } -namespace { +namespace ur::level_zero { /** * Creates a L0 command list @@ -493,9 +494,8 @@ bool canBeInOrder(ur_context_handle_t Context, ? (CommandBufferDesc ? CommandBufferDesc->isInOrder : false) : false; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, const ur_exp_command_buffer_desc_t *CommandBufferDesc, ur_exp_command_buffer_handle_t *CommandBuffer) { @@ -567,13 +567,13 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferRetainExp(ur_exp_command_buffer_handle_t CommandBuffer) { CommandBuffer->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) { if (!CommandBuffer->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -583,7 +583,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { UR_ASSERT(CommandBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER); // It is not allowed to append to command list from multiple threads. @@ -627,8 +627,6 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { return UR_RESULT_SUCCESS; } -namespace { - /** * Sets the global offset for a kernel command that will be appended to the * command buffer. 
@@ -730,9 +728,8 @@ createCommandHandle(ur_exp_command_buffer_handle_t CommandBuffer, return UR_RESULT_SUCCESS; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( +ur_result_t urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, uint32_t WorkDim, const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, const size_t *LocalWorkSize, @@ -769,7 +766,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( // is in use. Once the event has been signaled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. - UR_CALL(urKernelRetain(Kernel)); + UR_CALL(ur::level_zero::urKernelRetain(Kernel)); if (Command && CommandBuffer->IsUpdatable) { UR_CALL(createCommandHandle(CommandBuffer, Kernel, WorkDim, LocalWorkSize, @@ -790,7 +787,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( +ur_result_t urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t CommandBuffer, void *Dst, const void *Src, size_t Size, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -812,7 +809,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( +ur_result_t urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t SrcMem, ur_mem_handle_t DstMem, size_t SrcOffset, size_t DstOffset, size_t Size, uint32_t NumSyncPointsInWaitList, @@ -842,7 +839,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( +ur_result_t urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t SrcMem, ur_mem_handle_t DstMem, ur_rect_offset_t SrcOrigin, ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch, @@ -875,7 +872,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( +ur_result_t urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, size_t Offset, size_t Size, const void *Src, uint32_t NumSyncPointsInWaitList, @@ -897,7 +894,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( +ur_result_t urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset, ur_rect_region_t Region, size_t BufferRowPitch, size_t BufferSlicePitch, @@ -922,7 +919,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( +ur_result_t urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, size_t Offset, size_t Size, void *Dst, uint32_t NumSyncPointsInWaitList, const 
ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -942,7 +939,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( +ur_result_t urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset, ur_rect_region_t Region, size_t BufferRowPitch, size_t BufferSlicePitch, @@ -966,7 +963,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( +ur_result_t urCommandBufferAppendUSMPrefetchExp( ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, ur_usm_migration_flags_t Flags, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -1005,7 +1002,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( +ur_result_t urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, ur_usm_advice_flags_t Advice, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -1067,7 +1064,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( +ur_result_t urCommandBufferAppendMemBufferFillExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, const void *Pattern, size_t PatternSize, size_t Offset, size_t Size, uint32_t NumSyncPointsInWaitList, @@ -1088,7 +1085,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( +ur_result_t urCommandBufferAppendUSMFillExp( ur_exp_command_buffer_handle_t CommandBuffer, void *Ptr, const void *Pattern, size_t PatternSize, size_t Size, uint32_t NumSyncPointsInWaitList, @@ -1102,8 +1099,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -namespace { - /** * Gets an L0 command queue that supports the chosen engine. * @param[in] Queue The UR queue used to submit the command buffer. 
@@ -1225,12 +1220,12 @@ ur_result_t createUserEvent(ur_exp_command_buffer_handle_t CommandBuffer, return UR_RESULT_SUCCESS; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( - ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue, - uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, - ur_event_handle_t *Event) { +ur_result_t +urCommandBufferEnqueueExp(ur_exp_command_buffer_handle_t CommandBuffer, + ur_queue_handle_t Queue, uint32_t NumEventsInWaitList, + const ur_event_handle_t *EventWaitList, + ur_event_handle_t *Event) { std::scoped_lock Lock(Queue->Mutex); ze_command_queue_handle_t ZeCommandQueue; @@ -1292,13 +1287,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainCommandExp( +ur_result_t urCommandBufferRetainCommandExp( ur_exp_command_buffer_command_handle_t Command) { Command->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( +ur_result_t urCommandBufferReleaseCommandExp( ur_exp_command_buffer_command_handle_t Command) { if (!Command->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -1307,8 +1302,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( return UR_RESULT_SUCCESS; } -namespace { - /** * Validates contents of the update command description. * @param[in] Command The command which is being updated. @@ -1618,9 +1611,8 @@ ur_result_t updateKernelCommand( return UR_RESULT_SUCCESS; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( +ur_result_t urCommandBufferUpdateKernelLaunchExp( ur_exp_command_buffer_command_handle_t Command, const ur_exp_command_buffer_update_kernel_launch_desc_t *CommandDesc) { UR_ASSERT(Command->Kernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -1651,10 +1643,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( - ur_exp_command_buffer_handle_t hCommandBuffer, - ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t +urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { @@ -1667,10 +1660,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( return UR_RESULT_ERROR_INVALID_ENUMERATION; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( - ur_exp_command_buffer_command_handle_t Command, - ur_exp_command_buffer_command_info_t PropName, size_t PropSize, - void *PropValue, size_t *PropSizeRet) { +ur_result_t +urCommandBufferCommandGetInfoExp(ur_exp_command_buffer_command_handle_t Command, + ur_exp_command_buffer_command_info_t PropName, + size_t PropSize, void *PropValue, + size_t *PropSizeRet) { UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); switch (PropName) { @@ -1682,3 +1676,5 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( return UR_RESULT_ERROR_INVALID_ENUMERATION; } + +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/common.hpp b/source/adapters/level_zero/common.hpp index b7d0a4a913..6dd8a614c5 100644 --- a/source/adapters/level_zero/common.hpp +++ 
b/source/adapters/level_zero/common.hpp @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 662fb7f3a3..de75dc6126 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -18,7 +18,9 @@ #include "queue.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( +namespace ur::level_zero { + +ur_result_t urContextCreate( uint32_t DeviceCount, ///< [in] the number of devices given in phDevices const ur_device_handle_t *Devices, ///< [in][range(0, DeviceCount)] array of handle of devices. @@ -53,7 +55,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextRetain( +ur_result_t urContextRetain( ur_context_handle_t Context ///< [in] handle of the context to get a reference of. ) { @@ -61,7 +63,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextRelease( +ur_result_t urContextRelease( ur_context_handle_t Context ///< [in] handle of the context to release. ) { ur_platform_handle_t Plt = Context->getPlatform(); @@ -85,7 +87,7 @@ static const bool UseMemcpy2DOperations = [] { return std::atoi(UseMemcpy2DOperationsFlag) > 0; }(); -UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( +ur_result_t urContextGetInfo( ur_context_handle_t Context, ///< [in] handle of the context ur_context_info_t ContextInfoType, ///< [in] type of the info to retrieve size_t PropSize, ///< [in] the number of bytes of memory pointed to by @@ -133,7 +135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( +ur_result_t urContextGetNativeHandle( ur_context_handle_t Context, ///< [in] handle of the context. ur_native_handle_t *NativeContext ///< [out] a pointer to the native ///< handle of the context. @@ -142,7 +144,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( +ur_result_t urContextCreateWithNativeHandle( ur_native_handle_t NativeContext, ///< [in] the native handle of the context. ur_adapter_handle_t, uint32_t NumDevices, const ur_device_handle_t *Devices, @@ -166,7 +168,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( +ur_result_t urContextSetExtendedDeleter( ur_context_handle_t Context, ///< [in] handle of the context. ur_context_extended_deleter_t Deleter, ///< [in] Function pointer to extended deleter. 
@@ -180,6 +182,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero ur_result_t ur_context_handle_t_::initialize() { diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 0e3f5e7884..c2fbba633f 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index de2bee3789..e6cb650420 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -10,13 +10,59 @@ #include "device.hpp" #include "adapter.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" #include "ur_util.hpp" #include #include #include -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( +// UR_L0_USE_COPY_ENGINE can be set to an integer value, or +// a pair of integer values of the form "lower_index:upper_index". +// Here, the indices point to copy engines in a list of all available copy +// engines. +// This functions returns this pair of indices. +// If the user specifies only a single integer, a value of 0 indicates that +// the copy engines will not be used at all. A value of 1 indicates that all +// available copy engines can be used. +const std::pair +getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) { + const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); + static const char *EnvVar = UrRet ? UrRet : (PiRet ? PiRet : nullptr); + // If the environment variable is not set, no copy engines are used when + // immediate commandlists are being used. For standard commandlists all are + // used. + if (!EnvVar) { + if (Device->ImmCommandListUsed) + return std::pair(0, 0); // Only main copy engine will be used. + return std::pair(0, INT_MAX); // All copy engines will be used. + } + std::string CopyEngineRange = EnvVar; + // Environment variable can be a single integer or a pair of integers + // separated by ":" + auto pos = CopyEngineRange.find(":"); + if (pos == std::string::npos) { + bool UseCopyEngine = (std::stoi(CopyEngineRange) != 0); + if (UseCopyEngine) + return std::pair(0, INT_MAX); // All copy engines can be used. + return std::pair(-1, -1); // No copy engines will be used. + } + int LowerCopyEngineIndex = std::stoi(CopyEngineRange.substr(0, pos)); + int UpperCopyEngineIndex = std::stoi(CopyEngineRange.substr(pos + 1)); + if ((LowerCopyEngineIndex > UpperCopyEngineIndex) || + (LowerCopyEngineIndex < -1) || (UpperCopyEngineIndex < -1)) { + logger::error("UR_L0_LEVEL_ZERO_USE_COPY_ENGINE: invalid value provided, " + "default set."); + LowerCopyEngineIndex = 0; + UpperCopyEngineIndex = INT_MAX; + } + return std::pair(LowerCopyEngineIndex, UpperCopyEngineIndex); +} + +namespace ur::level_zero { + +ur_result_t urDeviceGet( ur_platform_handle_t Platform, ///< [in] handle of the platform instance ur_device_type_t DeviceType, ///< [in] the type of the devices. 
uint32_t NumEntries, ///< [in] the number of devices to be added to @@ -143,7 +189,7 @@ uint64_t calculateGlobalMemSize(ur_device_handle_t Device) { return Device->ZeGlobalMemSize.operator->()->value; } -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( +ur_result_t urDeviceGetInfo( ur_device_handle_t Device, ///< [in] handle of the device instance ur_device_info_t ParamName, ///< [in] type of the info to retrieve size_t propSize, ///< [in] the number of bytes pointed to by ParamValue. @@ -1068,158 +1114,353 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return UR_RESULT_SUCCESS; } -// UR_L0_USE_COPY_ENGINE can be set to an integer value, or -// a pair of integer values of the form "lower_index:upper_index". -// Here, the indices point to copy engines in a list of all available copy -// engines. -// This functions returns this pair of indices. -// If the user specifies only a single integer, a value of 0 indicates that -// the copy engines will not be used at all. A value of 1 indicates that all -// available copy engines can be used. -const std::pair -getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) { - const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE"); - const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); - static const char *EnvVar = UrRet ? UrRet : (PiRet ? PiRet : nullptr); - // If the environment variable is not set, no copy engines are used when - // immediate commandlists are being used. For standard commandlists all are - // used. - if (!EnvVar) { - if (Device->ImmCommandListUsed) - return std::pair(0, 0); // Only main copy engine will be used. - return std::pair(0, INT_MAX); // All copy engines will be used. - } - std::string CopyEngineRange = EnvVar; - // Environment variable can be a single integer or a pair of integers - // separated by ":" - auto pos = CopyEngineRange.find(":"); - if (pos == std::string::npos) { - bool UseCopyEngine = (std::stoi(CopyEngineRange) != 0); - if (UseCopyEngine) - return std::pair(0, INT_MAX); // All copy engines can be used. - return std::pair(-1, -1); // No copy engines will be used. - } - int LowerCopyEngineIndex = std::stoi(CopyEngineRange.substr(0, pos)); - int UpperCopyEngineIndex = std::stoi(CopyEngineRange.substr(pos + 1)); - if ((LowerCopyEngineIndex > UpperCopyEngineIndex) || - (LowerCopyEngineIndex < -1) || (UpperCopyEngineIndex < -1)) { - logger::error("UR_L0_LEVEL_ZERO_USE_COPY_ENGINE: invalid value provided, " - "default set."); - LowerCopyEngineIndex = 0; - UpperCopyEngineIndex = INT_MAX; - } - return std::pair(LowerCopyEngineIndex, UpperCopyEngineIndex); -} - bool CopyEngineRequested(const ur_device_handle_t &Device) { int LowerCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).first; int UpperCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).second; return ((LowerCopyQueueIndex != -1) || (UpperCopyQueueIndex != -1)); } -// Whether immediate commandlists will be used for kernel launches and copies. -// The default is standard commandlists. Setting 1 or 2 specifies use of -// immediate commandlists. Note: when immediate commandlists are used then -// device-only events must be either AllHostVisible or OnDemandHostVisibleProxy. -// (See env var UR_L0_DEVICE_SCOPE_EVENTS). - -// Get value of immediate commandlists env var setting or -1 if unset -ur_device_handle_t_::ImmCmdlistMode -ur_device_handle_t_::useImmediateCommandLists() { - // If immediate commandlist setting is not explicitly set, then use the device - // default. 
- // TODO: confirm this is good once make_queue revert is added - static const int ImmediateCommandlistsSetting = [] { - const char *UrRet = std::getenv("UR_L0_USE_IMMEDIATE_COMMANDLISTS"); - const char *PiRet = - std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"); - const char *ImmediateCommandlistsSettingStr = - UrRet ? UrRet : (PiRet ? PiRet : nullptr); - if (!ImmediateCommandlistsSettingStr) - return -1; - return std::atoi(ImmediateCommandlistsSettingStr); - }(); - - if (ImmediateCommandlistsSetting == -1) { - bool isDG2SupportedDriver = - this->Platform->isDriverVersionNewerOrSimilar(1, 5, 30820); - if ((isDG2SupportedDriver && isDG2()) || isPVC()) { - return PerQueue; - } else { - return NotUsed; +ur_result_t urDevicePartition( + ur_device_handle_t Device, ///< [in] handle of the device to partition. + const ur_device_partition_properties_t + *Properties, ///< [in] Device partition properties. + uint32_t NumDevices, ///< [in] the number of sub-devices. + ur_device_handle_t + *OutDevices, ///< [out][optional][range(0, NumDevices)] array of handle + ///< of devices. If NumDevices is less than the number of + ///< sub-devices available, then the function shall only + ///< retrieve that number of sub-devices. + uint32_t *NumDevicesRet ///< [out][optional] pointer to the number of + ///< sub-devices the device can be partitioned into + ///< according to the partitioning property. +) { + // Other partitioning ways are not supported by Level Zero + UR_ASSERT(Properties->PropCount == 1, UR_RESULT_ERROR_INVALID_VALUE); + if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + if ((Properties->pProperties->value.affinity_domain != + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE && + Properties->pProperties->value.affinity_domain != + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) { + return UR_RESULT_ERROR_INVALID_VALUE; } + } else if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { + if (Properties->pProperties->value.affinity_domain != 0) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + } else { + return UR_RESULT_ERROR_INVALID_VALUE; } - switch (ImmediateCommandlistsSetting) { - case 0: - return NotUsed; - case 1: - return PerQueue; - case 2: - return PerThreadPerQueue; - default: - return NotUsed; - } -} -bool ur_device_handle_t_::useRelaxedAllocationLimits() { - static const bool EnableRelaxedAllocationLimits = [] { - auto UrRet = ur_getenv("UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS"); - const bool RetVal = UrRet ? std::stoi(*UrRet) : 0; - return RetVal; - }(); + // Devices cache is normally created in piDevicesGet but still make + // sure that cache is populated. + // + auto Res = Device->Platform->populateDeviceCacheIfNeeded(); + if (Res != UR_RESULT_SUCCESS) { + return Res; + } - return EnableRelaxedAllocationLimits; -} + auto EffectiveNumDevices = [&]() -> decltype(Device->SubDevices.size()) { + if (Device->SubDevices.size() == 0) + return 0; -bool ur_device_handle_t_::useDriverInOrderLists() { - // Use in-order lists implementation from L0 driver instead - // of adapter's implementation. + // Sub-Sub-Devices are partitioned by CSlices, not by affinity domain. + // However, if + // UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that + // still expose CSlices in partitioning by affinity domain for compatibility + // reasons. 
+ if (Properties->pProperties->type == + UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN && + !ExposeCSliceInAffinityPartitioning) { + if (Device->isSubDevice()) { + return 0; + } + } + if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { + // Not a CSlice-based partitioning. + if (!Device->SubDevices[0]->isCCS()) { + return 0; + } + } - static const bool UseDriverInOrderLists = [&] { - const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS"); - bool CompatibleDriver = this->Platform->isDriverVersionNewerOrSimilar( - 1, 3, L0_DRIVER_INORDER_MIN_VERSION); - if (!UrRet) - return CompatibleDriver; - return std::atoi(UrRet) != 0; + return Device->SubDevices.size(); }(); - return UseDriverInOrderLists; -} - -ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, - int SubSubDeviceIndex) { - // Maintain various device properties cache. - // Note that we just describe here how to compute the data. - // The real initialization is upon first access. + // TODO: Consider support for partitioning to <= total sub-devices. + // Currently supported partitioning (by affinity domain/numa) would always + // partition to all sub-devices. // - auto ZeDevice = this->ZeDevice; - ZeDeviceProperties.Compute = [ZeDevice](ze_device_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &Properties)); - }; - - ZeDeviceComputeProperties.Compute = - [ZeDevice](ze_device_compute_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetComputeProperties, (ZeDevice, &Properties)); - }; + if (NumDevices != 0) + UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); - ZeDeviceIpVersionExt.Compute = - [ZeDevice](ze_device_ip_version_ext_t &Properties) { - ze_device_properties_t P; - P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; - P.pNext = (void *)&Properties; - ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P)); - }; + for (uint32_t I = 0; I < NumDevices; I++) { + auto prop = Properties->pProperties[0]; + if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + // In case the value is NEXT_PARTITIONABLE, we need to change it to the + // chosen domain. This will always be NUMA since that's the only domain + // supported by level zero. + prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; + } + Device->SubDevices[I]->SubDeviceCreationProperty = prop; - ZeDeviceImageProperties.Compute = - [ZeDevice](ze_device_image_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetImageProperties, (ZeDevice, &Properties)); - }; + OutDevices[I] = Device->SubDevices[I]; + // reusing the same pi_device needs to increment the reference count + ur::level_zero::urDeviceRetain(OutDevices[I]); + } - ZeDeviceModuleProperties.Compute = - [ZeDevice](ze_device_module_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetModuleProperties, (ZeDevice, &Properties)); - }; + if (NumDevicesRet) { + *NumDevicesRet = EffectiveNumDevices; + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceSelectBinary( + ur_device_handle_t + Device, ///< [in] handle of the device to select binary for. + const ur_device_binary_t + *Binaries, ///< [in] the array of binaries to select from. + uint32_t NumBinaries, ///< [in] the number of binaries passed in ppBinaries. + ///< Must greater than or equal to zero otherwise + ///< ::UR_RESULT_ERROR_INVALID_VALUE is returned. + uint32_t + *SelectedBinary ///< [out] the index of the selected binary in the input + ///< array of binaries. If a suitable binary was not + ///< found the function returns ${X}_INVALID_BINARY. 
+) { + std::ignore = Device; + // TODO: this is a bare-bones implementation for choosing a device image + // that would be compatible with the targeted device. An AOT-compiled + // image is preferred over SPIR-V for known devices (i.e. Intel devices) + // The implementation makes no effort to differentiate between multiple images + // for the given device, and simply picks the first one compatible. + // + // Real implementation will use the same mechanism OpenCL ICD dispatcher + // uses. Something like: + // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT); + // return context->dispatch->piextDeviceSelectIR( + // ctx, images, num_images, selected_image); + // where context->dispatch is set to the dispatch table provided by PI + // plugin for platform/device the ctx was created for. + + // Look for GEN binary, which we known can only be handled by Level-Zero now. + const char *BinaryTarget = + UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; // UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; + + uint32_t *SelectedBinaryInd = SelectedBinary; + + // Find the appropriate device image, fallback to spirv if not found + constexpr uint32_t InvalidInd = (std::numeric_limits::max)(); + uint32_t Spirv = InvalidInd; + + for (uint32_t i = 0; i < NumBinaries; ++i) { + if (strcmp(Binaries[i].pDeviceTargetSpec, BinaryTarget) == 0) { + *SelectedBinaryInd = i; + return UR_RESULT_SUCCESS; + } + if (strcmp(Binaries[i].pDeviceTargetSpec, + UR_DEVICE_BINARY_TARGET_SPIRV64) == 0) + Spirv = i; + } + // Points to a spirv image, if such indeed was found + if ((*SelectedBinaryInd = Spirv) != InvalidInd) + return UR_RESULT_SUCCESS; + + // No image can be loaded for the given device + return UR_RESULT_ERROR_INVALID_BINARY; +} + +ur_result_t urDeviceGetNativeHandle( + ur_device_handle_t Device, ///< [in] handle of the device. + ur_native_handle_t + *NativeDevice ///< [out] a pointer to the native handle of the device. +) { + *NativeDevice = reinterpret_cast(Device->ZeDevice); + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceCreateWithNativeHandle( + ur_native_handle_t NativeDevice, ///< [in] the native handle of the device. + [[maybe_unused]] ur_adapter_handle_t + Adapter, ///< [in] handle of the platform instance + [[maybe_unused]] const ur_device_native_properties_t + *Properties, ///< [in][optional] pointer to native device properties + ///< struct. + ur_device_handle_t + *Device ///< [out] pointer to the handle of the device object created. +) { + auto ZeDevice = ur_cast(NativeDevice); + + // The SYCL spec requires that the set of devices must remain fixed for the + // duration of the application's execution. We assume that we found all of the + // Level Zero devices when we initialized the platforms/devices cache, so the + // "NativeHandle" must already be in the cache. If it is not, this must not be + // a valid Level Zero device. 
+ + ur_device_handle_t Dev = nullptr; + if (const auto *platforms = GlobalAdapter->PlatformCache->get_value()) { + for (const auto &p : *platforms) { + Dev = p->getDeviceFromNativeHandle(ZeDevice); + } + } else { + return GlobalAdapter->PlatformCache->get_error(); + } + + if (Dev == nullptr) + return UR_RESULT_ERROR_INVALID_VALUE; + + *Device = Dev; + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceGetGlobalTimestamps( + ur_device_handle_t Device, ///< [in] handle of the device instance + uint64_t *DeviceTimestamp, ///< [out][optional] pointer to the Device's + ///< global timestamp that correlates with the + ///< Host's global timestamp value + uint64_t *HostTimestamp ///< [out][optional] pointer to the Host's global + ///< timestamp that correlates with the Device's + ///< global timestamp value +) { + const uint64_t &ZeTimerResolution = + Device->ZeDeviceProperties->timerResolution; + const uint64_t TimestampMaxCount = Device->getTimestampMask(); + uint64_t DeviceClockCount, Dummy; + + ZE2UR_CALL(zeDeviceGetGlobalTimestamps, + (Device->ZeDevice, + HostTimestamp == nullptr ? &Dummy : HostTimestamp, + &DeviceClockCount)); + + if (DeviceTimestamp != nullptr) { + *DeviceTimestamp = + (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution; + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceRetain(ur_device_handle_t Device) { + // The root-device ref-count remains unchanged (always 1). + if (Device->isSubDevice()) { + Device->RefCount.increment(); + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceRelease(ur_device_handle_t Device) { + // Root devices are destroyed during the piTearDown process. + if (Device->isSubDevice()) { + if (Device->RefCount.decrementAndTest()) { + delete Device; + } + } + + return UR_RESULT_SUCCESS; +} +} // namespace ur::level_zero + +// Whether immediate commandlists will be used for kernel launches and copies. +// The default is standard commandlists. Setting 1 or 2 specifies use of +// immediate commandlists. Note: when immediate commandlists are used then +// device-only events must be either AllHostVisible or OnDemandHostVisibleProxy. +// (See env var UR_L0_DEVICE_SCOPE_EVENTS). + +// Get value of immediate commandlists env var setting or -1 if unset +ur_device_handle_t_::ImmCmdlistMode +ur_device_handle_t_::useImmediateCommandLists() { + // If immediate commandlist setting is not explicitly set, then use the device + // default. + // TODO: confirm this is good once make_queue revert is added + static const int ImmediateCommandlistsSetting = [] { + const char *UrRet = std::getenv("UR_L0_USE_IMMEDIATE_COMMANDLISTS"); + const char *PiRet = + std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"); + const char *ImmediateCommandlistsSettingStr = + UrRet ? UrRet : (PiRet ? PiRet : nullptr); + if (!ImmediateCommandlistsSettingStr) + return -1; + return std::atoi(ImmediateCommandlistsSettingStr); + }(); + + if (ImmediateCommandlistsSetting == -1) { + bool isDG2SupportedDriver = + this->Platform->isDriverVersionNewerOrSimilar(1, 5, 30820); + if ((isDG2SupportedDriver && isDG2()) || isPVC()) { + return PerQueue; + } else { + return NotUsed; + } + } + switch (ImmediateCommandlistsSetting) { + case 0: + return NotUsed; + case 1: + return PerQueue; + case 2: + return PerThreadPerQueue; + default: + return NotUsed; + } +} + +bool ur_device_handle_t_::useRelaxedAllocationLimits() { + static const bool EnableRelaxedAllocationLimits = [] { + auto UrRet = ur_getenv("UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS"); + const bool RetVal = UrRet ? 
std::stoi(*UrRet) : 0; + return RetVal; + }(); + + return EnableRelaxedAllocationLimits; +} + +bool ur_device_handle_t_::useDriverInOrderLists() { + // Use in-order lists implementation from L0 driver instead + // of adapter's implementation. + + static const bool UseDriverInOrderLists = [&] { + const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS"); + bool CompatibleDriver = this->Platform->isDriverVersionNewerOrSimilar( + 1, 3, L0_DRIVER_INORDER_MIN_VERSION); + if (!UrRet) + return CompatibleDriver; + return std::atoi(UrRet) != 0; + }(); + + return UseDriverInOrderLists; +} + +ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, + int SubSubDeviceIndex) { + // Maintain various device properties cache. + // Note that we just describe here how to compute the data. + // The real initialization is upon first access. + // + auto ZeDevice = this->ZeDevice; + ZeDeviceProperties.Compute = [ZeDevice](ze_device_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &Properties)); + }; + + ZeDeviceComputeProperties.Compute = + [ZeDevice](ze_device_compute_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetComputeProperties, (ZeDevice, &Properties)); + }; + + ZeDeviceIpVersionExt.Compute = + [ZeDevice](ze_device_ip_version_ext_t &Properties) { + ze_device_properties_t P; + P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; + P.pNext = (void *)&Properties; + ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P)); + }; + + ZeDeviceImageProperties.Compute = + [ZeDevice](ze_device_image_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetImageProperties, (ZeDevice, &Properties)); + }; + + ZeDeviceModuleProperties.Compute = + [ZeDevice](ze_device_module_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetModuleProperties, (ZeDevice, &Properties)); + }; ZeDeviceMemoryProperties.Compute = [ZeDevice]( @@ -1314,7 +1555,7 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, return UR_RESULT_ERROR_UNKNOWN; } - if (CopyEngineRequested((ur_device_handle_t)this)) { + if (ur::level_zero::CopyEngineRequested((ur_device_handle_t)this)) { for (uint32_t i = 0; i < numQueueGroups; i++) { if (((QueueGroupProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0) && @@ -1355,26 +1596,6 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, return UR_RESULT_SUCCESS; } -ur_result_t urDeviceRetain(ur_device_handle_t Device) { - - // The root-device ref-count remains unchanged (always 1). - if (Device->isSubDevice()) { - Device->RefCount.increment(); - } - return UR_RESULT_SUCCESS; -} - -ur_result_t urDeviceRelease(ur_device_handle_t Device) { - // Root devices are destroyed during the piTearDown process. - if (Device->isSubDevice()) { - if (Device->RefCount.decrementAndTest()) { - delete Device; - } - } - - return UR_RESULT_SUCCESS; -} - void ZeDriverVersionStringExtension::setZeDriverVersionString( ur_platform_handle_t_ *Platform) { // Check if Intel Driver Version String is available. If yes, save the API @@ -1442,221 +1663,3 @@ void ZeUSMImportExtension::doZeUSMRelease(ze_driver_handle_t DriverHandle, void *HostPtr) { ZE_CALL_NOCHECK(zexDriverReleaseImportedPointer, (DriverHandle, HostPtr)); } - -UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( - ur_device_handle_t Device, ///< [in] handle of the device to partition. - const ur_device_partition_properties_t - *Properties, ///< [in] Device partition properties. - uint32_t NumDevices, ///< [in] the number of sub-devices. 
- ur_device_handle_t - *OutDevices, ///< [out][optional][range(0, NumDevices)] array of handle - ///< of devices. If NumDevices is less than the number of - ///< sub-devices available, then the function shall only - ///< retrieve that number of sub-devices. - uint32_t *NumDevicesRet ///< [out][optional] pointer to the number of - ///< sub-devices the device can be partitioned into - ///< according to the partitioning property. -) { - // Other partitioning ways are not supported by Level Zero - UR_ASSERT(Properties->PropCount == 1, UR_RESULT_ERROR_INVALID_VALUE); - if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { - if ((Properties->pProperties->value.affinity_domain != - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE && - Properties->pProperties->value.affinity_domain != - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - } else if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { - if (Properties->pProperties->value.affinity_domain != 0) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - } else { - return UR_RESULT_ERROR_INVALID_VALUE; - } - - // Devices cache is normally created in piDevicesGet but still make - // sure that cache is populated. - // - auto Res = Device->Platform->populateDeviceCacheIfNeeded(); - if (Res != UR_RESULT_SUCCESS) { - return Res; - } - - auto EffectiveNumDevices = [&]() -> decltype(Device->SubDevices.size()) { - if (Device->SubDevices.size() == 0) - return 0; - - // Sub-Sub-Devices are partitioned by CSlices, not by affinity domain. - // However, if - // UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that - // still expose CSlices in partitioning by affinity domain for compatibility - // reasons. - if (Properties->pProperties->type == - UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN && - !ExposeCSliceInAffinityPartitioning) { - if (Device->isSubDevice()) { - return 0; - } - } - if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { - // Not a CSlice-based partitioning. - if (!Device->SubDevices[0]->isCCS()) { - return 0; - } - } - - return Device->SubDevices.size(); - }(); - - // TODO: Consider support for partitioning to <= total sub-devices. - // Currently supported partitioning (by affinity domain/numa) would always - // partition to all sub-devices. - // - if (NumDevices != 0) - UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); - - for (uint32_t I = 0; I < NumDevices; I++) { - auto prop = Properties->pProperties[0]; - if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { - // In case the value is NEXT_PARTITIONABLE, we need to change it to the - // chosen domain. This will always be NUMA since that's the only domain - // supported by level zero. - prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; - } - Device->SubDevices[I]->SubDeviceCreationProperty = prop; - - OutDevices[I] = Device->SubDevices[I]; - // reusing the same pi_device needs to increment the reference count - urDeviceRetain(OutDevices[I]); - } - - if (NumDevicesRet) { - *NumDevicesRet = EffectiveNumDevices; - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( - ur_device_handle_t - Device, ///< [in] handle of the device to select binary for. - const ur_device_binary_t - *Binaries, ///< [in] the array of binaries to select from. - uint32_t NumBinaries, ///< [in] the number of binaries passed in ppBinaries. - ///< Must greater than or equal to zero otherwise - ///< ::UR_RESULT_ERROR_INVALID_VALUE is returned. 
- uint32_t - *SelectedBinary ///< [out] the index of the selected binary in the input - ///< array of binaries. If a suitable binary was not - ///< found the function returns ${X}_INVALID_BINARY. -) { - std::ignore = Device; - // TODO: this is a bare-bones implementation for choosing a device image - // that would be compatible with the targeted device. An AOT-compiled - // image is preferred over SPIR-V for known devices (i.e. Intel devices) - // The implementation makes no effort to differentiate between multiple images - // for the given device, and simply picks the first one compatible. - // - // Real implementation will use the same mechanism OpenCL ICD dispatcher - // uses. Something like: - // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT); - // return context->dispatch->piextDeviceSelectIR( - // ctx, images, num_images, selected_image); - // where context->dispatch is set to the dispatch table provided by PI - // plugin for platform/device the ctx was created for. - - // Look for GEN binary, which we known can only be handled by Level-Zero now. - const char *BinaryTarget = - UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; // UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; - - uint32_t *SelectedBinaryInd = SelectedBinary; - - // Find the appropriate device image, fallback to spirv if not found - constexpr uint32_t InvalidInd = (std::numeric_limits::max)(); - uint32_t Spirv = InvalidInd; - - for (uint32_t i = 0; i < NumBinaries; ++i) { - if (strcmp(Binaries[i].pDeviceTargetSpec, BinaryTarget) == 0) { - *SelectedBinaryInd = i; - return UR_RESULT_SUCCESS; - } - if (strcmp(Binaries[i].pDeviceTargetSpec, - UR_DEVICE_BINARY_TARGET_SPIRV64) == 0) - Spirv = i; - } - // Points to a spirv image, if such indeed was found - if ((*SelectedBinaryInd = Spirv) != InvalidInd) - return UR_RESULT_SUCCESS; - - // No image can be loaded for the given device - return UR_RESULT_ERROR_INVALID_BINARY; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( - ur_device_handle_t Device, ///< [in] handle of the device. - ur_native_handle_t - *NativeDevice ///< [out] a pointer to the native handle of the device. -) { - *NativeDevice = reinterpret_cast(Device->ZeDevice); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( - ur_native_handle_t NativeDevice, ///< [in] the native handle of the device. - [[maybe_unused]] ur_adapter_handle_t - Adapter, ///< [in] handle of the platform instance - [[maybe_unused]] const ur_device_native_properties_t - *Properties, ///< [in][optional] pointer to native device properties - ///< struct. - ur_device_handle_t - *Device ///< [out] pointer to the handle of the device object created. -) { - auto ZeDevice = ur_cast(NativeDevice); - - // The SYCL spec requires that the set of devices must remain fixed for the - // duration of the application's execution. We assume that we found all of the - // Level Zero devices when we initialized the platforms/devices cache, so the - // "NativeHandle" must already be in the cache. If it is not, this must not be - // a valid Level Zero device. 
- - ur_device_handle_t Dev = nullptr; - if (const auto *platforms = GlobalAdapter->PlatformCache->get_value()) { - for (const auto &p : *platforms) { - Dev = p->getDeviceFromNativeHandle(ZeDevice); - } - } else { - return GlobalAdapter->PlatformCache->get_error(); - } - - if (Dev == nullptr) - return UR_RESULT_ERROR_INVALID_VALUE; - - *Device = Dev; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( - ur_device_handle_t Device, ///< [in] handle of the device instance - uint64_t *DeviceTimestamp, ///< [out][optional] pointer to the Device's - ///< global timestamp that correlates with the - ///< Host's global timestamp value - uint64_t *HostTimestamp ///< [out][optional] pointer to the Host's global - ///< timestamp that correlates with the Device's - ///< global timestamp value -) { - const uint64_t &ZeTimerResolution = - Device->ZeDeviceProperties->timerResolution; - const uint64_t TimestampMaxCount = Device->getTimestampMask(); - uint64_t DeviceClockCount, Dummy; - - ZE2UR_CALL(zeDeviceGetGlobalTimestamps, - (Device->ZeDevice, - HostTimestamp == nullptr ? &Dummy : HostTimestamp, - &DeviceClockCount)); - - if (DeviceTimestamp != nullptr) { - *DeviceTimestamp = - (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution; - } - - return UR_RESULT_SUCCESS; -} diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 898edff779..a8b8098819 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/enqueue_native.cpp b/source/adapters/level_zero/enqueue_native.cpp index bdffb2e0c4..7c3a1da988 100644 --- a/source/adapters/level_zero/enqueue_native.cpp +++ b/source/adapters/level_zero/enqueue_native.cpp @@ -12,7 +12,9 @@ #include #include -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( +namespace ur::level_zero { + +ur_result_t urEnqueueNativeCommandExp( ur_queue_handle_t hQueue, ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, @@ -31,3 +33,5 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 0668cc185e..84a7c0b159 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -18,6 +18,7 @@ #include "common.hpp" #include "event.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" void printZeEventList(const _ur_ze_event_list_t &UrZeEventList) { @@ -59,7 +60,9 @@ bool WaitListEmptyOrAllEventsFromSameQueue( return true; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( +namespace ur::level_zero { + +ur_result_t urEnqueueEventsWait( ur_queue_handle_t Queue, ///< [in] handle of the queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t @@ -151,7 +154,7 @@ static const bool InOrderBarrierBySignal = [] { return (UrRet ? 
std::atoi(UrRet) : true); }(); -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( +ur_result_t urEnqueueEventsWaitWithBarrier( ur_queue_handle_t Queue, ///< [in] handle of the queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t @@ -230,7 +233,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList, EventWaitList) && Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) { - UR_CALL(urEventRetain(Queue->LastCommandEvent)); + UR_CALL(ur::level_zero::urEventRetain(Queue->LastCommandEvent)); *Event = Queue->LastCommandEvent; return UR_RESULT_SUCCESS; } @@ -386,7 +389,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( +ur_result_t urEventGetInfo( ur_event_handle_t Event, ///< [in] handle of the event object ur_event_info_t PropName, ///< [in] the name of the event property to query size_t PropValueSize, ///< [in] size in bytes of the event property value @@ -469,7 +472,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( +ur_result_t urEventGetProfilingInfo( ur_event_handle_t Event, ///< [in] handle of the event object ur_profiling_info_t PropName, ///< [in] the name of the profiling property to query @@ -658,7 +661,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( +ur_result_t urEnqueueTimestampRecordingExp( ur_queue_handle_t Queue, ///< [in] handle of the queue object bool Blocking, ///< [in] blocking or non-blocking enqueue uint32_t NumEventsInWaitList, ///< [in] size of the event wait list @@ -696,7 +699,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( (*OutEvent)->WaitList = TmpWaitList; uint64_t DeviceStartTimestamp = 0; - UR_CALL(urDeviceGetGlobalTimestamps(Device, &DeviceStartTimestamp, nullptr)); + UR_CALL(ur::level_zero::urDeviceGetGlobalTimestamps( + Device, &DeviceStartTimestamp, nullptr)); (*OutEvent)->RecordEventStartTimestamp = DeviceStartTimestamp; // Create a new entry in the queue's recordings. @@ -715,60 +719,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( return UR_RESULT_SUCCESS; } -ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( - ze_event_handle_t &ZeHostVisibleEvent) { - auto UrQueue = this->UrQueue; - - std::scoped_lock Lock(UrQueue->Mutex, - this->Mutex); - - if (!HostVisibleEvent) { - this->IsCreatingHostProxyEvent = true; - if (UrQueue->ZeEventsScope != OnDemandHostVisibleProxy) - die("getOrCreateHostVisibleEvent: missing host-visible event"); - - // Submit the command(s) signalling the proxy event to the queue. - // We have to first submit a wait for the device-only event for which this - // proxy is created. - // - // Get a new command list to be used on this call - - // We want to batch these commands to avoid extra submissions (costly) - bool OkToBatch = true; - - ur_command_list_ptr_t CommandList{}; - UR_CALL(UrQueue->Context->getAvailableCommandList( - UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch)) - - // Create a "proxy" host-visible event. 
- UR_CALL(createEventAndAssociateQueue( - UrQueue, &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList, - /* IsInternal */ false, /* IsMultiDevice */ false, - /* HostVisible */ true)); - - if (this->IsInnerBatchedEvent) { - ZE2UR_CALL(zeCommandListAppendBarrier, - (CommandList->first, ZeEvent, 0, nullptr)); - } else { - ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (CommandList->first, 1, &ZeEvent)); - } - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (CommandList->first, HostVisibleEvent->ZeEvent)); - - UR_CALL(UrQueue->executeCommandList(CommandList, false, OkToBatch)) - this->IsCreatingHostProxyEvent = false; - } - - ZeHostVisibleEvent = HostVisibleEvent->ZeEvent; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventWait( - uint32_t NumEvents, ///< [in] number of events in the event list - const ur_event_handle_t - *EventWaitList ///< [in][range(0, numEvents)] pointer to a list of - ///< events to wait for completion +ur_result_t +urEventWait(uint32_t NumEvents, ///< [in] number of events in the event list + const ur_event_handle_t + *EventWaitList ///< [in][range(0, numEvents)] pointer to a list + ///< of events to wait for completion ) { for (uint32_t I = 0; I < NumEvents; I++) { auto e = EventWaitList[I]; @@ -855,8 +810,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventRetain( - ur_event_handle_t Event ///< [in] handle of the event object +ur_result_t +urEventRetain(ur_event_handle_t Event ///< [in] handle of the event object ) { Event->RefCountExternal++; Event->RefCount.increment(); @@ -864,8 +819,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventRelease( - ur_event_handle_t Event ///< [in] handle of the event object +ur_result_t +urEventRelease(ur_event_handle_t Event ///< [in] handle of the event object ) { Event->RefCountExternal--; UR_CALL(urEventReleaseInternal(Event)); @@ -873,7 +828,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( +ur_result_t urEventGetNativeHandle( ur_event_handle_t Event, ///< [in] handle of the event. ur_native_handle_t *NativeEvent ///< [out] a pointer to the native handle of the event. @@ -898,7 +853,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate( +ur_result_t urExtEventCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_event_handle_t *Event ///< [out] pointer to the handle of the event object created. @@ -911,7 +866,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( +ur_result_t urEventCreateWithNativeHandle( ur_native_handle_t NativeEvent, ///< [in] the native handle of the event. 
ur_context_handle_t Context, ///< [in] handle of the context object const ur_event_native_properties_t *Properties, @@ -961,7 +916,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback( +ur_result_t urEventSetCallback( ur_event_handle_t Event, ///< [in] handle of the event object ur_execution_info_t ExecStatus, ///< [in] execution status of the event ur_event_callback_t Notify, ///< [in] execution status of the event @@ -977,6 +932,57 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero + +ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( + ze_event_handle_t &ZeHostVisibleEvent) { + auto UrQueue = this->UrQueue; + + std::scoped_lock Lock(UrQueue->Mutex, + this->Mutex); + + if (!HostVisibleEvent) { + this->IsCreatingHostProxyEvent = true; + if (UrQueue->ZeEventsScope != OnDemandHostVisibleProxy) + die("getOrCreateHostVisibleEvent: missing host-visible event"); + + // Submit the command(s) signalling the proxy event to the queue. + // We have to first submit a wait for the device-only event for which this + // proxy is created. + // + // Get a new command list to be used on this call + + // We want to batch these commands to avoid extra submissions (costly) + bool OkToBatch = true; + + ur_command_list_ptr_t CommandList{}; + UR_CALL(UrQueue->Context->getAvailableCommandList( + UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch)) + + // Create a "proxy" host-visible event. + UR_CALL(createEventAndAssociateQueue( + UrQueue, &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList, + /* IsInternal */ false, /* IsMultiDevice */ false, + /* HostVisible */ true)); + + if (this->IsInnerBatchedEvent) { + ZE2UR_CALL(zeCommandListAppendBarrier, + (CommandList->first, ZeEvent, 0, nullptr)); + } else { + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (CommandList->first, 1, &ZeEvent)); + } + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (CommandList->first, HostVisibleEvent->ZeEvent)); + + UR_CALL(UrQueue->executeCommandList(CommandList, false, OkToBatch)) + this->IsCreatingHostProxyEvent = false; + } + + ZeHostVisibleEvent = HostVisibleEvent->ZeEvent; + return UR_RESULT_SUCCESS; +} + ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { if (!Event->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -1152,7 +1158,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, // We've reset event data members above, now cleanup resources. 
if (AssociatedKernel) { ReleaseIndirectMem(AssociatedKernel); - UR_CALL(urKernelRelease(AssociatedKernel)); + UR_CALL(ur::level_zero::urKernelRelease(AssociatedKernel)); } if (AssociatedQueue) { @@ -1211,7 +1217,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, } if (DepEventKernel) { ReleaseIndirectMem(DepEventKernel); - UR_CALL(urKernelRelease(DepEventKernel)); + UR_CALL(ur::level_zero::urKernelRelease(DepEventKernel)); } UR_CALL(urEventReleaseInternal(DepEvent)); } diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index 2d1f536e4e..7dd64acdaa 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp index c4623f314c..fc623e7e74 100644 --- a/source/adapters/level_zero/image.cpp +++ b/source/adapters/level_zero/image.cpp @@ -14,6 +14,7 @@ #include "event.hpp" #include "logger/ur_logger.hpp" #include "sampler.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" typedef ze_result_t(ZE_APICALL *zeImageGetDeviceOffsetExp_pfn)( @@ -631,11 +632,14 @@ getImageFormatTypeAndSize(const ur_image_format_t *ImageFormat) { return {ZeImageFormatType, ZeImageFormatTypeSize}; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, - size_t widthInBytes, size_t height, size_t elementSizeBytes, void **ppMem, - size_t *pResultPitch) { +namespace ur::level_zero { + +ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t widthInBytes, + size_t height, size_t elementSizeBytes, + void **ppMem, size_t *pResultPitch) { std::shared_lock Lock(hContext->Mutex); UR_ASSERT(hContext && hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -668,13 +672,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( *pResultPitch = RowPitch; size_t Size = height * RowPitch; - UR_CALL(urUSMDeviceAlloc(hContext, hDevice, pUSMDesc, pool, Size, ppMem)); + UR_CALL(ur::level_zero::urUSMDeviceAlloc(hContext, hDevice, pUSMDesc, pool, + Size, ppMem)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesUnsampledImageHandleDestroyExp( +ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_native_handle_t hImage) { UR_ASSERT(hContext && hDevice && hImage, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -691,17 +695,16 @@ urBindlessImagesUnsampledImageHandleDestroyExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesSampledImageHandleDestroyExp( +ur_result_t urBindlessImagesSampledImageHandleDestroyExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_native_handle_t hImage) { // Sampled image is a combination of unsampled image and sampler. // Sampler is released in urSamplerRelease. 
- return urBindlessImagesUnsampledImageHandleDestroyExp(hContext, hDevice, - hImage); + return ur::level_zero::urBindlessImagesUnsampledImageHandleDestroyExp( + hContext, hDevice, hImage); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( +ur_result_t urBindlessImagesImageAllocateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_image_mem_native_handle_t *phImageMem) { @@ -730,16 +733,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hImageMem) { +ur_result_t +urBindlessImagesImageFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem) { std::ignore = hContext; std::ignore = hDevice; - UR_CALL(urMemRelease(reinterpret_cast(hImageMem))); + UR_CALL(ur::level_zero::urMemRelease( + reinterpret_cast(hImageMem))); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( +ur_result_t urBindlessImagesUnsampledImageCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -749,7 +754,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( +ur_result_t urBindlessImagesSampledImageCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -759,7 +764,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( +ur_result_t urBindlessImagesImageCopyExp( ur_queue_handle_t hQueue, [[maybe_unused]] const void *pSrc, [[maybe_unused]] void *pDst, [[maybe_unused]] const ur_image_desc_t *pSrcImageDesc, @@ -920,7 +925,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( +ur_result_t urBindlessImagesImageGetInfoExp( ur_context_handle_t, ur_exp_image_mem_native_handle_t hImageMem, ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet) { UR_ASSERT(hImageMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -970,7 +975,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( } } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( +ur_result_t urBindlessImagesMipmapGetLevelExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel, ur_exp_image_mem_native_handle_t *phImageMem) { @@ -984,13 +989,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hMem) { - return urBindlessImagesImageFreeExp(hContext, hDevice, hMem); +ur_result_t +urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + 
ur_exp_image_mem_native_handle_t hMem) { + return ur::level_zero::urBindlessImagesImageFreeExp(hContext, hDevice, hMem); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( +ur_result_t urBindlessImagesImportExternalMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, ur_exp_external_mem_type_t memHandleType, ur_exp_external_mem_desc_t *pExternalMemDesc, @@ -1050,7 +1056,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( +ur_result_t urBindlessImagesMapExternalArrayExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_external_mem_handle_t hExternalMem, @@ -1085,7 +1091,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp( +ur_result_t urBindlessImagesMapExternalLinearMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, uint64_t offset, uint64_t size, ur_exp_external_mem_handle_t hExternalMem, void **phRetMem) { std::ignore = hContext; @@ -1099,7 +1105,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp( +ur_result_t urBindlessImagesReleaseExternalMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_mem_handle_t hExternalMem) { @@ -1109,7 +1115,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp( struct ur_ze_external_memory_data *externalMemoryData = reinterpret_cast(hExternalMem); - UR_CALL(urMemRelease(externalMemoryData->urMemoryHandle)); + UR_CALL(ur::level_zero::urMemRelease(externalMemoryData->urMemoryHandle)); switch (externalMemoryData->type) { case UR_ZE_EXTERNAL_OPAQUE_FD: @@ -1129,7 +1135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( +ur_result_t urBindlessImagesImportExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_semaphore_type_t semHandleType, ur_exp_external_semaphore_desc_t *pExternalSemaphoreDesc, @@ -1144,7 +1150,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp( +ur_result_t urBindlessImagesReleaseExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_semaphore_handle_t hExternalSemaphore) { std::ignore = hContext; @@ -1155,7 +1161,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( +ur_result_t urBindlessImagesWaitExternalSemaphoreExp( ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, bool hasValue, uint64_t waitValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -1171,7 +1177,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } 
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( +ur_result_t urBindlessImagesSignalExternalSemaphoreExp( ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, bool hasValue, uint64_t signalValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -1186,3 +1192,5 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/image.hpp b/source/adapters/level_zero/image.hpp index 618258601d..43f37fa757 100644 --- a/source/adapters/level_zero/image.hpp +++ b/source/adapters/level_zero/image.hpp @@ -10,7 +10,7 @@ #pragma once #include -#include +#include #include #include diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index ac942c173e..8e627f3ade 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -11,11 +11,29 @@ #include "kernel.hpp" #include "logger/ur_logger.hpp" #include "ur_api.h" -#include "ur_level_zero.hpp" +#include "ur_interface_loader.hpp" #include "helpers/kernel_helpers.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( +ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel, + ze_kernel_handle_t *phZeKernel) { + if (hKernel->ZeKernelMap.empty()) { + *phZeKernel = hKernel->ZeKernel; + } else { + auto It = hKernel->ZeKernelMap.find(hDevice); + if (It == hKernel->ZeKernelMap.end()) { + /* kernel and queue don't match */ + return UR_RESULT_ERROR_INVALID_QUEUE; + } + *phZeKernel = It->second; + } + + return UR_RESULT_SUCCESS; +} + +namespace ur::level_zero { + +ur_result_t urKernelGetSuggestedLocalWorkSize( ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t workDim, [[maybe_unused]] const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, size_t *pSuggestedLocalWorkSize) { @@ -38,23 +56,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( return UR_RESULT_SUCCESS; } -ur_result_t getZeKernel(ze_device_handle_t hDevice, ur_kernel_handle_t hKernel, - ze_kernel_handle_t *phZeKernel) { - if (hKernel->ZeKernelMap.empty()) { - *phZeKernel = hKernel->ZeKernel; - } else { - auto It = hKernel->ZeKernelMap.find(hDevice); - if (It == hKernel->ZeKernelMap.end()) { - /* kernel and queue don't match */ - return UR_RESULT_ERROR_INVALID_QUEUE; - } - *phZeKernel = It->second; - } - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( +ur_result_t urEnqueueKernelLaunch( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify @@ -158,7 +160,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( // is in use. Once the event has been signalled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. 
- UR_CALL(urKernelRetain(Kernel)); + UR_CALL(ur::level_zero::urKernelRetain(Kernel)); // Add to list of kernels to be submitted if (IndirectAccessTrackingEnabled) @@ -204,7 +206,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( +ur_result_t urEnqueueCooperativeKernelLaunchExp( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t WorkDim, ///< [in] number of dimensions, from 1 to 3, to specify @@ -422,7 +424,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( // is in use. Once the event has been signalled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. - UR_CALL(urKernelRetain(Kernel)); + UR_CALL(ur::level_zero::urKernelRetain(Kernel)); // Add to list of kernels to be submitted if (IndirectAccessTrackingEnabled) @@ -468,7 +470,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( +ur_result_t urEnqueueDeviceGlobalVariableWrite( ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. ur_program_handle_t Program, ///< [in] handle of the program containing the ///< device global variable. @@ -522,7 +524,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( EventWaitList, Event, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( +ur_result_t urEnqueueDeviceGlobalVariableRead( ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. ur_program_handle_t Program, ///< [in] handle of the program containing the ///< device global variable. @@ -576,7 +578,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( EventWaitList, Event, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate( +ur_result_t urKernelCreate( ur_program_handle_t Program, ///< [in] handle of the program instance const char *KernelName, ///< [in] pointer to null-terminated string. 
ur_kernel_handle_t @@ -639,7 +641,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( +ur_result_t urKernelSetArgValue( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] size_t ArgSize, ///< [in] size of argument type @@ -689,7 +691,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( return ze2urResult(ZeResult); } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal( +ur_result_t urKernelSetArgLocal( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] size_t ArgSize, ///< [in] size of the local buffer to be allocated by the @@ -699,12 +701,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal( ) { std::ignore = Properties; - UR_CALL(urKernelSetArgValue(Kernel, ArgIndex, ArgSize, nullptr, nullptr)); + UR_CALL(ur::level_zero::urKernelSetArgValue(Kernel, ArgIndex, ArgSize, + nullptr, nullptr)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo( +ur_result_t urKernelGetInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object ur_kernel_info_t ParamName, ///< [in] name of the Kernel property to query size_t PropSize, ///< [in] the size of the Kernel property value. @@ -766,7 +769,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo( +ur_result_t urKernelGetGroupInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object ur_device_handle_t Device, ///< [in] handle of the Device object ur_kernel_group_info_t @@ -847,7 +850,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo( +ur_result_t urKernelGetSubGroupInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object ur_device_handle_t Device, ///< [in] handle of the Device object ur_kernel_sub_group_info_t @@ -878,7 +881,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain( +ur_result_t urKernelRetain( ur_kernel_handle_t Kernel ///< [in] handle for the Kernel to retain ) { Kernel->RefCount.increment(); @@ -886,7 +889,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( +ur_result_t urKernelRelease( ur_kernel_handle_t Kernel ///< [in] handle for the Kernel to release ) { if (!Kernel->RefCount.decrementAndTest()) @@ -903,7 +906,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( } Kernel->ZeKernelMap.clear(); if (IndirectAccessTrackingEnabled) { - UR_CALL(urContextRelease(KernelProgram->Context)); + UR_CALL(ur::level_zero::urContextRelease(KernelProgram->Context)); } // do a release on the program this kernel was part of without delete of the // program handle @@ -914,7 +917,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( +ur_result_t urKernelSetArgPointer( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_pointer_properties_t @@ -926,12 +929,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( std::ignore = Properties; // 
KernelSetArgValue is expecting a pointer to the argument - UR_CALL(urKernelSetArgValue(Kernel, ArgIndex, sizeof(const void *), nullptr, - &ArgValue)); + UR_CALL(ur::level_zero::urKernelSetArgValue( + Kernel, ArgIndex, sizeof(const void *), nullptr, &ArgValue)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( +ur_result_t urKernelSetExecInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object ur_kernel_exec_info_t PropName, ///< [in] name of the execution attribute size_t PropSize, ///< [in] size in byte the attribute value @@ -977,7 +980,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( +ur_result_t urKernelSetArgSampler( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_sampler_properties_t @@ -995,7 +998,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( +ur_result_t urKernelSetArgMemObj( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_mem_obj_properties_t @@ -1037,7 +1040,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( +ur_result_t urKernelGetNativeHandle( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel. ur_native_handle_t *NativeKernel ///< [out] a pointer to the native handle of the kernel. @@ -1048,7 +1051,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( +ur_result_t urKernelSuggestMaxCooperativeGroupCountExp( ur_kernel_handle_t hKernel, size_t localWorkSize, size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) { (void)localWorkSize; @@ -1061,7 +1064,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( +ur_result_t urKernelCreateWithNativeHandle( ur_native_handle_t NativeKernel, ///< [in] the native handle of the kernel. ur_context_handle_t Context, ///< [in] handle of the context object ur_program_handle_t Program, @@ -1097,13 +1100,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( return UR_RESULT_SUCCESS; } +ur_result_t urKernelSetSpecializationConstants( + ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object + uint32_t Count, ///< [in] the number of elements in the pSpecConstants array + const ur_specialization_constant_info_t + *SpecConstants ///< [in] array of specialization constant value + ///< descriptions +) { + std::ignore = Kernel; + std::ignore = Count; + std::ignore = SpecConstants; + logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +} // namespace ur::level_zero + ur_result_t ur_kernel_handle_t_::initialize() { // Retain the program and context to show it's used by this kernel. 
- UR_CALL(urProgramRetain(Program)); + UR_CALL(ur::level_zero::urProgramRetain(Program)); if (IndirectAccessTrackingEnabled) // TODO: do piContextRetain without the guard - UR_CALL(urContextRetain(Program->Context)); + UR_CALL(ur::level_zero::urContextRetain(Program->Context)); // Set up how to obtain kernel properties when needed. ZeKernelProperties.Compute = [this](ze_kernel_properties_t &Properties) { @@ -1122,18 +1142,3 @@ ur_result_t ur_kernel_handle_t_::initialize() { return UR_RESULT_SUCCESS; } - -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetSpecializationConstants( - ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object - uint32_t Count, ///< [in] the number of elements in the pSpecConstants array - const ur_specialization_constant_info_t - *SpecConstants ///< [in] array of specialization constant value - ///< descriptions -) { - std::ignore = Kernel; - std::ignore = Count; - std::ignore = SpecConstants; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 2fd66948e2..9786092073 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -18,6 +18,7 @@ #include "image.hpp" #include "logger/ur_logger.hpp" #include "queue.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" // Default to using compute engine for fill operation, but allow to @@ -314,7 +315,7 @@ static ur_result_t ZeHostMemAllocHelper(void **ResultPtr, // indirect access, that is why explicitly retain context to be sure // that it is released after all memory allocations in this context are // released. 
- UR_CALL(urContextRetain(UrContext)); + UR_CALL(ur::level_zero::urContextRetain(UrContext)); } ZeStruct ZeDesc; @@ -473,7 +474,9 @@ static ur_result_t enqueueMemImageCommandHelper( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( +namespace ur::level_zero { + +ur_result_t urEnqueueMemBufferRead( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) @@ -507,7 +510,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( true /* PreferCopyEngine */); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( +ur_result_t urEnqueueMemBufferWrite( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool @@ -544,7 +547,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( true /* PreferCopyEngine */); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( +ur_result_t urEnqueueMemBufferReadRect( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) @@ -589,7 +592,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( +ur_result_t urEnqueueMemBufferWriteRect( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t hBuffer, ///< [in] handle of the buffer object bool @@ -636,7 +639,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( +ur_result_t urEnqueueMemBufferCopy( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t BufferSrc, ///< [in] handle of the src buffer object ur_mem_handle_t BufferDst, ///< [in] handle of the dest buffer object @@ -687,7 +690,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( OutEvent, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( +ur_result_t urEnqueueMemBufferCopyRect( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t BufferSrc, ///< [in] handle of the source buffer object ur_mem_handle_t BufferDst, ///< [in] handle of the dest buffer object @@ -745,7 +748,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( +ur_result_t urEnqueueMemBufferFill( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t Buffer, ///< [in] handle of the buffer object const void *Pattern, ///< [in] pointer to the fill pattern @@ -778,7 +781,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( Size, NumEventsInWaitList, EventWaitList, OutEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( +ur_result_t urEnqueueMemImageRead( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t Image, ///< [in] handle of the image object bool BlockingRead, ///< [in] indicates blocking (true), non-blocking (false) @@ -809,7 +812,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( EventWaitList, OutEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( +ur_result_t urEnqueueMemImageWrite( 
ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t Image, ///< [in] handle of the image object bool @@ -841,7 +844,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( EventWaitList, OutEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( +ur_result_t urEnqueueMemImageCopy( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t ImageSrc, ///< [in] handle of the src image object ur_mem_handle_t ImageDst, ///< [in] handle of the dest image object @@ -880,7 +883,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( +ur_result_t urEnqueueMemBufferMap( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t Buf, ///< [in] handle of the buffer object bool BlockingMap, ///< [in] indicates blocking (true), non-blocking (false) @@ -959,10 +962,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( if (Buffer->OnHost) { // Wait on incoming events before doing the copy if (NumEventsInWaitList > 0) - UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); + UR_CALL(ur::level_zero::urEventWait(NumEventsInWaitList, EventWaitList)); if (Queue->isInOrderQueue()) - UR_CALL(urQueueFinish(Queue)); + UR_CALL(ur::level_zero::urQueueFinish(Queue)); // Lock automatically releases when this goes out of scope. std::scoped_lock Guard(Buffer->Mutex); @@ -1048,7 +1051,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( +ur_result_t urEnqueueMemUnmap( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_mem_handle_t Mem, ///< [in] handle of the memory (buffer or image) object void *MappedPtr, ///< [in] mapped host address @@ -1115,10 +1118,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( if (Buffer->OnHost) { // Wait on incoming events before doing the copy if (NumEventsInWaitList > 0) - UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); + UR_CALL(ur::level_zero::urEventWait(NumEventsInWaitList, EventWaitList)); if (Queue->isInOrderQueue()) - UR_CALL(urQueueFinish(Queue)); + UR_CALL(ur::level_zero::urQueueFinish(Queue)); char *ZeHandleDst; UR_CALL(Buffer->getZeHandle(ZeHandleDst, ur_mem_handle_t_::write_only, @@ -1175,7 +1178,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( +ur_result_t urEnqueueUSMMemcpy( ur_queue_handle_t Queue, ///< [in] handle of the queue object bool Blocking, ///< [in] blocking or non-blocking copy void *Dst, ///< [in] pointer to the destination USM memory object @@ -1214,7 +1217,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( NumEventsInWaitList, EventWaitList, OutEvent, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( +ur_result_t urEnqueueUSMPrefetch( ur_queue_handle_t Queue, ///< [in] handle of the queue object const void *Mem, ///< [in] pointer to the USM memory object size_t Size, ///< [in] size in bytes to be fetched @@ -1282,7 +1285,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( +ur_result_t urEnqueueUSMAdvise( ur_queue_handle_t Queue, ///< [in] handle of the queue object const void *Mem, ///< [in] pointer to the USM memory object size_t Size, ///< [in] size in bytes to be advised 
@@ -1340,7 +1343,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( +ur_result_t urEnqueueUSMFill2D( ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. void *Mem, ///< [in] pointer to memory to be filled. size_t Pitch, ///< [in] the total width of the destination memory including @@ -1375,7 +1378,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( +ur_result_t urEnqueueUSMMemcpy2D( ur_queue_handle_t Queue, ///< [in] handle of the queue to submit to. bool Blocking, ///< [in] indicates if this operation should block the host. void *Dst, ///< [in] pointer to memory where data will be copied. @@ -1497,7 +1500,7 @@ static ur_result_t ur2zeImageDesc(const ur_image_format_t *ImageFormat, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( +ur_result_t urMemImageCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_mem_flags_t Flags, ///< [in] allocation and usage information flags const ur_image_format_t @@ -1546,7 +1549,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( +ur_result_t urMemImageCreateWithNativeHandle( ur_native_handle_t NativeMem, ///< [in] the native handle to the memory. ur_context_handle_t Context, ///< [in] handle of the context object. [[maybe_unused]] const ur_image_format_t @@ -1574,7 +1577,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( +ur_result_t urMemBufferCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_mem_flags_t Flags, ///< [in] allocation and usage information flags size_t Size, ///< [in] size in bytes of the memory object to be allocated @@ -1668,14 +1671,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemRetain( +ur_result_t urMemRetain( ur_mem_handle_t Mem ///< [in] handle of the memory object to get access ) { Mem->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemRelease( +ur_result_t urMemRelease( ur_mem_handle_t Mem ///< [in] handle of the memory object to release ) { if (!Mem->RefCount.decrementAndTest()) @@ -1701,7 +1704,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( +ur_result_t urMemBufferPartition( ur_mem_handle_t Buffer, ///< [in] handle of the buffer object to allocate from ur_mem_flags_t Flags, ///< [in] allocation and usage information flags @@ -1737,7 +1740,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( +ur_result_t urMemGetNativeHandle( ur_mem_handle_t Mem, ///< [in] handle of the mem. ur_device_handle_t, ///< [in] handle of the device. ur_native_handle_t @@ -1751,7 +1754,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( +ur_result_t urMemBufferCreateWithNativeHandle( ur_native_handle_t NativeMem, ///< [in] the native handle to the memory. 
ur_context_handle_t Context, ///< [in] handle of the context object. const ur_mem_native_properties_t @@ -1818,7 +1821,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( ContextsLock.lock(); // Retain context to be sure that it is released after all memory // allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); Context->MemAllocs.emplace(std::piecewise_construct, std::forward_as_tuple(Ptr), @@ -1854,7 +1857,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo( +ur_result_t urMemGetInfo( ur_mem_handle_t Memory, ///< [in] handle to the memory object being queried. ur_mem_info_t MemInfoType, ///< [in] type of the info to retrieve. size_t PropSize, ///< [in] the number of bytes of memory pointed to by @@ -1890,7 +1893,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo( +ur_result_t urMemImageGetInfo( ur_mem_handle_t Memory, ///< [in] handle to the image object being queried. ur_image_info_t ImgInfoType, ///< [in] type of image info to retrieve. size_t PropSize, ///< [in] the number of bytes of memory pointer to by @@ -1913,6 +1916,79 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +ur_result_t urEnqueueUSMFill( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + void *Ptr, ///< [in] pointer to USM memory object + size_t PatternSize, ///< [in] the size in bytes of the pattern. Must be a + ///< power of 2 and less than or equal to width. + const void *Pattern, ///< [in] pointer with the bytes of the pattern to set. + size_t Size, ///< [in] size in bytes to be set. Must be a multiple of + ///< patternSize. + uint32_t NumEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, the + ///< numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. + ur_event_handle_t *Event ///< [out][optional] return an event object that + ///< identifies this particular command instance. +) { + std::scoped_lock Lock(Queue->Mutex); + + return enqueueMemFillHelper( + // TODO: do we need a new command type for USM memset? 
+ UR_COMMAND_MEM_BUFFER_FILL, Queue, Ptr, + Pattern, // It will be interpreted as an 8-bit value, + PatternSize, // which is indicated with this pattern_size==1 + Size, NumEventsInWaitList, EventWaitList, Event); +} + +/// Host Pipes +ur_result_t urEnqueueReadHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = hProgram; + std::ignore = pipe_symbol; + std::ignore = blocking; + std::ignore = pDst; + std::ignore = size; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = hProgram; + std::ignore = pipe_symbol; + std::ignore = blocking; + std::ignore = pSrc; + std::ignore = size; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +} // namespace ur::level_zero + // If indirect access tracking is enabled then performs reference counting, // otherwise just calls zeMemAllocDevice. static ur_result_t ZeDeviceMemAllocHelper(void **ResultPtr, @@ -1932,7 +2008,7 @@ static ur_result_t ZeDeviceMemAllocHelper(void **ResultPtr, // indirect access, that is why explicitly retain context to be sure // that it is released after all memory allocations in this context are // released. 
- UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } ze_device_mem_alloc_desc_t ZeDesc = {}; @@ -1992,8 +2068,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - UR_CALL(urUSMHostAlloc(UrContext, &USMDesc, Pool, Size, - reinterpret_cast(&ZeHandle))); + UR_CALL(ur::level_zero::urUSMHostAlloc( + UrContext, &USMDesc, Pool, Size, + reinterpret_cast(&ZeHandle))); } else { HostAllocation.ReleaseAction = allocation_t::free_native; UR_CALL(ZeHostMemAllocHelper(reinterpret_cast(&ZeHandle), @@ -2051,8 +2128,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - UR_CALL(urUSMDeviceAlloc(UrContext, Device, &USMDesc, Pool, Size, - reinterpret_cast(&ZeHandle))); + UR_CALL(ur::level_zero::urUSMDeviceAlloc( + UrContext, Device, &USMDesc, Pool, Size, + reinterpret_cast(&ZeHandle))); } else { Allocation.ReleaseAction = allocation_t::free_native; UR_CALL(ZeDeviceMemAllocHelper(reinterpret_cast(&ZeHandle), @@ -2115,8 +2193,8 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - UR_CALL( - urUSMHostAlloc(UrContext, &USMDesc, Pool, Size, &ZeHandleHost)); + UR_CALL(ur::level_zero::urUSMHostAlloc(UrContext, &USMDesc, Pool, + Size, &ZeHandleHost)); } else { HostAllocation.ReleaseAction = allocation_t::free_native; UR_CALL(ZeHostMemAllocHelper(&ZeHandleHost, UrContext, Size)); @@ -2298,70 +2376,3 @@ size_t _ur_buffer::getAlignment() const { Alignment = 1UL; return Alignment; } - -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( - ur_queue_handle_t Queue, ///< [in] handle of the queue object - void *Ptr, ///< [in] pointer to USM memory object - size_t PatternSize, ///< [in] the size in bytes of the pattern. Must be a - ///< power of 2 and less than or equal to width. - const void *Pattern, ///< [in] pointer with the bytes of the pattern to set. - size_t Size, ///< [in] size in bytes to be set. Must be a multiple of - ///< patternSize. - uint32_t NumEventsInWaitList, ///< [in] size of the event wait list - const ur_event_handle_t * - EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, the - ///< numEventsInWaitList must be 0, indicating that this - ///< command does not wait on any event to complete. - ur_event_handle_t *Event ///< [out][optional] return an event object that - ///< identifies this particular command instance. -) { - std::scoped_lock Lock(Queue->Mutex); - - return enqueueMemFillHelper( - // TODO: do we need a new command type for USM memset? 
- UR_COMMAND_MEM_BUFFER_FILL, Queue, Ptr, - Pattern, // It will be interpreted as an 8-bit value, - PatternSize, // which is indicated with this pattern_size==1 - Size, NumEventsInWaitList, EventWaitList, Event); -} - -/// Host Pipes -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pDst, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - std::ignore = hQueue; - std::ignore = hProgram; - std::ignore = pipe_symbol; - std::ignore = blocking; - std::ignore = pDst; - std::ignore = size; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -UR_APIEXPORT ur_result_t urEnqueueWriteHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pSrc, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - std::ignore = hQueue; - std::ignore = hProgram; - std::ignore = pipe_symbol; - std::ignore = blocking; - std::ignore = pSrc; - std::ignore = size; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/source/adapters/level_zero/memory.hpp b/source/adapters/level_zero/memory.hpp index dc76abe404..71d102e9dd 100644 --- a/source/adapters/level_zero/memory.hpp +++ b/source/adapters/level_zero/memory.hpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/physical_mem.cpp b/source/adapters/level_zero/physical_mem.cpp index d4d9792f24..e7bb498859 100644 --- a/source/adapters/level_zero/physical_mem.cpp +++ b/source/adapters/level_zero/physical_mem.cpp @@ -14,7 +14,9 @@ #include "device.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( +namespace ur::level_zero { + +ur_result_t urPhysicalMemCreate( ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, [[maybe_unused]] const ur_physical_mem_properties_t *pProperties, ur_physical_mem_handle_t *phPhysicalMem) { @@ -35,14 +37,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { hPhysicalMem->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { if (!hPhysicalMem->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -52,3 +52,4 @@ urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index 68aebf97c7..721db3c359 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -12,7 +12,9 @@ #include "adapter.hpp" #include 
"ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( +namespace ur::level_zero { + +ur_result_t urPlatformGet( ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ///< [in] the number of platforms to be added to ///< phPlatforms. If phPlatforms is not NULL, then @@ -47,7 +49,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo( +ur_result_t urPlatformGetInfo( ur_platform_handle_t Platform, ///< [in] handle of the platform ur_platform_info_t ParamName, ///< [in] type of the info to retrieve size_t Size, ///< [in] the number of bytes pointed to by pPlatformInfo. @@ -101,7 +103,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetApiVersion( +ur_result_t urPlatformGetApiVersion( ur_platform_handle_t Driver, ///< [in] handle of the platform ur_api_version_t *Version ///< [out] api version ) { @@ -110,7 +112,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetApiVersion( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( +ur_result_t urPlatformGetNativeHandle( ur_platform_handle_t Platform, ///< [in] handle of the platform. ur_native_handle_t *NativePlatform ///< [out] a pointer to the native ///< handle of the platform. @@ -120,7 +122,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( +ur_result_t urPlatformCreateWithNativeHandle( ur_native_handle_t NativePlatform, ///< [in] the native handle of the platform. ur_adapter_handle_t, @@ -135,12 +137,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( uint32_t NumPlatforms = 0; ur_adapter_handle_t AdapterHandle = GlobalAdapter; - UR_CALL(urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms)); + UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, 0, nullptr, + &NumPlatforms)); if (NumPlatforms) { std::vector Platforms(NumPlatforms); - UR_CALL(urPlatformGet(&AdapterHandle, 1, NumPlatforms, Platforms.data(), - nullptr)); + UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, NumPlatforms, + Platforms.data(), nullptr)); // The SYCL spec requires that the set of platforms must remain fixed for // the duration of the application's execution. We assume that we found all @@ -158,6 +161,46 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( return UR_RESULT_ERROR_INVALID_VALUE; } +// Returns plugin specific backend option. +// Current support is only for optimization options. +// Return '-ze-opt-disable' for frontend_option = -O0. +// Return '-ze-opt-level=2' for frontend_option = -O1, -O2 or -O3. +// Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for +// frontend_option=-ftarget-compile-fast. +ur_result_t urPlatformGetBackendOption( + ur_platform_handle_t Platform, ///< [in] handle of the platform instance. + const char *FrontendOption, ///< [in] string containing the frontend option. + const char * + *PlatformOption ///< [out] returns the correct platform specific + ///< compiler option based on the frontend option. 
+) { + std::ignore = Platform; + using namespace std::literals; + if (FrontendOption == nullptr) { + return UR_RESULT_SUCCESS; + } + if (FrontendOption == ""sv) { + *PlatformOption = ""; + return UR_RESULT_SUCCESS; + } + if (FrontendOption == "-O0"sv) { + *PlatformOption = "-ze-opt-disable"; + return UR_RESULT_SUCCESS; + } + if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv || + FrontendOption == "-O3"sv) { + *PlatformOption = "-ze-opt-level=2"; + return UR_RESULT_SUCCESS; + } + if (FrontendOption == "-ftarget-compile-fast"sv) { + *PlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; + return UR_RESULT_SUCCESS; + } + return UR_RESULT_ERROR_INVALID_VALUE; +} + +} // namespace ur::level_zero + ur_result_t ur_platform_handle_t_::initialize() { ZE2UR_CALL(zeDriverGetApiVersion, (ZeDriver, &ZeApiVersion)); ZeDriverApiVersion = std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + "." + @@ -513,41 +556,3 @@ ur_device_handle_t ur_platform_handle_t_::getDeviceById(DeviceId id) { } return nullptr; } - -// Returns plugin specific backend option. -// Current support is only for optimization options. -// Return '-ze-opt-disable' for frontend_option = -O0. -// Return '-ze-opt-level=2' for frontend_option = -O1, -O2 or -O3. -// Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for -// frontend_option=-ftarget-compile-fast. -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( - ur_platform_handle_t Platform, ///< [in] handle of the platform instance. - const char *FrontendOption, ///< [in] string containing the frontend option. - const char * - *PlatformOption ///< [out] returns the correct platform specific - ///< compiler option based on the frontend option. -) { - std::ignore = Platform; - using namespace std::literals; - if (FrontendOption == nullptr) { - return UR_RESULT_SUCCESS; - } - if (FrontendOption == ""sv) { - *PlatformOption = ""; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-O0"sv) { - *PlatformOption = "-ze-opt-disable"; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv || - FrontendOption == "-O3"sv) { - *PlatformOption = "-ze-opt-level=2"; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-ftarget-compile-fast"sv) { - *PlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; - return UR_RESULT_SUCCESS; - } - return UR_RESULT_ERROR_INVALID_VALUE; -} diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index a6d34ccb23..02aef2d058 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -11,6 +11,7 @@ #include "program.hpp" #include "device.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #ifdef UR_ADAPTER_LEVEL_ZERO_V2 #include "v2/context.hpp" @@ -54,7 +55,9 @@ checkUnresolvedSymbols(ze_module_handle_t ZeModule, } } // extern "C" -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( +namespace ur::level_zero { + +ur_result_t urProgramCreateWithIL( ur_context_handle_t Context, ///< [in] handle of the context instance const void *IL, ///< [in] pointer to IL binary. size_t Length, ///< [in] length of `pIL` in bytes. 
@@ -79,7 +82,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( +ur_result_t urProgramCreateWithBinary( ur_context_handle_t Context, ///< [in] handle of the context instance ur_device_handle_t Device, ///< [in] handle to device associated with binary. @@ -115,17 +118,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild( +ur_result_t urProgramBuild( ur_context_handle_t Context, ///< [in] handle of the context instance. ur_program_handle_t Program, ///< [in] Handle of the program to build. const char *Options ///< [in][optional] pointer to build options ///< null-terminated string. ) { std::vector Devices = Context->getDevices(); - return urProgramBuildExp(Program, Devices.size(), Devices.data(), Options); + return ur::level_zero::urProgramBuildExp(Program, Devices.size(), + Devices.data(), Options); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( +ur_result_t urProgramBuildExp( ur_program_handle_t hProgram, ///< [in] Handle of the program to build. uint32_t numDevices, ///< [in] number of devices ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to @@ -228,7 +232,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( return Result; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( +ur_result_t urProgramCompileExp( ur_program_handle_t hProgram, ///< [in][out] handle of the program to compile. uint32_t numDevices, ///< [in] number of devices @@ -239,10 +243,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( ) { std::ignore = numDevices; std::ignore = phDevices; - return urProgramCompile(hProgram->Context, hProgram, pOptions); + return ur::level_zero::urProgramCompile(hProgram->Context, hProgram, + pOptions); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( +ur_result_t urProgramCompile( ur_context_handle_t Context, ///< [in] handle of the context instance. ur_program_handle_t Program, ///< [in][out] handle of the program to compile. @@ -281,7 +286,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramLink( +ur_result_t urProgramLink( ur_context_handle_t Context, ///< [in] handle of the context instance. uint32_t Count, ///< [in] number of program handles in `phPrograms`. const ur_program_handle_t *Programs, ///< [in][range(0, count)] pointer to @@ -292,11 +297,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLink( *Program ///< [out] pointer to handle of program object created. ) { std::vector Devices = Context->getDevices(); - return urProgramLinkExp(Context, Devices.size(), Devices.data(), Count, - Programs, Options, Program); + return ur::level_zero::urProgramLinkExp(Context, Devices.size(), + Devices.data(), Count, Programs, + Options, Program); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( +ur_result_t urProgramLinkExp( ur_context_handle_t hContext, ///< [in] handle of the context instance. 
uint32_t numDevices, ///< [in] number of devices ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to @@ -482,14 +488,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( return UrResult; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramRetain( +ur_result_t urProgramRetain( ur_program_handle_t Program ///< [in] handle for the Program to retain ) { Program->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramRelease( +ur_result_t urProgramRelease( ur_program_handle_t Program ///< [in] handle for the Program to release ) { if (!Program->RefCount.decrementAndTest()) @@ -526,7 +532,7 @@ static bool is_in_separated_string(const std::string &str, char delimiter, return false; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( +ur_result_t urProgramGetFunctionPointer( ur_device_handle_t Device, ///< [in] handle of the device to retrieve pointer for. ur_program_handle_t @@ -566,12 +572,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( if (ZeResult == ZE_RESULT_ERROR_INVALID_ARGUMENT) { size_t Size; *FunctionPointerRet = 0; - UR_CALL(urProgramGetInfo(Program, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, - &Size)); + UR_CALL(ur::level_zero::urProgramGetInfo( + Program, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &Size)); std::string ClResult(Size, ' '); - UR_CALL(urProgramGetInfo(Program, UR_PROGRAM_INFO_KERNEL_NAMES, - ClResult.size(), &ClResult[0], nullptr)); + UR_CALL(ur::level_zero::urProgramGetInfo( + Program, UR_PROGRAM_INFO_KERNEL_NAMES, ClResult.size(), &ClResult[0], + nullptr)); // Get rid of the null terminator and search for kernel_name // If function can be found return error code to indicate it @@ -591,7 +598,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( return ze2urResult(ZeResult); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( +ur_result_t urProgramGetGlobalVariablePointer( ur_device_handle_t Device, ///< [in] handle of the device to retrieve the pointer for. ur_program_handle_t @@ -626,7 +633,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( return ze2urResult(ZeResult); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo( +ur_result_t urProgramGetInfo( ur_program_handle_t Program, ///< [in] handle of the Program object ur_program_info_t PropName, ///< [in] name of the Program property to query size_t PropSize, ///< [in] the size of the Program property. 
@@ -818,7 +825,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo( +ur_result_t urProgramGetBuildInfo( ur_program_handle_t Program, ///< [in] handle of the Program object ur_device_handle_t Device, ///< [in] handle of the Device object ur_program_build_info_t @@ -898,7 +905,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstant( +ur_result_t urProgramSetSpecializationConstant( ur_program_handle_t Program, ///< [in] handle of the Program object uint32_t SpecId, ///< [in] specification constant Id size_t SpecSize, ///< [in] size of the specialization constant value @@ -913,7 +920,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstant( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( +ur_result_t urProgramGetNativeHandle( ur_program_handle_t Program, ///< [in] handle of the program. ur_native_handle_t *NativeProgram ///< [out] a pointer to the native ///< handle of the program. @@ -934,7 +941,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( +ur_result_t urProgramCreateWithNativeHandle( ur_native_handle_t NativeProgram, ///< [in] the native handle of the program. ur_context_handle_t Context, ///< [in] handle of the context instance @@ -966,6 +973,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( return UR_RESULT_SUCCESS; } +ur_result_t urProgramSetSpecializationConstants( + ur_program_handle_t Program, ///< [in] handle of the Program object + uint32_t Count, ///< [in] the number of elements in the pSpecConstants array + const ur_specialization_constant_info_t + *SpecConstants ///< [in][range(0, count)] array of specialization + ///< constant value descriptions +) { + std::scoped_lock Guard(Program->Mutex); + + // Remember the value of this specialization constant until the program is + // built. Note that we only save the pointer to the buffer that contains the + // value. The caller is responsible for maintaining storage for this buffer. + // + // NOTE: SpecSize is unused in Level Zero, the size is known from SPIR-V by + // SpecID. + for (uint32_t SpecIt = 0; SpecIt < Count; SpecIt++) { + uint32_t SpecId = SpecConstants[SpecIt].id; + Program->SpecConstants[SpecId] = SpecConstants[SpecIt].pValue; + } + return UR_RESULT_SUCCESS; +} + +} // namespace ur::level_zero + ur_program_handle_t_::~ur_program_handle_t_() { if (!resourcesReleased) { ur_release_program_resources(true); @@ -1000,25 +1031,3 @@ void ur_program_handle_t_::ur_release_program_resources(bool deletion) { resourcesReleased = true; } } - -UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( - ur_program_handle_t Program, ///< [in] handle of the Program object - uint32_t Count, ///< [in] the number of elements in the pSpecConstants array - const ur_specialization_constant_info_t - *SpecConstants ///< [in][range(0, count)] array of specialization - ///< constant value descriptions -) { - std::scoped_lock Guard(Program->Mutex); - - // Remember the value of this specialization constant until the program is - // built. Note that we only save the pointer to the buffer that contains the - // value. The caller is responsible for maintaining storage for this buffer. 
- // - // NOTE: SpecSize is unused in Level Zero, the size is known from SPIR-V by - // SpecID. - for (uint32_t SpecIt = 0; SpecIt < Count; SpecIt++) { - uint32_t SpecId = SpecConstants[SpecIt].id; - Program->SpecConstants[SpecId] = SpecConstants[SpecIt].pValue; - } - return UR_RESULT_SUCCESS; -} diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 47dddac89b..9757dad74f 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -19,7 +19,7 @@ #include "common.hpp" #include "event.hpp" #include "queue.hpp" -#include "ur_api.h" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" #include "ur_util.hpp" #include "ze_api.h" @@ -342,7 +342,9 @@ ur_result_t resetCommandLists(ur_queue_handle_t Queue) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo( +namespace ur::level_zero { + +ur_result_t urQueueGetInfo( ur_queue_handle_t Queue, ///< [in] handle of the queue object ur_queue_info_t ParamName, ///< [in] name of the queue property to query size_t ParamValueSize, ///< [in] size in bytes of the queue property value @@ -466,7 +468,7 @@ static bool doEagerInit = [] { return EagerInit ? std::atoi(EagerInit) != 0 : false; }(); -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( +ur_result_t urQueueCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, ///< [in] handle of the device object const ur_queue_properties_t @@ -575,7 +577,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain( +ur_result_t urQueueRetain( ur_queue_handle_t Queue ///< [in] handle of the queue object to get access ) { { @@ -586,7 +588,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease( +ur_result_t urQueueRelease( ur_queue_handle_t Queue ///< [in] handle of the queue object to release ) { std::vector EventListToCleanup; @@ -689,7 +691,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle( +ur_result_t urQueueGetNativeHandle( ur_queue_handle_t Queue, ///< [in] handle of the queue. ur_queue_native_desc_t *Desc, ur_native_handle_t @@ -726,24 +728,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle( return UR_RESULT_SUCCESS; } -void ur_queue_handle_t_::ur_queue_group_t::setImmCmdList( - ur_queue_handle_t queue, ze_command_list_handle_t ZeCommandList) { - // An immediate command list was given to us but we don't have the queue - // descriptor information. Create a dummy and note that it is not recycleable. - ZeStruct ZeQueueDesc; - - ImmCmdLists = std::vector( - 1, - Queue->CommandListMap - .insert(std::pair{ - ZeCommandList, - ur_command_list_info_t(nullptr, true, false, nullptr, ZeQueueDesc, - queue->useCompletionBatching(), false, - false, true)}) - .first); -} - -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( +ur_result_t urQueueCreateWithNativeHandle( ur_native_handle_t NativeQueue, ///< [in] the native handle of the queue. 
ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, /// @@ -783,12 +768,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( uint32_t NumEntries = 1; ur_platform_handle_t Platform{}; ur_adapter_handle_t AdapterHandle = GlobalAdapter; - UR_CALL(urPlatformGet(&AdapterHandle, 1, NumEntries, &Platform, nullptr)); + UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, NumEntries, + &Platform, nullptr)); ur_device_handle_t UrDevice = Device; if (UrDevice == nullptr) { - UR_CALL(urDeviceGet(Platform, UR_DEVICE_TYPE_GPU, NumEntries, &UrDevice, - nullptr)); + UR_CALL(ur::level_zero::urDeviceGet(Platform, UR_DEVICE_TYPE_GPU, + NumEntries, &UrDevice, nullptr)); } // The NativeHandleDesc has value if if the native handle is an immediate @@ -834,7 +820,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish( +ur_result_t urQueueFinish( ur_queue_handle_t Queue ///< [in] handle of the queue to be finished. ) { if (Queue->UsingImmCmdLists) { @@ -901,13 +887,38 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush( +ur_result_t urQueueFlush( ur_queue_handle_t Queue ///< [in] handle of the queue to be flushed. ) { std::scoped_lock Lock(Queue->Mutex); return Queue->executeAllOpenCommandLists(); } +ur_result_t urEnqueueKernelLaunchCustomExp( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = hKernel; + std::ignore = workDim; + std::ignore = pGlobalWorkSize; + std::ignore = pLocalWorkSize; + std::ignore = numPropsInLaunchPropList; + std::ignore = launchPropList; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + + logger::error("[UR][L0] {} function not implemented!", + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +} // namespace ur::level_zero + // Configuration of the command-list batching. struct zeCommandListBatchConfig { // Default value of 0. This specifies to use dynamic batch size adjustment. @@ -1906,7 +1917,7 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue, // event will not be waited/released by SYCL RT, so it must be destroyed by // EventRelease in resetCommandList. if (!IsInternal) - UR_CALL(urEventRetain(*Event)); + UR_CALL(ur::level_zero::urEventRetain(*Event)); return UR_RESULT_SUCCESS; } @@ -2118,6 +2129,23 @@ ur_queue_handle_t_::eventOpenCommandList(ur_event_handle_t Event) { return CommandListMap.end(); } +void ur_queue_handle_t_::ur_queue_group_t::setImmCmdList( + ur_queue_handle_t queue, ze_command_list_handle_t ZeCommandList) { + // An immediate command list was given to us but we don't have the queue + // descriptor information. Create a dummy and note that it is not recycleable. 
+ ZeStruct ZeQueueDesc; + + ImmCmdLists = std::vector( + 1, + Queue->CommandListMap + .insert(std::pair{ + ZeCommandList, + ur_command_list_info_t(nullptr, true, false, nullptr, ZeQueueDesc, + queue->useCompletionBatching(), false, + false, true)}) + .first); +} + ur_queue_handle_t_::ur_queue_group_t & ur_queue_handle_t_::getQueueGroup(bool UseCopyEngine) { auto &Map = (UseCopyEngine ? CopyQueueGroupsByTID : ComputeQueueGroupsByTID); diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 592a2808aa..699d7ec960 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/sampler.cpp b/source/adapters/level_zero/sampler.cpp index 54ca1b6672..d48e6aeede 100644 --- a/source/adapters/level_zero/sampler.cpp +++ b/source/adapters/level_zero/sampler.cpp @@ -12,7 +12,9 @@ #include "logger/ur_logger.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreate( +namespace ur::level_zero { + +ur_result_t urSamplerCreate( ur_context_handle_t Context, ///< [in] handle of the context object const ur_sampler_desc_t *Props, ///< [in] specifies a list of sampler property names and their @@ -109,17 +111,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerRetain( - ur_sampler_handle_t - Sampler ///< [in] handle of the sampler object to get access +ur_result_t +urSamplerRetain(ur_sampler_handle_t + Sampler ///< [in] handle of the sampler object to get access ) { Sampler->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease( - ur_sampler_handle_t - Sampler ///< [in] handle of the sampler object to release +ur_result_t +urSamplerRelease(ur_sampler_handle_t + Sampler ///< [in] handle of the sampler object to release ) { if (!Sampler->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -133,7 +135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo( +ur_result_t urSamplerGetInfo( ur_sampler_handle_t Sampler, ///< [in] handle of the sampler object ur_sampler_info_t PropName, ///< [in] name of the sampler property to query size_t PropValueSize, ///< [in] size in bytes of the sampler property value @@ -152,7 +154,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( +ur_result_t urSamplerGetNativeHandle( ur_sampler_handle_t Sampler, ///< [in] handle of the sampler. ur_native_handle_t *NativeSampler ///< [out] a pointer to the native ///< handle of the sampler. @@ -164,7 +166,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( +ur_result_t urSamplerCreateWithNativeHandle( ur_native_handle_t NativeSampler, ///< [in] the native handle of the sampler. 
ur_context_handle_t Context, ///< [in] handle of the context object @@ -182,3 +184,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 8941f756ea..9bdd672818 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -1,19 +1,19 @@ -//===--------- ur_interface_loader.cpp - Level Zero Adapter----------------===// +//===--------- ur_interface_loader.cpp - Level Zero Adapter ------------===// // -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2024 Intel Corporation // // Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM // Exceptions. See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - #include #include -namespace { +#include "ur_interface_loader.hpp" -ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { +static ur_result_t validateProcInputs(ur_api_version_t version, + void *pDdiTable) { if (nullptr == pDdiTable) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } @@ -24,475 +24,592 @@ ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { } return UR_RESULT_SUCCESS; } -} // namespace -#if defined(__cplusplus) +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +namespace ur::level_zero { +#elif defined(__cplusplus) extern "C" { #endif -UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_global_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( + ur_api_version_t version, ur_global_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnAdapterGet = urAdapterGet; - pDdiTable->pfnAdapterRelease = urAdapterRelease; - pDdiTable->pfnAdapterRetain = urAdapterRetain; - pDdiTable->pfnAdapterGetLastError = urAdapterGetLastError; - pDdiTable->pfnAdapterGetInfo = urAdapterGetInfo; - return retVal; + pDdiTable->pfnAdapterGet = ur::level_zero::urAdapterGet; + pDdiTable->pfnAdapterRelease = ur::level_zero::urAdapterRelease; + pDdiTable->pfnAdapterRetain = ur::level_zero::urAdapterRetain; + pDdiTable->pfnAdapterGetLastError = ur::level_zero::urAdapterGetLastError; + pDdiTable->pfnAdapterGetInfo = ur::level_zero::urAdapterGetInfo; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_context_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( + ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urContextCreate; - pDdiTable->pfnRetain = urContextRetain; - 
pDdiTable->pfnRelease = urContextRelease; - pDdiTable->pfnGetInfo = urContextGetInfo; - pDdiTable->pfnGetNativeHandle = urContextGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urContextCreateWithNativeHandle; - pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter; + pDdiTable->pfnUnsampledImageHandleDestroyExp = + ur::level_zero::urBindlessImagesUnsampledImageHandleDestroyExp; + pDdiTable->pfnSampledImageHandleDestroyExp = + ur::level_zero::urBindlessImagesSampledImageHandleDestroyExp; + pDdiTable->pfnImageAllocateExp = + ur::level_zero::urBindlessImagesImageAllocateExp; + pDdiTable->pfnImageFreeExp = ur::level_zero::urBindlessImagesImageFreeExp; + pDdiTable->pfnUnsampledImageCreateExp = + ur::level_zero::urBindlessImagesUnsampledImageCreateExp; + pDdiTable->pfnSampledImageCreateExp = + ur::level_zero::urBindlessImagesSampledImageCreateExp; + pDdiTable->pfnImageCopyExp = ur::level_zero::urBindlessImagesImageCopyExp; + pDdiTable->pfnImageGetInfoExp = + ur::level_zero::urBindlessImagesImageGetInfoExp; + pDdiTable->pfnMipmapGetLevelExp = + ur::level_zero::urBindlessImagesMipmapGetLevelExp; + pDdiTable->pfnMipmapFreeExp = ur::level_zero::urBindlessImagesMipmapFreeExp; + pDdiTable->pfnImportExternalMemoryExp = + ur::level_zero::urBindlessImagesImportExternalMemoryExp; + pDdiTable->pfnMapExternalArrayExp = + ur::level_zero::urBindlessImagesMapExternalArrayExp; + pDdiTable->pfnMapExternalLinearMemoryExp = + ur::level_zero::urBindlessImagesMapExternalLinearMemoryExp; + pDdiTable->pfnReleaseExternalMemoryExp = + ur::level_zero::urBindlessImagesReleaseExternalMemoryExp; + pDdiTable->pfnImportExternalSemaphoreExp = + ur::level_zero::urBindlessImagesImportExternalSemaphoreExp; + pDdiTable->pfnReleaseExternalSemaphoreExp = + ur::level_zero::urBindlessImagesReleaseExternalSemaphoreExp; + pDdiTable->pfnWaitExternalSemaphoreExp = + ur::level_zero::urBindlessImagesWaitExternalSemaphoreExp; + pDdiTable->pfnSignalExternalSemaphoreExp = + ur::level_zero::urBindlessImagesSignalExternalSemaphoreExp; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_enqueue_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( + ur_api_version_t version, ur_command_buffer_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch; - pDdiTable->pfnEventsWait = urEnqueueEventsWait; - pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier; - pDdiTable->pfnMemBufferRead = urEnqueueMemBufferRead; - pDdiTable->pfnMemBufferWrite = urEnqueueMemBufferWrite; - pDdiTable->pfnMemBufferReadRect = urEnqueueMemBufferReadRect; - pDdiTable->pfnMemBufferWriteRect = urEnqueueMemBufferWriteRect; - pDdiTable->pfnMemBufferCopy = urEnqueueMemBufferCopy; - pDdiTable->pfnMemBufferCopyRect = urEnqueueMemBufferCopyRect; - pDdiTable->pfnMemBufferFill = urEnqueueMemBufferFill; - pDdiTable->pfnMemImageRead = urEnqueueMemImageRead; - pDdiTable->pfnMemImageWrite = urEnqueueMemImageWrite; - pDdiTable->pfnMemImageCopy = urEnqueueMemImageCopy; - pDdiTable->pfnMemBufferMap = urEnqueueMemBufferMap; - pDdiTable->pfnMemUnmap = urEnqueueMemUnmap; - pDdiTable->pfnUSMFill = 
urEnqueueUSMFill; - pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy; - pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch; - pDdiTable->pfnUSMAdvise = urEnqueueUSMAdvise; - pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D; - pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D; - pDdiTable->pfnDeviceGlobalVariableWrite = urEnqueueDeviceGlobalVariableWrite; - pDdiTable->pfnDeviceGlobalVariableRead = urEnqueueDeviceGlobalVariableRead; - - return retVal; + pDdiTable->pfnCreateExp = ur::level_zero::urCommandBufferCreateExp; + pDdiTable->pfnRetainExp = ur::level_zero::urCommandBufferRetainExp; + pDdiTable->pfnReleaseExp = ur::level_zero::urCommandBufferReleaseExp; + pDdiTable->pfnFinalizeExp = ur::level_zero::urCommandBufferFinalizeExp; + pDdiTable->pfnAppendKernelLaunchExp = + ur::level_zero::urCommandBufferAppendKernelLaunchExp; + pDdiTable->pfnAppendUSMMemcpyExp = + ur::level_zero::urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMFillExp = + ur::level_zero::urCommandBufferAppendUSMFillExp; + pDdiTable->pfnAppendMemBufferCopyExp = + ur::level_zero::urCommandBufferAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferWriteExp = + ur::level_zero::urCommandBufferAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferReadExp = + ur::level_zero::urCommandBufferAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + ur::level_zero::urCommandBufferAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + ur::level_zero::urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + ur::level_zero::urCommandBufferAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferFillExp = + ur::level_zero::urCommandBufferAppendMemBufferFillExp; + pDdiTable->pfnAppendUSMPrefetchExp = + ur::level_zero::urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur::level_zero::urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnEnqueueExp = ur::level_zero::urCommandBufferEnqueueExp; + pDdiTable->pfnRetainCommandExp = + ur::level_zero::urCommandBufferRetainCommandExp; + pDdiTable->pfnReleaseCommandExp = + ur::level_zero::urCommandBufferReleaseCommandExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur::level_zero::urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = ur::level_zero::urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = + ur::level_zero::urCommandBufferCommandGetInfoExp; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_event_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( + ur_api_version_t version, ur_context_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGetInfo = urEventGetInfo; - pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo; - pDdiTable->pfnWait = urEventWait; - pDdiTable->pfnRetain = urEventRetain; - pDdiTable->pfnRelease = urEventRelease; - pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle; - pDdiTable->pfnSetCallback = urEventSetCallback; - - return retVal; + + pDdiTable->pfnCreate = ur::level_zero::urContextCreate; + pDdiTable->pfnRetain = ur::level_zero::urContextRetain; + pDdiTable->pfnRelease = 
ur::level_zero::urContextRelease; + pDdiTable->pfnGetInfo = ur::level_zero::urContextGetInfo; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urContextGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urContextCreateWithNativeHandle; + pDdiTable->pfnSetExtendedDeleter = + ur::level_zero::urContextSetExtendedDeleter; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_kernel_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( + ur_api_version_t version, ur_enqueue_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urKernelCreate; - pDdiTable->pfnGetInfo = urKernelGetInfo; - pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo; - pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo; - pDdiTable->pfnRetain = urKernelRetain; - pDdiTable->pfnRelease = urKernelRelease; - pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle; - pDdiTable->pfnSetArgValue = urKernelSetArgValue; - pDdiTable->pfnSetArgLocal = urKernelSetArgLocal; - pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; - pDdiTable->pfnSetExecInfo = urKernelSetExecInfo; - pDdiTable->pfnSetArgSampler = urKernelSetArgSampler; - pDdiTable->pfnSetArgMemObj = urKernelSetArgMemObj; - pDdiTable->pfnSetSpecializationConstants = urKernelSetSpecializationConstants; - pDdiTable->pfnGetSuggestedLocalWorkSize = urKernelGetSuggestedLocalWorkSize; - return retVal; + + pDdiTable->pfnKernelLaunch = ur::level_zero::urEnqueueKernelLaunch; + pDdiTable->pfnEventsWait = ur::level_zero::urEnqueueEventsWait; + pDdiTable->pfnEventsWaitWithBarrier = + ur::level_zero::urEnqueueEventsWaitWithBarrier; + pDdiTable->pfnMemBufferRead = ur::level_zero::urEnqueueMemBufferRead; + pDdiTable->pfnMemBufferWrite = ur::level_zero::urEnqueueMemBufferWrite; + pDdiTable->pfnMemBufferReadRect = ur::level_zero::urEnqueueMemBufferReadRect; + pDdiTable->pfnMemBufferWriteRect = + ur::level_zero::urEnqueueMemBufferWriteRect; + pDdiTable->pfnMemBufferCopy = ur::level_zero::urEnqueueMemBufferCopy; + pDdiTable->pfnMemBufferCopyRect = ur::level_zero::urEnqueueMemBufferCopyRect; + pDdiTable->pfnMemBufferFill = ur::level_zero::urEnqueueMemBufferFill; + pDdiTable->pfnMemImageRead = ur::level_zero::urEnqueueMemImageRead; + pDdiTable->pfnMemImageWrite = ur::level_zero::urEnqueueMemImageWrite; + pDdiTable->pfnMemImageCopy = ur::level_zero::urEnqueueMemImageCopy; + pDdiTable->pfnMemBufferMap = ur::level_zero::urEnqueueMemBufferMap; + pDdiTable->pfnMemUnmap = ur::level_zero::urEnqueueMemUnmap; + pDdiTable->pfnUSMFill = ur::level_zero::urEnqueueUSMFill; + pDdiTable->pfnUSMMemcpy = ur::level_zero::urEnqueueUSMMemcpy; + pDdiTable->pfnUSMPrefetch = ur::level_zero::urEnqueueUSMPrefetch; + pDdiTable->pfnUSMAdvise = ur::level_zero::urEnqueueUSMAdvise; + pDdiTable->pfnUSMFill2D = ur::level_zero::urEnqueueUSMFill2D; + pDdiTable->pfnUSMMemcpy2D = ur::level_zero::urEnqueueUSMMemcpy2D; + pDdiTable->pfnDeviceGlobalVariableWrite = + ur::level_zero::urEnqueueDeviceGlobalVariableWrite; + pDdiTable->pfnDeviceGlobalVariableRead = + ur::level_zero::urEnqueueDeviceGlobalVariableRead; + 
pDdiTable->pfnReadHostPipe = ur::level_zero::urEnqueueReadHostPipe; + pDdiTable->pfnWriteHostPipe = ur::level_zero::urEnqueueWriteHostPipe; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetMemProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_mem_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnImageCreate = urMemImageCreate; - pDdiTable->pfnBufferCreate = urMemBufferCreate; - pDdiTable->pfnRetain = urMemRetain; - pDdiTable->pfnRelease = urMemRelease; - pDdiTable->pfnBufferPartition = urMemBufferPartition; - pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle; - pDdiTable->pfnBufferCreateWithNativeHandle = - urMemBufferCreateWithNativeHandle; - pDdiTable->pfnImageCreateWithNativeHandle = urMemImageCreateWithNativeHandle; - pDdiTable->pfnGetInfo = urMemGetInfo; - pDdiTable->pfnImageGetInfo = urMemImageGetInfo; - return retVal; + pDdiTable->pfnKernelLaunchCustomExp = + ur::level_zero::urEnqueueKernelLaunchCustomExp; + pDdiTable->pfnCooperativeKernelLaunchExp = + ur::level_zero::urEnqueueCooperativeKernelLaunchExp; + pDdiTable->pfnTimestampRecordingExp = + ur::level_zero::urEnqueueTimestampRecordingExp; + pDdiTable->pfnNativeCommandExp = ur::level_zero::urEnqueueNativeCommandExp; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_platform_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( + ur_api_version_t version, ur_event_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGet = urPlatformGet; - pDdiTable->pfnGetInfo = urPlatformGetInfo; - pDdiTable->pfnGetNativeHandle = urPlatformGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urPlatformCreateWithNativeHandle; - pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion; - pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption; - - return retVal; -} -UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_program_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { + pDdiTable->pfnGetInfo = ur::level_zero::urEventGetInfo; + pDdiTable->pfnGetProfilingInfo = ur::level_zero::urEventGetProfilingInfo; + pDdiTable->pfnWait = ur::level_zero::urEventWait; + pDdiTable->pfnRetain = ur::level_zero::urEventRetain; + pDdiTable->pfnRelease = ur::level_zero::urEventRelease; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urEventGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urEventCreateWithNativeHandle; + pDdiTable->pfnSetCallback = ur::level_zero::urEventSetCallback; + + return result; +} - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( + 
ur_api_version_t version, ur_kernel_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreateWithIL = urProgramCreateWithIL; - pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary; - pDdiTable->pfnBuild = urProgramBuild; - pDdiTable->pfnCompile = urProgramCompile; - pDdiTable->pfnLink = urProgramLink; - pDdiTable->pfnRetain = urProgramRetain; - pDdiTable->pfnRelease = urProgramRelease; - pDdiTable->pfnGetFunctionPointer = urProgramGetFunctionPointer; - pDdiTable->pfnGetGlobalVariablePointer = urProgramGetGlobalVariablePointer; - pDdiTable->pfnGetInfo = urProgramGetInfo; - pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo; + + pDdiTable->pfnCreate = ur::level_zero::urKernelCreate; + pDdiTable->pfnGetInfo = ur::level_zero::urKernelGetInfo; + pDdiTable->pfnGetGroupInfo = ur::level_zero::urKernelGetGroupInfo; + pDdiTable->pfnGetSubGroupInfo = ur::level_zero::urKernelGetSubGroupInfo; + pDdiTable->pfnRetain = ur::level_zero::urKernelRetain; + pDdiTable->pfnRelease = ur::level_zero::urKernelRelease; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urKernelGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urKernelCreateWithNativeHandle; + pDdiTable->pfnGetSuggestedLocalWorkSize = + ur::level_zero::urKernelGetSuggestedLocalWorkSize; + pDdiTable->pfnSetArgValue = ur::level_zero::urKernelSetArgValue; + pDdiTable->pfnSetArgLocal = ur::level_zero::urKernelSetArgLocal; + pDdiTable->pfnSetArgPointer = ur::level_zero::urKernelSetArgPointer; + pDdiTable->pfnSetExecInfo = ur::level_zero::urKernelSetExecInfo; + pDdiTable->pfnSetArgSampler = ur::level_zero::urKernelSetArgSampler; + pDdiTable->pfnSetArgMemObj = ur::level_zero::urKernelSetArgMemObj; pDdiTable->pfnSetSpecializationConstants = - urProgramSetSpecializationConstants; - pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle; + ur::level_zero::urKernelSetSpecializationConstants; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_queue_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGetInfo = urQueueGetInfo; - pDdiTable->pfnCreate = urQueueCreate; - pDdiTable->pfnRetain = urQueueRetain; - pDdiTable->pfnRelease = urQueueRelease; - pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle; - pDdiTable->pfnFinish = urQueueFinish; - pDdiTable->pfnFlush = urQueueFlush; + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = + ur::level_zero::urKernelSuggestMaxCooperativeGroupCountExp; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_sampler_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t 
UR_APICALL +urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urSamplerCreate; - pDdiTable->pfnRetain = urSamplerRetain; - pDdiTable->pfnRelease = urSamplerRelease; - pDdiTable->pfnGetInfo = urSamplerGetInfo; - pDdiTable->pfnGetNativeHandle = urSamplerGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle; - - return retVal; + + pDdiTable->pfnImageCreate = ur::level_zero::urMemImageCreate; + pDdiTable->pfnBufferCreate = ur::level_zero::urMemBufferCreate; + pDdiTable->pfnRetain = ur::level_zero::urMemRetain; + pDdiTable->pfnRelease = ur::level_zero::urMemRelease; + pDdiTable->pfnBufferPartition = ur::level_zero::urMemBufferPartition; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urMemGetNativeHandle; + pDdiTable->pfnBufferCreateWithNativeHandle = + ur::level_zero::urMemBufferCreateWithNativeHandle; + pDdiTable->pfnImageCreateWithNativeHandle = + ur::level_zero::urMemImageCreateWithNativeHandle; + pDdiTable->pfnGetInfo = ur::level_zero::urMemGetInfo; + pDdiTable->pfnImageGetInfo = ur::level_zero::urMemImageGetInfo; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_usm_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( + ur_api_version_t version, ur_physical_mem_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnHostAlloc = urUSMHostAlloc; - pDdiTable->pfnDeviceAlloc = urUSMDeviceAlloc; - pDdiTable->pfnSharedAlloc = urUSMSharedAlloc; - pDdiTable->pfnFree = urUSMFree; - pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo; - pDdiTable->pfnPoolCreate = urUSMPoolCreate; - pDdiTable->pfnPoolRetain = urUSMPoolRetain; - pDdiTable->pfnPoolRelease = urUSMPoolRelease; - pDdiTable->pfnPoolGetInfo = urUSMPoolGetInfo; - - return retVal; + pDdiTable->pfnCreate = ur::level_zero::urPhysicalMemCreate; + pDdiTable->pfnRetain = ur::level_zero::urPhysicalMemRetain; + pDdiTable->pfnRelease = ur::level_zero::urPhysicalMemRelease; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_device_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( + ur_api_version_t version, ur_platform_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGet = urDeviceGet; - pDdiTable->pfnGetInfo = urDeviceGetInfo; - pDdiTable->pfnRetain = urDeviceRetain; - pDdiTable->pfnRelease = urDeviceRelease; - pDdiTable->pfnPartition = urDevicePartition; - pDdiTable->pfnSelectBinary = urDeviceSelectBinary; - pDdiTable->pfnGetNativeHandle = urDeviceGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle; - pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps; - - return retVal; + + pDdiTable->pfnGet = 
ur::level_zero::urPlatformGet; + pDdiTable->pfnGetInfo = ur::level_zero::urPlatformGetInfo; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urPlatformGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urPlatformCreateWithNativeHandle; + pDdiTable->pfnGetApiVersion = ur::level_zero::urPlatformGetApiVersion; + pDdiTable->pfnGetBackendOption = ur::level_zero::urPlatformGetBackendOption; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_command_buffer_exp_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( + ur_api_version_t version, ur_program_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreateExp = urCommandBufferCreateExp; - pDdiTable->pfnRetainExp = urCommandBufferRetainExp; - pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; - pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; - pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; - pDdiTable->pfnAppendUSMFillExp = urCommandBufferAppendUSMFillExp; - pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; - pDdiTable->pfnAppendMemBufferCopyRectExp = - urCommandBufferAppendMemBufferCopyRectExp; - pDdiTable->pfnAppendMemBufferReadExp = urCommandBufferAppendMemBufferReadExp; - pDdiTable->pfnAppendMemBufferReadRectExp = - urCommandBufferAppendMemBufferReadRectExp; - pDdiTable->pfnAppendMemBufferWriteExp = - urCommandBufferAppendMemBufferWriteExp; - pDdiTable->pfnAppendMemBufferWriteRectExp = - urCommandBufferAppendMemBufferWriteRectExp; - pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; - pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; - pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; - pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; - pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; - pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; - pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; - pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; - pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; - - return retVal; + + pDdiTable->pfnCreateWithIL = ur::level_zero::urProgramCreateWithIL; + pDdiTable->pfnCreateWithBinary = ur::level_zero::urProgramCreateWithBinary; + pDdiTable->pfnBuild = ur::level_zero::urProgramBuild; + pDdiTable->pfnCompile = ur::level_zero::urProgramCompile; + pDdiTable->pfnLink = ur::level_zero::urProgramLink; + pDdiTable->pfnRetain = ur::level_zero::urProgramRetain; + pDdiTable->pfnRelease = ur::level_zero::urProgramRelease; + pDdiTable->pfnGetFunctionPointer = + ur::level_zero::urProgramGetFunctionPointer; + pDdiTable->pfnGetGlobalVariablePointer = + ur::level_zero::urProgramGetGlobalVariablePointer; + pDdiTable->pfnGetInfo = ur::level_zero::urProgramGetInfo; + pDdiTable->pfnGetBuildInfo = ur::level_zero::urProgramGetBuildInfo; + pDdiTable->pfnSetSpecializationConstants = + ur::level_zero::urProgramSetSpecializationConstants; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urProgramGetNativeHandle; + 
pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urProgramCreateWithNativeHandle; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( - ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( + ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnEnablePeerAccessExp = urUsmP2PEnablePeerAccessExp; - pDdiTable->pfnDisablePeerAccessExp = urUsmP2PDisablePeerAccessExp; - pDdiTable->pfnPeerAccessGetInfoExp = urUsmP2PPeerAccessGetInfoExp; - return retVal; + pDdiTable->pfnBuildExp = ur::level_zero::urProgramBuildExp; + pDdiTable->pfnCompileExp = ur::level_zero::urProgramCompileExp; + pDdiTable->pfnLinkExp = ur::level_zero::urProgramLinkExp; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( - ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( + ur_api_version_t version, ur_queue_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnUnsampledImageHandleDestroyExp = - urBindlessImagesUnsampledImageHandleDestroyExp; - pDdiTable->pfnSampledImageHandleDestroyExp = - urBindlessImagesSampledImageHandleDestroyExp; - pDdiTable->pfnImageAllocateExp = urBindlessImagesImageAllocateExp; - pDdiTable->pfnImageFreeExp = urBindlessImagesImageFreeExp; - pDdiTable->pfnUnsampledImageCreateExp = - urBindlessImagesUnsampledImageCreateExp; - pDdiTable->pfnSampledImageCreateExp = urBindlessImagesSampledImageCreateExp; - pDdiTable->pfnImageCopyExp = urBindlessImagesImageCopyExp; - pDdiTable->pfnImageGetInfoExp = urBindlessImagesImageGetInfoExp; - pDdiTable->pfnMipmapGetLevelExp = urBindlessImagesMipmapGetLevelExp; - pDdiTable->pfnMipmapFreeExp = urBindlessImagesMipmapFreeExp; - pDdiTable->pfnImportExternalMemoryExp = - urBindlessImagesImportExternalMemoryExp; - pDdiTable->pfnMapExternalArrayExp = urBindlessImagesMapExternalArrayExp; - pDdiTable->pfnMapExternalLinearMemoryExp = - urBindlessImagesMapExternalLinearMemoryExp; - pDdiTable->pfnReleaseExternalMemoryExp = - urBindlessImagesReleaseExternalMemoryExp; - pDdiTable->pfnImportExternalSemaphoreExp = - urBindlessImagesImportExternalSemaphoreExp; - pDdiTable->pfnReleaseExternalSemaphoreExp = - urBindlessImagesReleaseExternalSemaphoreExp; - pDdiTable->pfnWaitExternalSemaphoreExp = - urBindlessImagesWaitExternalSemaphoreExp; - pDdiTable->pfnSignalExternalSemaphoreExp = - urBindlessImagesSignalExternalSemaphoreExp; - return UR_RESULT_SUCCESS; + + pDdiTable->pfnGetInfo = ur::level_zero::urQueueGetInfo; + pDdiTable->pfnCreate = ur::level_zero::urQueueCreate; + pDdiTable->pfnRetain = ur::level_zero::urQueueRetain; + pDdiTable->pfnRelease = ur::level_zero::urQueueRelease; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urQueueGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urQueueCreateWithNativeHandle; + pDdiTable->pfnFinish = ur::level_zero::urQueueFinish; + pDdiTable->pfnFlush = ur::level_zero::urQueueFlush; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( - ur_api_version_t version, ur_usm_exp_dditable_t *pDdiTable) { 
+UR_APIEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( + ur_api_version_t version, ur_sampler_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnPitchedAllocExp = urUSMPitchedAllocExp; - pDdiTable->pfnImportExp = urUSMImportExp; - pDdiTable->pfnReleaseExp = urUSMReleaseExp; - return UR_RESULT_SUCCESS; + + pDdiTable->pfnCreate = ur::level_zero::urSamplerCreate; + pDdiTable->pfnRetain = ur::level_zero::urSamplerRetain; + pDdiTable->pfnRelease = ur::level_zero::urSamplerRelease; + pDdiTable->pfnGetInfo = ur::level_zero::urSamplerGetInfo; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urSamplerGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urSamplerCreateWithNativeHandle; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_virtual_mem_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL +urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnFree = urVirtualMemFree; - pDdiTable->pfnGetInfo = urVirtualMemGetInfo; - pDdiTable->pfnGranularityGetInfo = urVirtualMemGranularityGetInfo; - pDdiTable->pfnMap = urVirtualMemMap; - pDdiTable->pfnReserve = urVirtualMemReserve; - pDdiTable->pfnSetAccess = urVirtualMemSetAccess; - pDdiTable->pfnUnmap = urVirtualMemUnmap; - - return retVal; + pDdiTable->pfnHostAlloc = ur::level_zero::urUSMHostAlloc; + pDdiTable->pfnDeviceAlloc = ur::level_zero::urUSMDeviceAlloc; + pDdiTable->pfnSharedAlloc = ur::level_zero::urUSMSharedAlloc; + pDdiTable->pfnFree = ur::level_zero::urUSMFree; + pDdiTable->pfnGetMemAllocInfo = ur::level_zero::urUSMGetMemAllocInfo; + pDdiTable->pfnPoolCreate = ur::level_zero::urUSMPoolCreate; + pDdiTable->pfnPoolRetain = ur::level_zero::urUSMPoolRetain; + pDdiTable->pfnPoolRelease = ur::level_zero::urUSMPoolRelease; + pDdiTable->pfnPoolGetInfo = ur::level_zero::urUSMPoolGetInfo; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_physical_mem_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( + ur_api_version_t version, ur_usm_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urPhysicalMemCreate; - pDdiTable->pfnRelease = urPhysicalMemRelease; - pDdiTable->pfnRetain = urPhysicalMemRetain; + pDdiTable->pfnPitchedAllocExp = ur::level_zero::urUSMPitchedAllocExp; + pDdiTable->pfnImportExp = ur::level_zero::urUSMImportExp; + pDdiTable->pfnReleaseExp = ur::level_zero::urUSMReleaseExp; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( - ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( + ur_api_version_t version, ur_usm_p2p_exp_dditable_t 
*pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnCooperativeKernelLaunchExp = - urEnqueueCooperativeKernelLaunchExp; - pDdiTable->pfnTimestampRecordingExp = urEnqueueTimestampRecordingExp; - pDdiTable->pfnNativeCommandExp = urEnqueueNativeCommandExp; + pDdiTable->pfnEnablePeerAccessExp = + ur::level_zero::urUsmP2PEnablePeerAccessExp; + pDdiTable->pfnDisablePeerAccessExp = + ur::level_zero::urUsmP2PDisablePeerAccessExp; + pDdiTable->pfnPeerAccessGetInfoExp = + ur::level_zero::urUsmP2PPeerAccessGetInfoExp; - return UR_RESULT_SUCCESS; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( - ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( + ur_api_version_t version, ur_virtual_mem_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = - urKernelSuggestMaxCooperativeGroupCountExp; + pDdiTable->pfnGranularityGetInfo = + ur::level_zero::urVirtualMemGranularityGetInfo; + pDdiTable->pfnReserve = ur::level_zero::urVirtualMemReserve; + pDdiTable->pfnFree = ur::level_zero::urVirtualMemFree; + pDdiTable->pfnMap = ur::level_zero::urVirtualMemMap; + pDdiTable->pfnUnmap = ur::level_zero::urVirtualMemUnmap; + pDdiTable->pfnSetAccess = ur::level_zero::urVirtualMemSetAccess; + pDdiTable->pfnGetInfo = ur::level_zero::urVirtualMemGetInfo; - return UR_RESULT_SUCCESS; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( - ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( + ur_api_version_t version, ur_device_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnBuildExp = urProgramBuildExp; - pDdiTable->pfnCompileExp = urProgramCompileExp; - pDdiTable->pfnLinkExp = urProgramLinkExp; - - return UR_RESULT_SUCCESS; + pDdiTable->pfnGet = ur::level_zero::urDeviceGet; + pDdiTable->pfnGetInfo = ur::level_zero::urDeviceGetInfo; + pDdiTable->pfnRetain = ur::level_zero::urDeviceRetain; + pDdiTable->pfnRelease = ur::level_zero::urDeviceRelease; + pDdiTable->pfnPartition = ur::level_zero::urDevicePartition; + pDdiTable->pfnSelectBinary = ur::level_zero::urDeviceSelectBinary; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urDeviceGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urDeviceCreateWithNativeHandle; + pDdiTable->pfnGetGlobalTimestamps = + ur::level_zero::urDeviceGetGlobalTimestamps; + + return result; } -#if defined(__cplusplus) + +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +} // namespace ur::level_zero +#elif defined(__cplusplus) } // extern "C" #endif + +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +namespace ur::level_zero { +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { + if (ddi == nullptr) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + ur_result_t result; + + result = ur::level_zero::urGetGlobalProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Global); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetBindlessImagesExpProcAddrTable( + UR_API_VERSION_CURRENT, &ddi->BindlessImagesExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetCommandBufferExpProcAddrTable( + 
UR_API_VERSION_CURRENT, &ddi->CommandBufferExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetContextProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Context); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetEnqueueProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Enqueue); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->EnqueueExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetEventProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Event); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Kernel); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->KernelExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = + ur::level_zero::urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Mem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetPhysicalMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->PhysicalMem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetPlatformProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Platform); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetProgramProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Program); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->ProgramExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Queue); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Sampler); + if (result != UR_RESULT_SUCCESS) + return result; + result = + ur::level_zero::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &ddi->USM); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetUSMExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->USMExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetUsmP2PExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->UsmP2PExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetVirtualMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->VirtualMem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetDeviceProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Device); + if (result != UR_RESULT_SUCCESS) + return result; + + return result; +} +} // namespace ur::level_zero +#endif diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp new file mode 100644 index 0000000000..f95625dd5b --- /dev/null +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -0,0 +1,706 @@ +//===--------- ur_interface_loader.hpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include +#include + +namespace ur::level_zero { +ur_result_t urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, + uint32_t *pNumAdapters); +ur_result_t urAdapterRelease(ur_adapter_handle_t hAdapter); +ur_result_t urAdapterRetain(ur_adapter_handle_t hAdapter); +ur_result_t urAdapterGetLastError(ur_adapter_handle_t hAdapter, + const char **ppMessage, int32_t *pError); +ur_result_t urAdapterGetInfo(ur_adapter_handle_t hAdapter, + ur_adapter_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urPlatformGet(ur_adapter_handle_t *phAdapters, uint32_t NumAdapters, + uint32_t NumEntries, + ur_platform_handle_t *phPlatforms, + uint32_t *pNumPlatforms); +ur_result_t urPlatformGetInfo(ur_platform_handle_t hPlatform, + ur_platform_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urPlatformGetApiVersion(ur_platform_handle_t hPlatform, + ur_api_version_t *pVersion); +ur_result_t urPlatformGetNativeHandle(ur_platform_handle_t hPlatform, + ur_native_handle_t *phNativePlatform); +ur_result_t urPlatformCreateWithNativeHandle( + ur_native_handle_t hNativePlatform, ur_adapter_handle_t hAdapter, + const ur_platform_native_properties_t *pProperties, + ur_platform_handle_t *phPlatform); +ur_result_t urPlatformGetBackendOption(ur_platform_handle_t hPlatform, + const char *pFrontendOption, + const char **ppPlatformOption); +ur_result_t urDeviceGet(ur_platform_handle_t hPlatform, + ur_device_type_t DeviceType, uint32_t NumEntries, + ur_device_handle_t *phDevices, uint32_t *pNumDevices); +ur_result_t urDeviceGetInfo(ur_device_handle_t hDevice, + ur_device_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urDeviceRetain(ur_device_handle_t hDevice); +ur_result_t urDeviceRelease(ur_device_handle_t hDevice); +ur_result_t +urDevicePartition(ur_device_handle_t hDevice, + const ur_device_partition_properties_t *pProperties, + uint32_t NumDevices, ur_device_handle_t *phSubDevices, + uint32_t *pNumDevicesRet); +ur_result_t urDeviceSelectBinary(ur_device_handle_t hDevice, + const ur_device_binary_t *pBinaries, + uint32_t NumBinaries, + uint32_t *pSelectedBinary); +ur_result_t urDeviceGetNativeHandle(ur_device_handle_t hDevice, + ur_native_handle_t *phNativeDevice); +ur_result_t +urDeviceCreateWithNativeHandle(ur_native_handle_t hNativeDevice, + ur_adapter_handle_t hAdapter, + const ur_device_native_properties_t *pProperties, + ur_device_handle_t *phDevice); +ur_result_t urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice, + uint64_t *pDeviceTimestamp, + uint64_t *pHostTimestamp); +ur_result_t urContextCreate(uint32_t DeviceCount, + const ur_device_handle_t *phDevices, + const ur_context_properties_t *pProperties, + ur_context_handle_t *phContext); +ur_result_t urContextRetain(ur_context_handle_t hContext); +ur_result_t urContextRelease(ur_context_handle_t hContext); +ur_result_t urContextGetInfo(ur_context_handle_t hContext, + ur_context_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urContextGetNativeHandle(ur_context_handle_t hContext, + ur_native_handle_t *phNativeContext); +ur_result_t urContextCreateWithNativeHandle( + ur_native_handle_t hNativeContext, ur_adapter_handle_t hAdapter, + uint32_t numDevices, const ur_device_handle_t *phDevices, + const ur_context_native_properties_t 
*pProperties, + ur_context_handle_t *phContext); +ur_result_t +urContextSetExtendedDeleter(ur_context_handle_t hContext, + ur_context_extended_deleter_t pfnDeleter, + void *pUserData); +ur_result_t urMemImageCreate(ur_context_handle_t hContext, ur_mem_flags_t flags, + const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, void *pHost, + ur_mem_handle_t *phMem); +ur_result_t urMemBufferCreate(ur_context_handle_t hContext, + ur_mem_flags_t flags, size_t size, + const ur_buffer_properties_t *pProperties, + ur_mem_handle_t *phBuffer); +ur_result_t urMemRetain(ur_mem_handle_t hMem); +ur_result_t urMemRelease(ur_mem_handle_t hMem); +ur_result_t urMemBufferPartition(ur_mem_handle_t hBuffer, ur_mem_flags_t flags, + ur_buffer_create_type_t bufferCreateType, + const ur_buffer_region_t *pRegion, + ur_mem_handle_t *phMem); +ur_result_t urMemGetNativeHandle(ur_mem_handle_t hMem, + ur_device_handle_t hDevice, + ur_native_handle_t *phNativeMem); +ur_result_t urMemBufferCreateWithNativeHandle( + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem); +ur_result_t urMemImageCreateWithNativeHandle( + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem); +ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urMemImageGetInfo(ur_mem_handle_t hMemory, ur_image_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urSamplerCreate(ur_context_handle_t hContext, + const ur_sampler_desc_t *pDesc, + ur_sampler_handle_t *phSampler); +ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler); +ur_result_t urSamplerRelease(ur_sampler_handle_t hSampler); +ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler, + ur_sampler_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urSamplerGetNativeHandle(ur_sampler_handle_t hSampler, + ur_native_handle_t *phNativeSampler); +ur_result_t urSamplerCreateWithNativeHandle( + ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, + const ur_sampler_native_properties_t *pProperties, + ur_sampler_handle_t *phSampler); +ur_result_t urUSMHostAlloc(ur_context_handle_t hContext, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); +ur_result_t urUSMDeviceAlloc(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); +ur_result_t urUSMSharedAlloc(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); +ur_result_t urUSMFree(ur_context_handle_t hContext, void *pMem); +ur_result_t urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, + ur_usm_alloc_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urUSMPoolCreate(ur_context_handle_t hContext, + ur_usm_pool_desc_t *pPoolDesc, + ur_usm_pool_handle_t *ppPool); +ur_result_t urUSMPoolRetain(ur_usm_pool_handle_t pPool); +ur_result_t urUSMPoolRelease(ur_usm_pool_handle_t pPool); +ur_result_t urUSMPoolGetInfo(ur_usm_pool_handle_t hPool, + ur_usm_pool_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); 
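For orientation, here is a minimal sketch (not part of the patch itself) of how a loader built with UR_STATIC_ADAPTER_L0 could obtain this adapter's dispatch tables through the urAdapterGetDdiTables() entry point declared at the end of this header; the table type and field names are taken from the wiring code earlier in this diff, and the surrounding control flow is assumed for illustration only.

// Sketch: filling the loader's dispatch tables from the statically linked
// Level Zero adapter. Assumes the adapter was built with
// UR_STATIC_ADAPTER_LEVEL_ZERO so urAdapterGetDdiTables() is available.
ur_dditable_t ddi{};  // zero-initialized set of DDI tables
ur_result_t res = ur::level_zero::urAdapterGetDdiTables(&ddi);
if (res == UR_RESULT_SUCCESS) {
  // e.g. ddi.Platform.pfnGet now refers to ur::level_zero::urPlatformGet,
  // so the loader can dispatch into the adapter without dlopen/dlsym.
}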
+ur_result_t urVirtualMemGranularityGetInfo( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_virtual_mem_granularity_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, + const void *pStart, size_t size, + void **ppStart); +ur_result_t urVirtualMemFree(ur_context_handle_t hContext, const void *pStart, + size_t size); +ur_result_t urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, + size_t size, ur_physical_mem_handle_t hPhysicalMem, + size_t offset, ur_virtual_mem_access_flags_t flags); +ur_result_t urVirtualMemUnmap(ur_context_handle_t hContext, const void *pStart, + size_t size); +ur_result_t urVirtualMemSetAccess(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_access_flags_t flags); +ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urPhysicalMemCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, size_t size, + const ur_physical_mem_properties_t *pProperties, + ur_physical_mem_handle_t *phPhysicalMem); +ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem); +ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem); +ur_result_t urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL, + size_t length, + const ur_program_properties_t *pProperties, + ur_program_handle_t *phProgram); +ur_result_t urProgramCreateWithBinary( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + const uint8_t *pBinary, const ur_program_properties_t *pProperties, + ur_program_handle_t *phProgram); +ur_result_t urProgramBuild(ur_context_handle_t hContext, + ur_program_handle_t hProgram, const char *pOptions); +ur_result_t urProgramCompile(ur_context_handle_t hContext, + ur_program_handle_t hProgram, + const char *pOptions); +ur_result_t urProgramLink(ur_context_handle_t hContext, uint32_t count, + const ur_program_handle_t *phPrograms, + const char *pOptions, ur_program_handle_t *phProgram); +ur_result_t urProgramRetain(ur_program_handle_t hProgram); +ur_result_t urProgramRelease(ur_program_handle_t hProgram); +ur_result_t urProgramGetFunctionPointer(ur_device_handle_t hDevice, + ur_program_handle_t hProgram, + const char *pFunctionName, + void **ppFunctionPointer); +ur_result_t urProgramGetGlobalVariablePointer( + ur_device_handle_t hDevice, ur_program_handle_t hProgram, + const char *pGlobalVariableName, size_t *pGlobalVariableSizeRet, + void **ppGlobalVariablePointerRet); +ur_result_t urProgramGetInfo(ur_program_handle_t hProgram, + ur_program_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urProgramGetBuildInfo(ur_program_handle_t hProgram, + ur_device_handle_t hDevice, + ur_program_build_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urProgramSetSpecializationConstants( + ur_program_handle_t hProgram, uint32_t count, + const ur_specialization_constant_info_t *pSpecConstants); +ur_result_t urProgramGetNativeHandle(ur_program_handle_t hProgram, + ur_native_handle_t *phNativeProgram); +ur_result_t urProgramCreateWithNativeHandle( + ur_native_handle_t hNativeProgram, ur_context_handle_t hContext, + const ur_program_native_properties_t *pProperties, + ur_program_handle_t *phProgram); +ur_result_t urKernelCreate(ur_program_handle_t hProgram, + 
const char *pKernelName, + ur_kernel_handle_t *phKernel); +ur_result_t urKernelSetArgValue( + ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, + const ur_kernel_arg_value_properties_t *pProperties, const void *pArgValue); +ur_result_t +urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, + size_t argSize, + const ur_kernel_arg_local_properties_t *pProperties); +ur_result_t urKernelGetInfo(ur_kernel_handle_t hKernel, + ur_kernel_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urKernelGetGroupInfo(ur_kernel_handle_t hKernel, + ur_device_handle_t hDevice, + ur_kernel_group_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, + ur_device_handle_t hDevice, + ur_kernel_sub_group_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urKernelRetain(ur_kernel_handle_t hKernel); +ur_result_t urKernelRelease(ur_kernel_handle_t hKernel); +ur_result_t +urKernelSetArgPointer(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_pointer_properties_t *pProperties, + const void *pArgValue); +ur_result_t +urKernelSetExecInfo(ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, + size_t propSize, + const ur_kernel_exec_info_properties_t *pProperties, + const void *pPropValue); +ur_result_t +urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_sampler_properties_t *pProperties, + ur_sampler_handle_t hArgValue); +ur_result_t +urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_mem_obj_properties_t *pProperties, + ur_mem_handle_t hArgValue); +ur_result_t urKernelSetSpecializationConstants( + ur_kernel_handle_t hKernel, uint32_t count, + const ur_specialization_constant_info_t *pSpecConstants); +ur_result_t urKernelGetNativeHandle(ur_kernel_handle_t hKernel, + ur_native_handle_t *phNativeKernel); +ur_result_t +urKernelCreateWithNativeHandle(ur_native_handle_t hNativeKernel, + ur_context_handle_t hContext, + ur_program_handle_t hProgram, + const ur_kernel_native_properties_t *pProperties, + ur_kernel_handle_t *phKernel); +ur_result_t urKernelGetSuggestedLocalWorkSize(ur_kernel_handle_t hKernel, + ur_queue_handle_t hQueue, + uint32_t numWorkDim, + const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, + size_t *pSuggestedLocalWorkSize); +ur_result_t urQueueGetInfo(ur_queue_handle_t hQueue, ur_queue_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urQueueCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_queue_properties_t *pProperties, + ur_queue_handle_t *phQueue); +ur_result_t urQueueRetain(ur_queue_handle_t hQueue); +ur_result_t urQueueRelease(ur_queue_handle_t hQueue); +ur_result_t urQueueGetNativeHandle(ur_queue_handle_t hQueue, + ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue); +ur_result_t urQueueCreateWithNativeHandle( + ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, + ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, + ur_queue_handle_t *phQueue); +ur_result_t urQueueFinish(ur_queue_handle_t hQueue); +ur_result_t urQueueFlush(ur_queue_handle_t hQueue); +ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urEventGetProfilingInfo(ur_event_handle_t hEvent, + 
ur_profiling_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urEventWait(uint32_t numEvents, + const ur_event_handle_t *phEventWaitList); +ur_result_t urEventRetain(ur_event_handle_t hEvent); +ur_result_t urEventRelease(ur_event_handle_t hEvent); +ur_result_t urEventGetNativeHandle(ur_event_handle_t hEvent, + ur_native_handle_t *phNativeEvent); +ur_result_t +urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, + ur_context_handle_t hContext, + const ur_event_native_properties_t *pProperties, + ur_event_handle_t *phEvent); +ur_result_t urEventSetCallback(ur_event_handle_t hEvent, + ur_execution_info_t execStatus, + ur_event_callback_t pfnNotify, void *pUserData); +ur_result_t urEnqueueKernelLaunch( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueEventsWait(ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueEventsWaitWithBarrier( + ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferRead(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferReadRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferWriteRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferCopy(ur_queue_handle_t hQueue, + ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferCopyRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferFill(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, + size_t 
offset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemImageRead( + ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemImageWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t +urEnqueueMemImageCopy(ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, + ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferMap(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, void **ppRetMap); +ur_result_t urEnqueueMemUnmap(ur_queue_handle_t hQueue, ur_mem_handle_t hMem, + void *pMappedPtr, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMFill(ur_queue_handle_t hQueue, void *pMem, + size_t patternSize, const void *pPattern, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMMemcpy(ur_queue_handle_t hQueue, bool blocking, + void *pDst, const void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMPrefetch(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMFill2D(ur_queue_handle_t hQueue, void *pMem, + size_t pitch, size_t patternSize, + const void *pPattern, size_t width, + size_t height, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMMemcpy2D(ur_queue_handle_t hQueue, bool blocking, + void *pDst, size_t dstPitch, const void *pSrc, + size_t srcPitch, size_t width, size_t height, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueDeviceGlobalVariableWrite( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingWrite, size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueDeviceGlobalVariableRead( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingRead, size_t count, size_t offset, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t 
urEnqueueReadHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t widthInBytes, + size_t height, size_t elementSizeBytes, + void **ppMem, size_t *pResultPitch); +ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_native_handle_t hImage); +ur_result_t urBindlessImagesSampledImageHandleDestroyExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_native_handle_t hImage); +ur_result_t urBindlessImagesImageAllocateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_mem_native_handle_t *phImageMem); +ur_result_t +urBindlessImagesImageFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem); +ur_result_t urBindlessImagesUnsampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_native_handle_t *phImage); +ur_result_t urBindlessImagesSampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_sampler_handle_t hSampler, ur_exp_image_native_handle_t *phImage); +ur_result_t urBindlessImagesImageCopyExp( + ur_queue_handle_t hQueue, const void *pSrc, void *pDst, + const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, + const ur_image_format_t *pSrcImageFormat, + const ur_image_format_t *pDstImageFormat, + ur_exp_image_copy_region_t *pCopyRegion, + ur_exp_image_copy_flags_t imageCopyFlags, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urBindlessImagesImageGetInfoExp( + ur_context_handle_t hContext, ur_exp_image_mem_native_handle_t hImageMem, + ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet); +ur_result_t urBindlessImagesMipmapGetLevelExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel, + ur_exp_image_mem_native_handle_t *phImageMem); +ur_result_t +urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hMem); +ur_result_t urBindlessImagesImportExternalMemoryExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + ur_exp_external_mem_type_t memHandleType, + ur_exp_external_mem_desc_t *pExternalMemDesc, + ur_exp_external_mem_handle_t *phExternalMem); +ur_result_t urBindlessImagesMapExternalArrayExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + 
ur_exp_external_mem_handle_t hExternalMem, + ur_exp_image_mem_native_handle_t *phImageMem); +ur_result_t urBindlessImagesMapExternalLinearMemoryExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, uint64_t offset, + uint64_t size, ur_exp_external_mem_handle_t hExternalMem, void **ppRetMem); +ur_result_t urBindlessImagesReleaseExternalMemoryExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_external_mem_handle_t hExternalMem); +ur_result_t urBindlessImagesImportExternalSemaphoreExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_external_semaphore_type_t semHandleType, + ur_exp_external_semaphore_desc_t *pExternalSemaphoreDesc, + ur_exp_external_semaphore_handle_t *phExternalSemaphore); +ur_result_t urBindlessImagesReleaseExternalSemaphoreExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_external_semaphore_handle_t hExternalSemaphore); +ur_result_t urBindlessImagesWaitExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, + bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urBindlessImagesSignalExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, + bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t +urCommandBufferCreateExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + ur_exp_command_buffer_handle_t *phCommandBuffer); +ur_result_t +urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer); +ur_result_t +urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer); +ur_result_t +urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer); +ur_result_t urCommandBufferAppendKernelLaunchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, + uint32_t workDim, const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *phCommand); +ur_result_t urCommandBufferAppendUSMMemcpyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, + size_t size, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory, + const void *pPattern, size_t patternSize, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferCopyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferWriteExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + size_t offset, size_t size, const void *pSrc, + uint32_t 
numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferReadExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferCopyRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferWriteRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferReadRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pDst, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, size_t offset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, + size_t size, ur_usm_migration_flags_t flags, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, + size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferEnqueueExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t hCommand); +ur_result_t urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t hCommand); +ur_result_t urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch); +ur_result_t +urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, 
+ ur_exp_command_buffer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t hCommand, + ur_exp_command_buffer_command_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, size_t localWorkSize, + size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet); +ur_result_t urEnqueueTimestampRecordingExp( + ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueKernelLaunchCustomExp( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urProgramBuildExp(ur_program_handle_t hProgram, uint32_t numDevices, + ur_device_handle_t *phDevices, + const char *pOptions); +ur_result_t urProgramCompileExp(ur_program_handle_t hProgram, + uint32_t numDevices, + ur_device_handle_t *phDevices, + const char *pOptions); +ur_result_t urProgramLinkExp(ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, uint32_t count, + const ur_program_handle_t *phPrograms, + const char *pOptions, + ur_program_handle_t *phProgram); +ur_result_t urUSMImportExp(ur_context_handle_t hContext, void *pMem, + size_t size); +ur_result_t urUSMReleaseExp(ur_context_handle_t hContext, void *pMem); +ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice); +ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice); +ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice, + ur_exp_peer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urEnqueueNativeCommandExp( + ur_queue_handle_t hQueue, + ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, + uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, + const ur_exp_enqueue_native_command_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); +#endif +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/ur_level_zero.hpp b/source/adapters/level_zero/ur_level_zero.hpp index 096ae076f9..36965c5d58 100644 --- a/source/adapters/level_zero/ur_level_zero.hpp +++ b/source/adapters/level_zero/ur_level_zero.hpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 1069ec78da..5296391794 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -17,6 +17,7 @@ #include "usm.hpp" #include 
"logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" #include "ur_util.hpp" @@ -296,7 +297,9 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( +namespace ur::level_zero { + +ur_result_t urUSMHostAlloc( ur_context_handle_t Context, ///< [in] handle of the context object const ur_usm_desc_t *USMDesc, ///< [in][optional] USM memory allocation descriptor @@ -335,7 +338,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( // We are going to defer memory release if there are kernels with indirect // access, that is why explicitly retain context to be sure that it is // released after all memory allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } else { ContextLock.lock(); } @@ -368,7 +371,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( +ur_result_t urUSMDeviceAlloc( ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, ///< [in] handle of the device object const ur_usm_desc_t @@ -410,7 +413,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( // We are going to defer memory release if there are kernels with indirect // access, that is why explicitly retain context to be sure that it is // released after all memory allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } else { ContextLock.lock(); } @@ -448,7 +451,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( +ur_result_t urUSMSharedAlloc( ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, ///< [in] handle of the device object const ur_usm_desc_t @@ -513,7 +516,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( // We are going to defer memory release if there are kernels with indirect // access, that is why explicitly retain context to be sure that it is // released after all memory allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } umf_memory_pool_handle_t hPoolInternal = nullptr; @@ -555,9 +558,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( - ur_context_handle_t Context, ///< [in] handle of the context object - void *Mem ///< [in] pointer to USM memory object +ur_result_t +urUSMFree(ur_context_handle_t Context, ///< [in] handle of the context object + void *Mem ///< [in] pointer to USM memory object ) { ur_platform_handle_t Plt = Context->getPlatform(); @@ -567,7 +570,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( return USMFreeHelper(Context, Mem); } -UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( +ur_result_t urUSMGetMemAllocInfo( ur_context_handle_t Context, ///< [in] handle of the context object const void *Ptr, ///< [in] pointer to USM memory object ur_usm_alloc_info_t @@ -667,6 +670,103 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( return UR_RESULT_SUCCESS; } +ur_result_t urUSMPoolCreate( + ur_context_handle_t Context, ///< [in] handle of the context object + ur_usm_pool_desc_t + *PoolDesc, ///< [in] pointer to USM pool descriptor. 
Can be chained with + ///< ::ur_usm_pool_limits_desc_t + ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool +) { + + try { + *Pool = reinterpret_cast( + new ur_usm_pool_handle_t_(Context, PoolDesc)); + + std::shared_lock ContextLock(Context->Mutex); + Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool); + + } catch (const UsmAllocationException &Ex) { + return Ex.getError(); + } + return UR_RESULT_SUCCESS; +} + +ur_result_t +urUSMPoolRetain(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool +) { + Pool->RefCount.increment(); + return UR_RESULT_SUCCESS; +} + +ur_result_t +urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool +) { + if (Pool->RefCount.decrementAndTest()) { + std::shared_lock ContextLock(Pool->Context->Mutex); + Pool->Context->UsmPoolHandles.remove(Pool); + delete Pool; + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urUSMPoolGetInfo( + ur_usm_pool_handle_t Pool, ///< [in] handle of the USM memory pool + ur_usm_pool_info_t PropName, ///< [in] name of the pool property to query + size_t PropSize, ///< [in] size in bytes of the pool property value provided + void *PropValue, ///< [out][typename(propName, propSize)] value of the pool + ///< property + size_t *PropSizeRet ///< [out] size in bytes returned in pool property value +) { + UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); + + switch (PropName) { + case UR_USM_POOL_INFO_REFERENCE_COUNT: { + return ReturnValue(Pool->RefCount.load()); + } + case UR_USM_POOL_INFO_CONTEXT: { + return ReturnValue(Pool->Context); + } + default: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + } +} + +ur_result_t urUSMImportExp(ur_context_handle_t Context, void *HostPtr, + size_t Size) { + UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); + + // Promote the host ptr to USM host memory. + if (ZeUSMImport.Supported && HostPtr != nullptr) { + // Query memory type of the host pointer + ze_device_handle_t ZeDeviceHandle; + ZeStruct ZeMemoryAllocationProperties; + ZE2UR_CALL(zeMemGetAllocProperties, + (Context->ZeContext, HostPtr, &ZeMemoryAllocationProperties, + &ZeDeviceHandle)); + + // If not shared of any type, we can import the ptr + if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) { + // Promote the host ptr to USM host memory + ze_driver_handle_t driverHandle = + Context->getPlatform()->ZeDriverHandleExpTranslated; + ZeUSMImport.doZeUSMImport(driverHandle, HostPtr, Size); + } + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urUSMReleaseExp(ur_context_handle_t Context, void *HostPtr) { + UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); + + // Release the imported memory. + if (ZeUSMImport.Supported && HostPtr != nullptr) + ZeUSMImport.doZeUSMRelease( + Context->getPlatform()->ZeDriverHandleExpTranslated, HostPtr); + return UR_RESULT_SUCCESS; +} +} // namespace ur::level_zero + static ur_result_t USMFreeImpl(ur_context_handle_t Context, void *Ptr) { auto ZeResult = ZE_CALL_NOCHECK(zeMemFree, (Context->ZeContext, Ptr)); // Handle When the driver is already released @@ -972,68 +1072,6 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, } } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( - ur_context_handle_t Context, ///< [in] handle of the context object - ur_usm_pool_desc_t - *PoolDesc, ///< [in] pointer to USM pool descriptor. 
Can be chained with - ///< ::ur_usm_pool_limits_desc_t - ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool -) { - - try { - *Pool = reinterpret_cast( - new ur_usm_pool_handle_t_(Context, PoolDesc)); - - std::shared_lock ContextLock(Context->Mutex); - Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool); - - } catch (const UsmAllocationException &Ex) { - return Ex.getError(); - } - return UR_RESULT_SUCCESS; -} - -ur_result_t -urUSMPoolRetain(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool -) { - Pool->RefCount.increment(); - return UR_RESULT_SUCCESS; -} - -ur_result_t -urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool -) { - if (Pool->RefCount.decrementAndTest()) { - std::shared_lock ContextLock(Pool->Context->Mutex); - Pool->Context->UsmPoolHandles.remove(Pool); - delete Pool; - } - return UR_RESULT_SUCCESS; -} - -ur_result_t urUSMPoolGetInfo( - ur_usm_pool_handle_t Pool, ///< [in] handle of the USM memory pool - ur_usm_pool_info_t PropName, ///< [in] name of the pool property to query - size_t PropSize, ///< [in] size in bytes of the pool property value provided - void *PropValue, ///< [out][typename(propName, propSize)] value of the pool - ///< property - size_t *PropSizeRet ///< [out] size in bytes returned in pool property value -) { - UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); - - switch (PropName) { - case UR_USM_POOL_INFO_REFERENCE_COUNT: { - return ReturnValue(Pool->RefCount.load()); - } - case UR_USM_POOL_INFO_CONTEXT: { - return ReturnValue(Pool->Context); - } - default: { - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - } - } -} - // If indirect access tracking is not enabled then this functions just performs // zeMemFree. If indirect access tracking is enabled then reference counting is // performed. @@ -1116,38 +1154,3 @@ ur_result_t USMFreeHelper(ur_context_handle_t Context, void *Ptr, UR_CALL(ContextReleaseHelper(Context)); return umf2urResult(umfRet); } - -UR_APIEXPORT ur_result_t UR_APICALL urUSMImportExp(ur_context_handle_t Context, - void *HostPtr, size_t Size) { - UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); - - // Promote the host ptr to USM host memory. - if (ZeUSMImport.Supported && HostPtr != nullptr) { - // Query memory type of the host pointer - ze_device_handle_t ZeDeviceHandle; - ZeStruct ZeMemoryAllocationProperties; - ZE2UR_CALL(zeMemGetAllocProperties, - (Context->ZeContext, HostPtr, &ZeMemoryAllocationProperties, - &ZeDeviceHandle)); - - // If not shared of any type, we can import the ptr - if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) { - // Promote the host ptr to USM host memory - ze_driver_handle_t driverHandle = - Context->getPlatform()->ZeDriverHandleExpTranslated; - ZeUSMImport.doZeUSMImport(driverHandle, HostPtr, Size); - } - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t Context, - void *HostPtr) { - UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); - - // Release the imported memory. 
- if (ZeUSMImport.Supported && HostPtr != nullptr) - ZeUSMImport.doZeUSMRelease( - Context->getPlatform()->ZeDriverHandleExpTranslated, HostPtr); - return UR_RESULT_SUCCESS; -} diff --git a/source/adapters/level_zero/usm_p2p.cpp b/source/adapters/level_zero/usm_p2p.cpp index 2b81828423..6e701aa803 100644 --- a/source/adapters/level_zero/usm_p2p.cpp +++ b/source/adapters/level_zero/usm_p2p.cpp @@ -11,8 +11,10 @@ #include "logger/ur_logger.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +namespace ur::level_zero { + +ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { std::ignore = commandDevice; std::ignore = peerDevice; @@ -21,8 +23,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { std::ignore = commandDevice; std::ignore = peerDevice; @@ -31,10 +33,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice, - ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice, + ur_exp_peer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); @@ -69,3 +72,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return ReturnValue(propertyValue); } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/api.cpp b/source/adapters/level_zero/v2/api.cpp index dc52874364..eba7359379 100644 --- a/source/adapters/level_zero/v2/api.cpp +++ b/source/adapters/level_zero/v2/api.cpp @@ -17,13 +17,14 @@ std::mutex ZeCall::GlobalLock; -ur_result_t UR_APICALL urContextGetNativeHandle( - ur_context_handle_t hContext, ur_native_handle_t *phNativeContext) { +namespace ur::level_zero { +ur_result_t urContextGetNativeHandle(ur_context_handle_t hContext, + ur_native_handle_t *phNativeContext) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urContextCreateWithNativeHandle( +ur_result_t urContextCreateWithNativeHandle( ur_native_handle_t hNativeContext, ur_adapter_handle_t hAdapter, uint32_t numDevices, const ur_device_handle_t *phDevices, const ur_context_native_properties_t *pProperties, @@ -32,62 +33,63 @@ ur_result_t UR_APICALL urContextCreateWithNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urContextSetExtendedDeleter( - ur_context_handle_t hContext, ur_context_extended_deleter_t pfnDeleter, - void *pUserData) { +ur_result_t +urContextSetExtendedDeleter(ur_context_handle_t hContext, + ur_context_extended_deleter_t pfnDeleter, + void *pUserData) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemImageCreate(ur_context_handle_t hContext, - ur_mem_flags_t flags, - const ur_image_format_t *pImageFormat, - const 
ur_image_desc_t *pImageDesc, - void *pHost, ur_mem_handle_t *phMem) { +ur_result_t urMemImageCreate(ur_context_handle_t hContext, ur_mem_flags_t flags, + const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, void *pHost, + ur_mem_handle_t *phMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemBufferCreate( - ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, - const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { +ur_result_t urMemBufferCreate(ur_context_handle_t hContext, + ur_mem_flags_t flags, size_t size, + const ur_buffer_properties_t *pProperties, + ur_mem_handle_t *phBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemRetain(ur_mem_handle_t hMem) { +ur_result_t urMemRetain(ur_mem_handle_t hMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { +ur_result_t urMemRelease(ur_mem_handle_t hMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemBufferPartition( - ur_mem_handle_t hBuffer, ur_mem_flags_t flags, - ur_buffer_create_type_t bufferCreateType, const ur_buffer_region_t *pRegion, - ur_mem_handle_t *phMem) { +ur_result_t urMemBufferPartition(ur_mem_handle_t hBuffer, ur_mem_flags_t flags, + ur_buffer_create_type_t bufferCreateType, + const ur_buffer_region_t *pRegion, + ur_mem_handle_t *phMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemGetNativeHandle(ur_mem_handle_t hMem, - ur_device_handle_t hDevice, - ur_native_handle_t *phNativeMem) { +ur_result_t urMemGetNativeHandle(ur_mem_handle_t hMem, + ur_device_handle_t hDevice, + ur_native_handle_t *phNativeMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( +ur_result_t urMemBufferCreateWithNativeHandle( ur_native_handle_t hNativeMem, ur_context_handle_t hContext, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( +ur_result_t urMemImageCreateWithNativeHandle( ur_native_handle_t hNativeMem, ur_context_handle_t hContext, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { @@ -95,53 +97,51 @@ ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, - ur_mem_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { +ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, - ur_image_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urMemImageGetInfo(ur_mem_handle_t hMemory, ur_image_info_t 
propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerCreate(ur_context_handle_t hContext, - const ur_sampler_desc_t *pDesc, - ur_sampler_handle_t *phSampler) { +ur_result_t urSamplerCreate(ur_context_handle_t hContext, + const ur_sampler_desc_t *pDesc, + ur_sampler_handle_t *phSampler) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerRetain(ur_sampler_handle_t hSampler) { +ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerRelease(ur_sampler_handle_t hSampler) { +ur_result_t urSamplerRelease(ur_sampler_handle_t hSampler) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerGetInfo(ur_sampler_handle_t hSampler, - ur_sampler_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler, + ur_sampler_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerGetNativeHandle( - ur_sampler_handle_t hSampler, ur_native_handle_t *phNativeSampler) { +ur_result_t urSamplerGetNativeHandle(ur_sampler_handle_t hSampler, + ur_native_handle_t *phNativeSampler) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( +ur_result_t urSamplerCreateWithNativeHandle( ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, const ur_sampler_native_properties_t *pProperties, ur_sampler_handle_t *phSampler) { @@ -149,7 +149,7 @@ ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( +ur_result_t urVirtualMemGranularityGetInfo( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_virtual_mem_granularity_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { @@ -157,111 +157,109 @@ ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemReserve(ur_context_handle_t hContext, - const void *pStart, size_t size, - void **ppStart) { +ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, + const void *pStart, size_t size, + void **ppStart) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemFree(ur_context_handle_t hContext, - const void *pStart, size_t size) { +ur_result_t urVirtualMemFree(ur_context_handle_t hContext, const void *pStart, + size_t size) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemMap(ur_context_handle_t hContext, - const void *pStart, size_t size, - ur_physical_mem_handle_t hPhysicalMem, - size_t offset, - ur_virtual_mem_access_flags_t flags) { +ur_result_t urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, + size_t size, ur_physical_mem_handle_t 
hPhysicalMem, + size_t offset, + ur_virtual_mem_access_flags_t flags) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemUnmap(ur_context_handle_t hContext, - const void *pStart, size_t size) { +ur_result_t urVirtualMemUnmap(ur_context_handle_t hContext, const void *pStart, + size_t size) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL -urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, - size_t size, ur_virtual_mem_access_flags_t flags) { +ur_result_t urVirtualMemSetAccess(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_access_flags_t flags) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urVirtualMemGetInfo(ur_context_handle_t hContext, - const void *pStart, size_t size, - ur_virtual_mem_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urPhysicalMemCreate( - ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, - const ur_physical_mem_properties_t *pProperties, - ur_physical_mem_handle_t *phPhysicalMem) { +ur_result_t urPhysicalMemCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, size_t size, + const ur_physical_mem_properties_t *pProperties, + ur_physical_mem_handle_t *phPhysicalMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL -urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL -urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelSetArgLocal( - ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, - const ur_kernel_arg_local_properties_t *pProperties) { +ur_result_t +urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, + size_t argSize, + const ur_kernel_arg_local_properties_t *pProperties) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, - ur_kernel_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urKernelGetInfo(ur_kernel_handle_t hKernel, + ur_kernel_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelGetGroupInfo(ur_kernel_handle_t hKernel, - ur_device_handle_t hDevice, - ur_kernel_group_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urKernelGetGroupInfo(ur_kernel_handle_t 
hKernel, + ur_device_handle_t hDevice, + ur_kernel_group_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL -urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, - ur_kernel_sub_group_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { +ur_result_t urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, + ur_device_handle_t hDevice, + ur_kernel_sub_group_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelSetExecInfo( - ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, size_t propSize, - const ur_kernel_exec_info_properties_t *pProperties, - const void *pPropValue) { +ur_result_t +urKernelSetExecInfo(ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, + size_t propSize, + const ur_kernel_exec_info_properties_t *pProperties, + const void *pPropValue) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL +ur_result_t urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_sampler_properties_t *pProperties, ur_sampler_handle_t hArgValue) { @@ -269,7 +267,7 @@ urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex, return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL +ur_result_t urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_mem_obj_properties_t *pProperties, ur_mem_handle_t hArgValue) { @@ -277,104 +275,107 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelSetSpecializationConstants( +ur_result_t urKernelSetSpecializationConstants( ur_kernel_handle_t hKernel, uint32_t count, const ur_specialization_constant_info_t *pSpecConstants) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelGetNativeHandle( - ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { +ur_result_t urKernelGetNativeHandle(ur_kernel_handle_t hKernel, + ur_native_handle_t *phNativeKernel) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelCreateWithNativeHandle( - ur_native_handle_t hNativeKernel, ur_context_handle_t hContext, - ur_program_handle_t hProgram, - const ur_kernel_native_properties_t *pProperties, - ur_kernel_handle_t *phKernel) { +ur_result_t +urKernelCreateWithNativeHandle(ur_native_handle_t hNativeKernel, + ur_context_handle_t hContext, + ur_program_handle_t hProgram, + const ur_kernel_native_properties_t *pProperties, + ur_kernel_handle_t *phKernel) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( - ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t numWorkDim, - const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, - size_t *pSuggestedLocalWorkSize) { +ur_result_t urKernelGetSuggestedLocalWorkSize(ur_kernel_handle_t hKernel, + ur_queue_handle_t hQueue, + uint32_t numWorkDim, + const size_t *pGlobalWorkOffset, + const 
size_t *pGlobalWorkSize, + size_t *pSuggestedLocalWorkSize) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, - ur_event_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { +ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventGetProfilingInfo(ur_event_handle_t hEvent, - ur_profiling_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urEventGetProfilingInfo(ur_event_handle_t hEvent, + ur_profiling_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventWait(uint32_t numEvents, - const ur_event_handle_t *phEventWaitList) { +ur_result_t urEventWait(uint32_t numEvents, + const ur_event_handle_t *phEventWaitList) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventGetNativeHandle( - ur_event_handle_t hEvent, ur_native_handle_t *phNativeEvent) { +ur_result_t urEventGetNativeHandle(ur_event_handle_t hEvent, + ur_native_handle_t *phNativeEvent) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventCreateWithNativeHandle( - ur_native_handle_t hNativeEvent, ur_context_handle_t hContext, - const ur_event_native_properties_t *pProperties, - ur_event_handle_t *phEvent) { +ur_result_t +urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, + ur_context_handle_t hContext, + const ur_event_native_properties_t *pProperties, + ur_event_handle_t *phEvent) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urEventSetCallback(ur_event_handle_t hEvent, - ur_execution_info_t execStatus, - ur_event_callback_t pfnNotify, - void *pUserData) { +ur_result_t urEventSetCallback(ur_event_handle_t hEvent, + ur_execution_info_t execStatus, + ur_event_callback_t pfnNotify, void *pUserData) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUSMPitchedAllocExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, - size_t widthInBytes, size_t height, size_t elementSizeBytes, void **ppMem, - size_t *pResultPitch) { +ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t widthInBytes, + size_t height, size_t elementSizeBytes, + void **ppMem, size_t *pResultPitch) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesUnsampledImageHandleDestroyExp( +ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_native_handle_t hImage) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL 
urBindlessImagesSampledImageHandleDestroyExp( +ur_result_t urBindlessImagesSampledImageHandleDestroyExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_native_handle_t hImage) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( +ur_result_t urBindlessImagesImageAllocateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_image_mem_native_handle_t *phImageMem) { @@ -382,14 +383,15 @@ ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImageFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hImageMem) { +ur_result_t +urBindlessImagesImageFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( +ur_result_t urBindlessImagesUnsampledImageCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -398,7 +400,7 @@ ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( +ur_result_t urBindlessImagesSampledImageCreateExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, @@ -407,14 +409,14 @@ ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( +ur_result_t urBindlessImagesImageGetInfoExp( ur_context_handle_t hContext, ur_exp_image_mem_native_handle_t hImageMem, ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( +ur_result_t urBindlessImagesMipmapGetLevelExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel, ur_exp_image_mem_native_handle_t *phImageMem) { @@ -422,14 +424,15 @@ ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hMem) { +ur_result_t +urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( +ur_result_t urBindlessImagesImportExternalMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, ur_exp_external_mem_type_t memHandleType, ur_exp_external_mem_desc_t *pExternalMemDesc, @@ -438,7 +441,7 @@ ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( return 
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( +ur_result_t urBindlessImagesMapExternalArrayExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_external_mem_handle_t hExternalMem, @@ -447,21 +450,21 @@ ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp( +ur_result_t urBindlessImagesMapExternalLinearMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, uint64_t offset, uint64_t size, ur_exp_external_mem_handle_t hExternalMem, void **ppRetMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp( +ur_result_t urBindlessImagesReleaseExternalMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_mem_handle_t hExternalMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( +ur_result_t urBindlessImagesImportExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_semaphore_type_t semHandleType, ur_exp_external_semaphore_desc_t *pExternalSemaphoreDesc, @@ -470,40 +473,41 @@ ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp( +ur_result_t urBindlessImagesReleaseExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_semaphore_handle_t hExternalSemaphore) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferCreateExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_exp_command_buffer_desc_t *pCommandBufferDesc, - ur_exp_command_buffer_handle_t *phCommandBuffer) { +ur_result_t +urCommandBufferCreateExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + ur_exp_command_buffer_handle_t *phCommandBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL +ur_result_t urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL +ur_result_t urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL +ur_result_t urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( +ur_result_t urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, @@ -515,7 +519,7 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return 
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( +ur_result_t urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, size_t size, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, @@ -524,7 +528,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( +ur_result_t urCommandBufferAppendUSMFillExp( ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory, const void *pPattern, size_t patternSize, size_t size, uint32_t numSyncPointsInWaitList, @@ -534,7 +538,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( +ur_result_t urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numSyncPointsInWaitList, @@ -544,7 +548,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( +ur_result_t urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, size_t offset, size_t size, const void *pSrc, uint32_t numSyncPointsInWaitList, @@ -554,7 +558,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( +ur_result_t urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, @@ -563,7 +567,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( +ur_result_t urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, @@ -575,7 +579,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( +ur_result_t urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -587,7 +591,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( +ur_result_t urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -599,7 +603,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( 
+ur_result_t urCommandBufferAppendMemBufferFillExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, const void *pPattern, size_t patternSize, size_t offset, size_t size, uint32_t numSyncPointsInWaitList, @@ -609,7 +613,7 @@ ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( +ur_result_t urCommandBufferAppendUSMPrefetchExp( ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, size_t size, ur_usm_migration_flags_t flags, uint32_t numSyncPointsInWaitList, @@ -619,7 +623,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( +ur_result_t urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, @@ -628,7 +632,7 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferEnqueueExp( +ur_result_t urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -636,19 +640,19 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferRetainCommandExp( +ur_result_t urCommandBufferRetainCommandExp( ur_exp_command_buffer_command_handle_t hCommand) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( +ur_result_t urCommandBufferReleaseCommandExp( ur_exp_command_buffer_command_handle_t hCommand) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( +ur_result_t urCommandBufferUpdateKernelLaunchExp( ur_exp_command_buffer_command_handle_t hCommand, const ur_exp_command_buffer_update_kernel_launch_desc_t *pUpdateKernelLaunch) { @@ -656,15 +660,16 @@ ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferGetInfoExp( - ur_exp_command_buffer_handle_t hCommandBuffer, - ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t +urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( +ur_result_t urCommandBufferCommandGetInfoExp( ur_exp_command_buffer_command_handle_t hCommand, ur_exp_command_buffer_command_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { @@ -672,41 +677,42 @@ ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( +ur_result_t urKernelSuggestMaxCooperativeGroupCountExp( ur_kernel_handle_t hKernel, size_t localWorkSize, size_t dynamicSharedMemorySize, 
uint32_t *pGroupCountRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUSMImportExp(ur_context_handle_t hContext, void *pMem, - size_t size) { +ur_result_t urUSMImportExp(ur_context_handle_t hContext, void *pMem, + size_t size) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t hContext, - void *pMem) { +ur_result_t urUSMReleaseExp(ur_context_handle_t hContext, void *pMem) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice, - ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice, + ur_exp_peer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/command_list_cache.hpp b/source/adapters/level_zero/v2/command_list_cache.hpp index 1850a4334c..bb32e0e64a 100644 --- a/source/adapters/level_zero/v2/command_list_cache.hpp +++ b/source/adapters/level_zero/v2/command_list_cache.hpp @@ -14,7 +14,7 @@ #include "latency_tracker.hpp" #include -#include +#include #include #include "../common.hpp" diff --git a/source/adapters/level_zero/v2/context.cpp b/source/adapters/level_zero/v2/context.cpp index 08032fe85e..84e3d96b88 100644 --- a/source/adapters/level_zero/v2/context.cpp +++ b/source/adapters/level_zero/v2/context.cpp @@ -72,10 +72,11 @@ bool ur_context_handle_t_::isValidDevice(ur_device_handle_t hDevice) const { return false; } -UR_APIEXPORT ur_result_t UR_APICALL -urContextCreate(uint32_t deviceCount, const ur_device_handle_t *phDevices, - const ur_context_properties_t *pProperties, - ur_context_handle_t *phContext) { +namespace ur::level_zero { +ur_result_t urContextCreate(uint32_t deviceCount, + const ur_device_handle_t *phDevices, + const ur_context_properties_t *pProperties, + ur_context_handle_t *phContext) { std::ignore = pProperties; ur_platform_handle_t hPlatform = phDevices[0]->Platform; @@ -89,23 +90,20 @@ urContextCreate(uint32_t deviceCount, const ur_device_handle_t *phDevices, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urContextRetain(ur_context_handle_t hContext) { +ur_result_t urContextRetain(ur_context_handle_t hContext) { return hContext->retain(); } -UR_APIEXPORT ur_result_t UR_APICALL -urContextRelease(ur_context_handle_t hContext) { +ur_result_t 
urContextRelease(ur_context_handle_t hContext) { return hContext->release(); } -UR_APIEXPORT ur_result_t UR_APICALL -urContextGetInfo(ur_context_handle_t hContext, - ur_context_info_t contextInfoType, size_t propSize, +ur_result_t urContextGetInfo(ur_context_handle_t hContext, + ur_context_info_t contextInfoType, size_t propSize, - void *pContextInfo, + void *pContextInfo, - size_t *pPropSizeRet) { + size_t *pPropSizeRet) { std::shared_lock Lock(hContext->Mutex); UrReturnHelper ReturnValue(propSize, pContextInfo, pPropSizeRet); switch ( @@ -121,3 +119,4 @@ urContextGetInfo(ur_context_handle_t hContext, return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/event.cpp b/source/adapters/level_zero/v2/event.cpp index 3129e3dd3e..8654e0b25a 100644 --- a/source/adapters/level_zero/v2/event.cpp +++ b/source/adapters/level_zero/v2/event.cpp @@ -45,10 +45,10 @@ ur_result_t ur_event_handle_t_::release() { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { - return hEvent->retain(); -} +namespace ur::level_zero { +ur_result_t urEventRetain(ur_event_handle_t hEvent) { return hEvent->retain(); } -UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { +ur_result_t urEventRelease(ur_event_handle_t hEvent) { return hEvent->release(); } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/event_provider_normal.hpp b/source/adapters/level_zero/v2/event_provider_normal.hpp index 238ab2f360..1260964a4f 100644 --- a/source/adapters/level_zero/v2/event_provider_normal.hpp +++ b/source/adapters/level_zero/v2/event_provider_normal.hpp @@ -23,6 +23,7 @@ #include "event.hpp" #include "../device.hpp" +#include "../ur_interface_loader.hpp" namespace v2 { @@ -50,10 +51,10 @@ class provider_normal : public event_provider { event_type etype, queue_type qtype) : producedType(etype), queueType(qtype), urContext(context), urDevice(device) { - urDeviceRetain(device); + ur::level_zero::urDeviceRetain(device); } - ~provider_normal() override { urDeviceRelease(urDevice); } + ~provider_normal() override { ur::level_zero::urDeviceRelease(urDevice); } event_allocation allocate() override; ur_device_handle_t device() override; diff --git a/source/adapters/level_zero/v2/kernel.cpp b/source/adapters/level_zero/v2/kernel.cpp index daad306b0c..e6a37af814 100644 --- a/source/adapters/level_zero/v2/kernel.cpp +++ b/source/adapters/level_zero/v2/kernel.cpp @@ -16,6 +16,7 @@ #include "../device.hpp" #include "../platform.hpp" #include "../program.hpp" +#include "../ur_interface_loader.hpp" ur_single_device_kernel_t::ur_single_device_kernel_t(ze_device_handle_t hDevice, ze_kernel_handle_t hKernel, @@ -36,7 +37,7 @@ ur_kernel_handle_t_::ur_kernel_handle_t_(ur_program_handle_t hProgram, const char *kernelName) : hProgram(hProgram), deviceKernels(hProgram->Context->getPlatform()->getNumDevices()) { - urProgramRetain(hProgram); + ur::level_zero::urProgramRetain(hProgram); for (auto [zeDevice, zeModule] : hProgram->ZeModuleMap) { ZeStruct zeKernelDesc; @@ -81,7 +82,7 @@ ur_result_t ur_kernel_handle_t_::release() { } } - UR_CALL_THROWS(urProgramRelease(hProgram)); + UR_CALL_THROWS(ur::level_zero::urProgramRelease(hProgram)); return UR_RESULT_SUCCESS; } @@ -196,21 +197,22 @@ ur_program_handle_t ur_kernel_handle_t_::getProgramHandle() const { return hProgram; } -UR_APIEXPORT ur_result_t UR_APICALL -urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, - 
ur_kernel_handle_t *phKernel) { +namespace ur::level_zero { +ur_result_t urKernelCreate(ur_program_handle_t hProgram, + const char *pKernelName, + ur_kernel_handle_t *phKernel) { *phKernel = new ur_kernel_handle_t_(hProgram, pKernelName); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain( +ur_result_t urKernelRetain( ur_kernel_handle_t hKernel ///< [in] handle for the Kernel to retain ) { hKernel->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( +ur_result_t urKernelRelease( ur_kernel_handle_t hKernel ///< [in] handle for the Kernel to release ) { if (!hKernel->RefCount.decrementAndTest()) @@ -222,7 +224,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( +ur_result_t urKernelSetArgValue( ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object uint32_t argIndex, ///< [in] argument index in range [0, num args - 1] size_t argSize, ///< [in] size of argument type @@ -235,7 +237,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( return hKernel->setArgValue(argIndex, argSize, pProperties, pArgValue); } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( +ur_result_t urKernelSetArgPointer( ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object uint32_t argIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_pointer_properties_t @@ -246,3 +248,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( TRACK_SCOPE_LATENCY("ur_kernel_handle_t_::setArgPointer"); return hKernel->setArgPointer(argIndex, pProperties, pArgValue); } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/queue_api.cpp b/source/adapters/level_zero/v2/queue_api.cpp index 188f7c3102..ea2e931bfe 100644 --- a/source/adapters/level_zero/v2/queue_api.cpp +++ b/source/adapters/level_zero/v2/queue_api.cpp @@ -14,31 +14,30 @@ ur_queue_handle_t_::~ur_queue_handle_t_() {} -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, - ur_queue_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { +namespace ur::level_zero { +ur_result_t urQueueGetInfo(ur_queue_handle_t hQueue, ur_queue_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { return hQueue->queueGetInfo(propName, propSize, pPropValue, pPropSizeRet); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { +ur_result_t urQueueRetain(ur_queue_handle_t hQueue) { return hQueue->queueRetain(); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { +ur_result_t urQueueRelease(ur_queue_handle_t hQueue) { return hQueue->queueRelease(); } -UR_APIEXPORT ur_result_t UR_APICALL -urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc, - ur_native_handle_t *phNativeQueue) { +ur_result_t urQueueGetNativeHandle(ur_queue_handle_t hQueue, + ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue) { return hQueue->queueGetNativeHandle(pDesc, phNativeQueue); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { +ur_result_t urQueueFinish(ur_queue_handle_t hQueue) { return hQueue->queueFinish(); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { +ur_result_t urQueueFlush(ur_queue_handle_t hQueue) { return hQueue->queueFlush(); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( +ur_result_t 
urEnqueueKernelLaunch( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, @@ -47,27 +46,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( - ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueEventsWait(ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueEventsWait(numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( +ur_result_t urEnqueueEventsWaitWithBarrier( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { return hQueue->enqueueEventsWaitWithBarrier(numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, - size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemBufferRead(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemBufferRead(hBuffer, blockingRead, offset, size, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( +ur_result_t urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -75,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( +ur_result_t urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -87,7 +89,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( +ur_result_t urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -99,16 +101,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, - uint32_t numEventsInWaitList, const 
ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemBufferCopy(ur_queue_handle_t hQueue, + ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemBufferCopy(hBufferSrc, hBufferDst, srcOffset, dstOffset, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( +ur_result_t urEnqueueMemBufferCopyRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, @@ -120,16 +124,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern, - size_t patternSize, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemBufferFill(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, + size_t offset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemBufferFill(hBuffer, pPattern, patternSize, offset, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( +ur_result_t urEnqueueMemImageRead( ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, @@ -138,7 +144,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( hImage, blockingRead, origin, region, rowPitch, slicePitch, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( +ur_result_t urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, @@ -147,78 +153,85 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( hImage, blockingWrite, origin, region, rowPitch, slicePitch, pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, - ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t +urEnqueueMemImageCopy(ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, + ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemImageCopy(hImageSrc, hImageDst, srcOrigin, dstOrigin, region, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap, - 
ur_map_flags_t mapFlags, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent, void **ppRetMap) { +ur_result_t urEnqueueMemBufferMap(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, void **ppRetMap) { return hQueue->enqueueMemBufferMap(hBuffer, blockingMap, mapFlags, offset, size, numEventsInWaitList, phEventWaitList, phEvent, ppRetMap); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( - ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemUnmap(ur_queue_handle_t hQueue, ur_mem_handle_t hMem, + void *pMappedPtr, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemUnmap(hMem, pMappedPtr, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( - ur_queue_handle_t hQueue, void *pMem, size_t patternSize, - const void *pPattern, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMFill(ur_queue_handle_t hQueue, void *pMem, + size_t patternSize, const void *pPattern, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMFill(pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( - ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, - size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMMemcpy(ur_queue_handle_t hQueue, bool blocking, + void *pDst, const void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMMemcpy(blocking, pDst, pSrc, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMPrefetch(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMPrefetch(pMem, size, flags, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL -urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMAdvise(pMem, size, advice, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( - ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize, - const void *pPattern, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t 
*phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMFill2D(ur_queue_handle_t hQueue, void *pMem, + size_t pitch, size_t patternSize, + const void *pPattern, size_t width, + size_t height, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMFill2D(pMem, pitch, patternSize, pPattern, width, height, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( - ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, - const void *pSrc, size_t srcPitch, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMMemcpy2D(ur_queue_handle_t hQueue, bool blocking, + void *pDst, size_t dstPitch, const void *pSrc, + size_t srcPitch, size_t width, size_t height, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMMemcpy2D(blocking, pDst, dstPitch, pSrc, srcPitch, width, height, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( +ur_result_t urEnqueueDeviceGlobalVariableWrite( ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, bool blockingWrite, size_t count, size_t offset, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, @@ -227,7 +240,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( hProgram, name, blockingWrite, count, offset, pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( +ur_result_t urEnqueueDeviceGlobalVariableRead( ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, bool blockingRead, size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, @@ -236,25 +249,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( hProgram, name, blockingRead, count, offset, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pDst, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueReadHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueReadHostPipe(hProgram, pipe_symbol, blocking, pDst, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pSrc, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueWriteHostPipe(hProgram, pipe_symbol, blocking, pSrc, 
size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( +ur_result_t urBindlessImagesImageCopyExp( ur_queue_handle_t hQueue, const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, const ur_image_format_t *pSrcImageFormat, @@ -267,7 +284,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( pDstImageFormat, pCopyRegion, imageCopyFlags, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( +ur_result_t urBindlessImagesWaitExternalSemaphoreExp( ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -275,7 +292,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( hSemaphore, hasWaitValue, waitValue, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( +ur_result_t urBindlessImagesSignalExternalSemaphoreExp( ur_queue_handle_t hQueue, ur_exp_external_semaphore_handle_t hSemaphore, bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -283,7 +300,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( hSemaphore, hasSignalValue, signalValue, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( +ur_result_t urEnqueueCooperativeKernelLaunchExp( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, @@ -292,13 +309,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( +ur_result_t urEnqueueTimestampRecordingExp( ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { return hQueue->enqueueTimestampRecordingExp(blocking, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( +ur_result_t urEnqueueKernelLaunchCustomExp( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList, @@ -310,7 +327,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( numPropsInLaunchPropList, launchPropList, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( +ur_result_t urEnqueueNativeCommandExp( ur_queue_handle_t hQueue, ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, @@ -321,3 +338,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( pfnNativeEnqueue, data, numMemsInMemList, phMemList, pProperties, numEventsInWaitList, phEventWaitList, phEvent); } +} // namespace ur::level_zero \ No newline at end of file diff --git a/source/adapters/level_zero/v2/queue_create.cpp 
b/source/adapters/level_zero/v2/queue_create.cpp index 938dd5cb64..c72320842b 100644 --- a/source/adapters/level_zero/v2/queue_create.cpp +++ b/source/adapters/level_zero/v2/queue_create.cpp @@ -17,16 +17,18 @@ #include #include -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { +namespace ur::level_zero { +ur_result_t urQueueCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_queue_properties_t *pProperties, + ur_queue_handle_t *phQueue) { // TODO: For now, always use immediate, in-order *phQueue = new v2::ur_queue_immediate_in_order_t(hContext, hDevice, pProperties); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( +ur_result_t urQueueCreateWithNativeHandle( ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, ur_queue_handle_t *phQueue) { @@ -38,3 +40,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( logger::error("{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/usm.cpp b/source/adapters/level_zero/v2/usm.cpp index 6ac5f0f3b6..3706fe21e4 100644 --- a/source/adapters/level_zero/v2/usm.cpp +++ b/source/adapters/level_zero/v2/usm.cpp @@ -22,7 +22,8 @@ ur_context_handle_t ur_usm_pool_handle_t_::getContextHandle() const { return hContext; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( +namespace ur::level_zero { +ur_result_t urUSMPoolCreate( ur_context_handle_t hContext, ///< [in] handle of the context object ur_usm_pool_desc_t * pPoolDesc, ///< [in] pointer to USM pool descriptor. 
Can be chained with @@ -34,15 +35,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRetain( - ur_usm_pool_handle_t hPool ///< [in] pointer to USM memory pool +ur_result_t +urUSMPoolRetain(ur_usm_pool_handle_t hPool ///< [in] pointer to USM memory pool ) { hPool->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRelease( - ur_usm_pool_handle_t hPool ///< [in] pointer to USM memory pool +ur_result_t +urUSMPoolRelease(ur_usm_pool_handle_t hPool ///< [in] pointer to USM memory pool ) { if (hPool->RefCount.decrementAndTest()) { delete hPool; @@ -50,7 +51,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( +ur_result_t urUSMPoolGetInfo( ur_usm_pool_handle_t hPool, ///< [in] handle of the USM memory pool ur_usm_pool_info_t propName, ///< [in] name of the pool property to query size_t propSize, ///< [in] size in bytes of the pool property value provided @@ -74,7 +75,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( } } -UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( +ur_result_t urUSMDeviceAlloc( ur_context_handle_t hContext, ///< [in] handle of the context object ur_device_handle_t hDevice, ///< [in] handle of the device object const ur_usm_desc_t @@ -96,7 +97,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( +ur_result_t urUSMSharedAlloc( ur_context_handle_t hContext, ///< [in] handle of the context object ur_device_handle_t hDevice, ///< [in] handle of the device object const ur_usm_desc_t @@ -121,7 +122,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( +ur_result_t urUSMHostAlloc( ur_context_handle_t hContext, ///< [in] handle of the context object const ur_usm_desc_t *pUSMDesc, ///< [in][optional] USM memory allocation descriptor @@ -142,9 +143,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( - ur_context_handle_t hContext, ///< [in] handle of the context object - void *pMem ///< [in] pointer to USM memory object +ur_result_t +urUSMFree(ur_context_handle_t hContext, ///< [in] handle of the context object + void *pMem ///< [in] pointer to USM memory object ) { std::ignore = hContext; @@ -152,7 +153,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( +ur_result_t urUSMGetMemAllocInfo( ur_context_handle_t hContext, ///< [in] handle of the context object const void *ptr, ///< [in] pointer to USM memory object ur_usm_alloc_info_t @@ -223,3 +224,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( } return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/virtual_mem.cpp b/source/adapters/level_zero/virtual_mem.cpp index e3b90121a1..e89899ded7 100644 --- a/source/adapters/level_zero/virtual_mem.cpp +++ b/source/adapters/level_zero/virtual_mem.cpp @@ -15,7 +15,9 @@ #include "physical_mem.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( +namespace ur::level_zero { + +ur_result_t urVirtualMemGranularityGetInfo( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_virtual_mem_granularity_info_t propName, size_t 
propSize, void *pPropValue, size_t *pPropSizeRet) { @@ -39,24 +41,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urVirtualMemReserve(ur_context_handle_t hContext, const void *pStart, - size_t size, void **ppStart) { +ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, + const void *pStart, size_t size, + void **ppStart) { ZE2UR_CALL(zeVirtualMemReserve, (hContext->ZeContext, pStart, size, ppStart)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree( - ur_context_handle_t hContext, const void *pStart, size_t size) { +ur_result_t urVirtualMemFree(ur_context_handle_t hContext, const void *pStart, + size_t size) { ZE2UR_CALL(zeVirtualMemFree, (hContext->ZeContext, pStart, size)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, - size_t size, ur_virtual_mem_access_flags_t flags) { +ur_result_t urVirtualMemSetAccess(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_access_flags_t flags) { ze_memory_access_attribute_t AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_NONE; if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_READWRITE; @@ -69,10 +71,10 @@ urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size, - ur_physical_mem_handle_t hPhysicalMem, size_t offset, - ur_virtual_mem_access_flags_t flags) { +ur_result_t urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, + size_t size, ur_physical_mem_handle_t hPhysicalMem, + size_t offset, + ur_virtual_mem_access_flags_t flags) { ze_memory_access_attribute_t AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_NONE; if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_READWRITE; @@ -86,17 +88,18 @@ urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap( - ur_context_handle_t hContext, const void *pStart, size_t size) { +ur_result_t urVirtualMemUnmap(ur_context_handle_t hContext, const void *pStart, + size_t size) { ZE2UR_CALL(zeVirtualMemUnmap, (hContext->ZeContext, pStart, size)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( - ur_context_handle_t hContext, const void *pStart, - [[maybe_unused]] size_t size, ur_virtual_mem_info_t propName, - size_t propSize, void *pPropValue, size_t *pPropSizeRet) { +ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext, + const void *pStart, + [[maybe_unused]] size_t size, + ur_virtual_mem_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { @@ -119,3 +122,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index 01f6a34325..0cb3777601 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -36,6 +36,13 @@ FetchContent_Declare(unified-memory-framework GIT_TAG ${UMF_TAG} ) +if (UR_STATIC_ADAPTER_L0) + if (UMF_BUILD_SHARED_LIBRARY) + message(STATUS "Static adapter is not 
compatible with shared UMF, switching to fully statically linked UMF") + set(UMF_BUILD_SHARED_LIBRARY OFF) + endif() +endif() + set(UMF_BUILD_TESTS OFF CACHE INTERNAL "Build UMF tests") set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "Build UMF examples") # TODO: L0 provider not ready yet @@ -85,9 +92,9 @@ install(TARGETS ur_common add_library(ur_umf INTERFACE) target_sources(ur_umf INTERFACE - umf_helpers.hpp - umf_pools/disjoint_pool_config_parser.cpp - ur_pool_manager.hpp + $ + $ + $ ) add_library(${PROJECT_NAME}::umf ALIAS ur_umf) diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp index 0475cf31e4..d66612b917 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -60,12 +60,14 @@ int ur_duplicate_fd(int pid, int fd_in); /////////////////////////////////////////////////////////////////////////////// #if defined(_WIN32) #define MAKE_LIBRARY_NAME(NAME, VERSION) NAME ".dll" +#define STATIC_LIBRARY_EXTENSION ".lib" #else #if defined(__APPLE__) #define MAKE_LIBRARY_NAME(NAME, VERSION) "lib" NAME "." VERSION ".dylib" #else #define MAKE_LIBRARY_NAME(NAME, VERSION) "lib" NAME ".so." VERSION #endif +#define STATIC_LIBRARY_EXTENSION ".a" #endif inline std::string create_library_path(const char *name, const char *path) { diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index af05c81767..48329cfb37 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -47,6 +47,7 @@ add_library(${PROJECT_NAME}::loader ALIAS ur_loader) target_include_directories(ur_loader PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/layers ) @@ -60,6 +61,13 @@ target_link_libraries(ur_loader PRIVATE ${PROJECT_NAME}::headers ) +if(UR_STATIC_ADAPTER_L0) + target_link_libraries(ur_loader PRIVATE + ur_adapter_level_zero + ) + target_compile_definitions(ur_loader PRIVATE UR_STATIC_ADAPTER_LEVEL_ZERO) +endif() + if(UR_ENABLE_TRACING) target_link_libraries(ur_loader PRIVATE ${TARGET_XPTI}) target_include_directories(ur_loader PRIVATE ${xpti_SOURCE_DIR}/include) diff --git a/source/loader/ur_adapter_registry.hpp b/source/loader/ur_adapter_registry.hpp index 25cd9a9fff..f718f35490 100644 --- a/source/loader/ur_adapter_registry.hpp +++ b/source/loader/ur_adapter_registry.hpp @@ -33,6 +33,14 @@ class AdapterRegistry { if (forceLoadedAdaptersOpt.has_value()) { for (const auto &s : forceLoadedAdaptersOpt.value()) { auto path = fs::path(s); + if (path.filename().extension() == STATIC_LIBRARY_EXTENSION) { + logger::warning( + "UR_ADAPTERS_FORCE_LOAD contains a path to a static" + "library {}, it will be skipped", + s); + continue; + } + bool exists = false; try { exists = fs::exists(path); @@ -41,11 +49,12 @@ class AdapterRegistry { } if (exists) { + forceLoaded = true; adaptersLoadPaths.emplace_back( std::vector{std::move(path)}); } else { logger::warning( - "Detected nonexistent path {} in environmental " + "Detected nonexistent path {} in environment " "variable UR_ADAPTERS_FORCE_LOAD", s); } @@ -92,6 +101,8 @@ class AdapterRegistry { size_t size() const noexcept { return adaptersLoadPaths.size(); } + bool adaptersForceLoaded() { return forceLoaded; } + std::vector>::const_iterator begin() const noexcept { return adaptersLoadPaths.begin(); } @@ -183,6 +194,8 @@ class AdapterRegistry { } } + bool forceLoaded = false; + public: void enableMock() { adaptersLoadPaths.clear(); diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 56e16b769d..26f55c071f 100644 --- a/source/loader/ur_ldrddi.cpp +++ 
b/source/loader/ur_ldrddi.cpp @@ -8631,6 +8631,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8689,6 +8694,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8781,6 +8791,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8875,6 +8890,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8936,6 +8956,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9019,6 +9044,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9081,6 +9111,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9142,6 +9177,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9213,6 +9253,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9269,6 +9314,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetMemProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; 
} @@ -9333,6 +9383,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9390,6 +9445,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9451,6 +9511,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9522,6 +9587,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9579,6 +9649,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9640,6 +9715,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9699,6 +9779,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9760,6 +9845,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9815,6 +9905,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9875,6 +9970,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9937,6 +10037,11 @@ UR_DLLEXPORT 
ur_result_t UR_APICALL urGetDeviceProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } diff --git a/source/loader/ur_loader.cpp b/source/loader/ur_loader.cpp index bfc9da3e50..f2b43f2725 100644 --- a/source/loader/ur_loader.cpp +++ b/source/loader/ur_loader.cpp @@ -8,13 +8,24 @@ * */ #include "ur_loader.hpp" +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +#include "adapters/level_zero/ur_interface_loader.hpp" +#endif namespace ur_loader { /////////////////////////////////////////////////////////////////////////////// context_t *getContext() { return context_t::get_direct(); } -/////////////////////////////////////////////////////////////////////////////// ur_result_t context_t::init() { +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO + // If the adapters were force loaded, it means the user wants to use + // a specific adapter library. Don't load any static adapters. + if (!adapter_registry.adaptersForceLoaded()) { + auto &level_zero = platforms.emplace_back(nullptr); + ur::level_zero::urAdapterGetDdiTables(&level_zero.dditable.ur); + } +#endif + for (const auto &adapterPaths : adapter_registry) { for (const auto &path : adapterPaths) { auto handle = LibLoader::loadAdapterLibrary(path.string().c_str()); diff --git a/test/adapters/level_zero/CMakeLists.txt b/test/adapters/level_zero/CMakeLists.txt index f372dc655d..b1c34b8916 100644 --- a/test/adapters/level_zero/CMakeLists.txt +++ b/test/adapters/level_zero/CMakeLists.txt @@ -40,11 +40,12 @@ if(UR_BUILD_ADAPTER_L0) generate_device_binaries kernel_names_header) endif() - if(NOT WIN32) + if(NOT WIN32 AND NOT UR_STATIC_ADAPTER_L0) # Make L0 use CallMap from a seprate shared lib so that we can access the map # from the tests. This only seems to work on linux add_library(zeCallMap SHARED zeCallMap.cpp) target_compile_definitions(ur_adapter_level_zero PRIVATE UR_L0_CALL_COUNT_IN_TESTS) + # TODO: stop exporting internals like this for tests... target_link_libraries(ur_adapter_level_zero PRIVATE zeCallMap) add_adapter_test(level_zero_ze_calls diff --git a/test/adapters/level_zero/v2/CMakeLists.txt b/test/adapters/level_zero/v2/CMakeLists.txt index 95f1f40902..f1c88a35ee 100644 --- a/test/adapters/level_zero/v2/CMakeLists.txt +++ b/test/adapters/level_zero/v2/CMakeLists.txt @@ -35,6 +35,10 @@ add_unittest(level_zero_command_list_cache add_unittest(level_zero_event_pool event_pool_test.cpp + ${PROJECT_SOURCE_DIR}/source/ur/ur.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/adapter.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/device.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/platform.cpp ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_pool.cpp ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_pool_cache.cpp ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_provider_normal.cpp From 3abe18cf9e4fc89573275d27df87c0060231b591 Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Tue, 27 Aug 2024 13:10:04 +0200 Subject: [PATCH 2/2] Make /DEPENDENTLOADFLAG:0x800 link option PRIVATE This option, marked as PUBLIC, is propagated to the targets linked with the 'ur_adapter_level_zero' target when the Level Zero adapter is built as a static library. This led to a STATUS_DLL_NOT_FOUND error when running test binaries on Windows. 
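For context, a minimal CMake sketch of the propagation behaviour behind this change. It is illustrative only and not part of the patch; the target names `adapter` and `consumer` and the source file names are placeholders, and an MSVC toolchain is assumed since /DEPENDENTLOADFLAG is an MSVC linker flag.

# Placeholder static library standing in for ur_adapter_level_zero.
add_library(adapter STATIC adapter.cpp)

# Before this patch: PUBLIC also records the flag in INTERFACE_LINK_OPTIONS,
# so every target that links `adapter` (for example a test executable) links
# with /DEPENDENTLOADFLAG:0x800, restricting its DLL search to System32 and
# causing STATUS_DLL_NOT_FOUND for DLLs that live elsewhere.
# target_link_options(adapter PUBLIC /DEPENDENTLOADFLAG:0x800)

# After this patch: PRIVATE keeps the flag on `adapter` alone; nothing is
# propagated to consumers.
target_link_options(adapter PRIVATE /DEPENDENTLOADFLAG:0x800)

# Placeholder consumer, e.g. a test binary; it no longer inherits the flag.
add_executable(consumer test_main.cpp)
target_link_libraries(consumer PRIVATE adapter)

Since a static archive is not itself produced by the linker, the PRIVATE flag is effectively inert on the adapter target, and any executable that still wants the System32-only search behaviour can set the option on itself explicitly.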
--- source/adapters/level_zero/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 23382b919e..653bf4d118 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -172,7 +172,7 @@ if(UR_BUILD_ADAPTER_L0) if (WIN32) # 0x800: Search for the DLL only in the System32 folder - target_link_options(ur_adapter_level_zero PUBLIC /DEPENDENTLOADFLAG:0x800) + target_link_options(ur_adapter_level_zero PRIVATE /DEPENDENTLOADFLAG:0x800) endif() target_link_libraries(ur_adapter_level_zero PRIVATE