From 1c95ae44a13d407a56ca87e01eb8bbb8454146ae Mon Sep 17 00:00:00 2001
From: Piotr Balcer
Date: Thu, 4 Jul 2024 14:46:12 +0200
Subject: [PATCH] Add support for static linking of the L0 adapter

This patch adds a CMake option that builds the L0 adapter as a static
library and then embeds it in the loader. The mechanism itself is fairly
generic and, when a need arises, the same can be done for other adapters.
---
 .github/workflows/build-hw-reusable.yml | 7 +-
 CMakeLists.txt | 1 +
 scripts/generate_code.py | 30 +
 scripts/templates/ldrddi.cpp.mako | 4 +
 scripts/templates/queue_api.cpp.mako | 4 +-
 .../templates/ur_interface_loader.cpp.mako | 88 ++
 .../templates/ur_interface_loader.hpp.mako | 38 +
 source/adapters/level_zero/CMakeLists.txt | 12 +-
 source/adapters/level_zero/adapter.cpp | 18 +-
 source/adapters/level_zero/command_buffer.cpp | 93 +-
 source/adapters/level_zero/common.hpp | 2 +-
 source/adapters/level_zero/context.cpp | 17 +-
 source/adapters/level_zero/context.hpp | 2 +-
 source/adapters/level_zero/device.cpp | 759 +++++++--------
 source/adapters/level_zero/device.hpp | 2 +-
 source/adapters/level_zero/event.cpp | 254 +++---
 source/adapters/level_zero/event.hpp | 2 +-
 source/adapters/level_zero/image.cpp | 279 +++---
 source/adapters/level_zero/image.hpp | 2 +-
 source/adapters/level_zero/kernel.cpp | 131 +--
 source/adapters/level_zero/memory.cpp | 52 +-
 source/adapters/level_zero/memory.hpp | 2 +-
 source/adapters/level_zero/physical_mem.cpp | 11 +-
 source/adapters/level_zero/platform.cpp | 95 +-
 source/adapters/level_zero/program.cpp | 105 ++-
 source/adapters/level_zero/queue.cpp | 194 ++--
 source/adapters/level_zero/queue.hpp | 2 +-
 source/adapters/level_zero/queue_api.cpp | 210 +++--
 source/adapters/level_zero/sampler.cpp | 23 +-
 .../level_zero/ur_interface_loader.cpp | 862 ++++++++++--------
 .../level_zero/ur_interface_loader.hpp | 703 ++++++++++++++
 source/adapters/level_zero/ur_level_zero.hpp | 2 +-
 source/adapters/level_zero/usm.cpp | 217 ++---
 source/adapters/level_zero/usm_p2p.cpp | 20 +-
 .../level_zero/v2/command_list_cache.hpp | 2 +-
 source/adapters/level_zero/virtual_mem.cpp | 42 +-
 source/loader/CMakeLists.txt | 7 +
 source/loader/ur_ldrddi.cpp | 105 +++
 source/loader/ur_loader.cpp | 10 +
 source/loader/ur_loader.hpp | 2 +
 40 files changed, 2803 insertions(+), 1608 deletions(-)
 create mode 100644 scripts/templates/ur_interface_loader.cpp.mako
 create mode 100644 scripts/templates/ur_interface_loader.hpp.mako
 create mode 100644 source/adapters/level_zero/ur_interface_loader.hpp

diff --git a/.github/workflows/build-hw-reusable.yml b/.github/workflows/build-hw-reusable.yml
index 8c8f28a868..a350f60c8d 100644
--- a/.github/workflows/build-hw-reusable.yml
+++ b/.github/workflows/build-hw-reusable.yml
@@ -29,7 +29,7 @@ jobs:
     strategy:
       matrix:
         adapter: [
-          {name: "${{inputs.name}}", platform: "${{inputs.platform}}"},
+          {name: "${{inputs.name}}", platform: "${{inputs.platform}}", static: OFF},
         ]
         build_type: [Debug, Release]
         compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: clang++}]
@@ -37,6 +37,10 @@
       exclude:
         - adapter: {name: L0, platform: ""}
           compiler: {c: clang, cxx: clang++}
+      include:
+        - adapter: {name: L0, platform: "", static: ON}
+          build_type: [Debug]
+          compiler: {c: gcc, cxx: g++}
 
     runs-on: ${{matrix.adapter.name}}
 
@@ -63,6 +67,7 @@
         -DUR_DEVELOPER_MODE=ON
         -DUR_BUILD_TESTS=ON
         -DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON
+        -DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static}}
         -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++
-DUR_SYCL_LIBRARY_DIR=${{github.workspace}}/dpcpp_compiler/lib ${{ matrix.adapter.name == 'HIP' && '-DUR_CONFORMANCE_AMD_ARCH=gfx1030' || '' }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 952f4d8175..3887a7b5b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,7 @@ option(UR_BUILD_ADAPTER_CUDA "Build the CUDA adapter" OFF) option(UR_BUILD_ADAPTER_HIP "Build the HIP adapter" OFF) option(UR_BUILD_ADAPTER_NATIVE_CPU "Build the Native-CPU adapter" OFF) option(UR_BUILD_ADAPTER_ALL "Build all currently supported adapters" OFF) +option(UR_STATIC_ADAPTER_L0 "Build the Level-Zero adapter as static and embed in the loader" OFF) option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF) option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF) option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF) diff --git a/scripts/generate_code.py b/scripts/generate_code.py index b3a1146a3d..567a578ac8 100644 --- a/scripts/generate_code.py +++ b/scripts/generate_code.py @@ -379,6 +379,32 @@ def generate_loader(path, section, namespace, tags, version, specs, meta): ) print("Generated %s lines of code.\n"%loc) +""" + generates c/c++ files from the specification documents +""" +def _mako_interface_loader_api(path, adapter, ext, namespace, tags, version, specs, meta): + dstpath = os.path.join(path, adapter) + os.makedirs(dstpath, exist_ok=True) + + template = f"ur_interface_loader.{ext}.mako" + fin = os.path.join(templates_dir, template) + + name = f"ur_interface_loader" + + filename = f"{name}.{ext}" + fout = os.path.join(dstpath, filename) + + print("Generating %s..."%fout) + return util.makoWrite( + fin, fout, + name=name, + adapter=adapter, + ver=version, + namespace=namespace, + tags=tags, + specs=specs, + meta=meta,) + """ Entry-point: generates adapter for unified_runtime @@ -395,6 +421,10 @@ def generate_adapters(path, section, namespace, tags, version, specs, meta): loc += _mako_linker_scripts( dstpath, "adapter", "def", namespace, tags, version, specs, meta ) + + loc += _mako_interface_loader_api(dstpath, "level_zero", "cpp", namespace, tags, version, specs, meta) + loc += _mako_interface_loader_api(dstpath, "level_zero", "hpp", namespace, tags, version, specs, meta) + print("Generated %s lines of code.\n"%loc) """ diff --git a/scripts/templates/ldrddi.cpp.mako b/scripts/templates/ldrddi.cpp.mako index 44631cc360..9c797a0ec3 100644 --- a/scripts/templates/ldrddi.cpp.mako +++ b/scripts/templates/ldrddi.cpp.mako @@ -365,6 +365,10 @@ ${tbl['export']['name']}( // Load the device-platform DDI tables for( auto& platform : ur_loader::getContext()->platforms ) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) + continue; + if(platform.initStatus != ${X}_RESULT_SUCCESS) continue; auto getTable = reinterpret_cast<${tbl['pfn']}>( diff --git a/scripts/templates/queue_api.cpp.mako b/scripts/templates/queue_api.cpp.mako index f941c7ba03..fcfa89d258 100644 --- a/scripts/templates/queue_api.cpp.mako +++ b/scripts/templates/queue_api.cpp.mako @@ -24,8 +24,9 @@ from templates import helper as th ur_queue_handle_t_::~ur_queue_handle_t_() {} ## FUNCTION ################################################################### +namespace ${x}::level_zero { %for obj in th.get_queue_related_functions(specs, n, tags): -${X}_APIEXPORT ${x}_result_t ${X}_APICALL +${x}_result_t ${th.make_func_name(n, tags, obj)}( %for line in th.make_param_lines(n, tags, obj, format=["name", "type", "delim"]): ${line} @@ -35,3 +36,4 @@ 
${th.make_func_name(n, tags, obj)}( return ${obj['params'][0]['name']}->${th.transform_queue_related_function_name(n, tags, obj, format=["name"])}; } %endfor +} \ No newline at end of file diff --git a/scripts/templates/ur_interface_loader.cpp.mako b/scripts/templates/ur_interface_loader.cpp.mako new file mode 100644 index 0000000000..3298b5bcae --- /dev/null +++ b/scripts/templates/ur_interface_loader.cpp.mako @@ -0,0 +1,88 @@ +<%! +import re +from templates import helper as th +%><% + n=namespace + N=n.upper() + + x=tags['$x'] + X=x.upper() + Adapter=adapter.upper() +%>//===--------- ${n}_interface_loader.cpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include <${n}_api.h> +#include <${n}_ddi.h> + +#include "ur_interface_loader.hpp" + +static ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + // Pre 1.0 we enforce loader and adapter must have same version. + // Post 1.0 only major version match should be required. + if (version != UR_API_VERSION_CURRENT) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + return UR_RESULT_SUCCESS; +} + +#ifdef UR_STATIC_ADAPTER_${Adapter} +namespace ${n}::${adapter} { +#elif defined(__cplusplus) +extern "C" { +#endif + +%for tbl in th.get_pfntables(specs, meta, n, tags): +${X}_APIEXPORT ${x}_result_t ${X}_APICALL ${tbl['export']['name']}( + %for line in th.make_param_lines(n, tags, tbl['export'], format=["type", "name", "delim"]): + ${line} + %endfor + ) +{ + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; + } + + %for obj in tbl['functions']: + pDdiTable->${th.append_ws(th.make_pfn_name(n, tags, obj), 43)} = ${n}::${adapter}::${th.make_func_name(n, tags, obj)}; + %endfor + + return result; +} + +%endfor + +#ifdef UR_STATIC_ADAPTER_${Adapter} +} // namespace ur::${adapter} +#elif defined(__cplusplus) +} // extern "C" +#endif + +#ifdef UR_STATIC_ADAPTER_${Adapter} +namespace ur::${adapter} { +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { + if (ddi == nullptr) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + ur_result_t result; + +%for tbl in th.get_pfntables(specs, meta, n, tags): + result = ${n}::${adapter}::${tbl['export']['name']}( ${X}_API_VERSION_CURRENT, &ddi->${tbl['name']} ); + if (result != UR_RESULT_SUCCESS) + return result; +%endfor + + return result; +} +} +#endif diff --git a/scripts/templates/ur_interface_loader.hpp.mako b/scripts/templates/ur_interface_loader.hpp.mako new file mode 100644 index 0000000000..e2902f93c8 --- /dev/null +++ b/scripts/templates/ur_interface_loader.hpp.mako @@ -0,0 +1,38 @@ +<%! +import re +from templates import helper as th +%><% + n=namespace + N=n.upper() + + x=tags['$x'] + X=x.upper() + Adapter=adapter.upper() +%>//===--------- ${n}_interface_loader.hpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include <${n}_api.h> +#include <${n}_ddi.h> + +namespace ${n}::${adapter} { +%for s in specs: +%for obj in th.filter_items(s['objects'], 'type', 'function'): +%if not th.obj_traits.is_loader_only(obj): +${x}_result_t ${th.make_func_name(n, tags, obj)}( + %for line in th.make_param_lines(n, tags, obj, format=["type", "name", "delim"]): + ${line} + %endfor + ); +%endif +%endfor +%endfor +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); +#endif +} diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 1c74124fc4..3e68adb330 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -93,8 +93,12 @@ target_include_directories(LevelZeroLoader-Headers INTERFACE "${LEVEL_ZERO_INCLUDE_DIR}" ) -add_ur_adapter(${TARGET_NAME} - SHARED +set(ADAPTER_LIB_TYPE SHARED) +if(UR_STATIC_ADAPTER_L0) + set(ADAPTER_LIB_TYPE STATIC) +endif() + +add_ur_adapter(${TARGET_NAME} ${ADAPTER_LIB_TYPE} ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp @@ -137,6 +141,10 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp ) +if(UR_STATIC_ADAPTER_L0) + target_compile_definitions(ur_adapter_level_zero PUBLIC UR_STATIC_ADAPTER_LEVEL_ZERO) +endif() + if(NOT WIN32) target_sources(ur_adapter_level_zero PRIVATE diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 0605b9a40c..b3f3b6b0a3 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -240,7 +240,8 @@ ur_result_t adapterStateTeardown() { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( +namespace ur::level_zero { +ur_result_t urAdapterGet( uint32_t NumEntries, ///< [in] the number of platforms to be added to ///< phAdapters. If phAdapters is not NULL, then ///< NumEntries should be greater than zero, otherwise @@ -281,7 +282,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { +ur_result_t urAdapterRelease(ur_adapter_handle_t) { // Check first if the Adapter pointer is valid if (GlobalAdapter) { std::lock_guard Lock{GlobalAdapter->Mutex}; @@ -293,7 +294,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { +ur_result_t urAdapterRetain(ur_adapter_handle_t) { if (GlobalAdapter) { std::lock_guard Lock{GlobalAdapter->Mutex}; GlobalAdapter->RefCount++; @@ -302,7 +303,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( +ur_result_t urAdapterGetLastError( ur_adapter_handle_t, ///< [in] handle of the platform instance const char **Message, ///< [out] pointer to a C string where the adapter ///< specific error message will be stored. 
@@ -315,11 +316,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( return ErrorMessageCode; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, - ur_adapter_info_t PropName, - size_t PropSize, - void *PropValue, - size_t *PropSizeRet) { +ur_result_t urAdapterGetInfo(ur_adapter_handle_t, ur_adapter_info_t PropName, + size_t PropSize, void *PropValue, + size_t *PropSizeRet) { UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); switch (PropName) { @@ -333,3 +332,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 0fe7e646f6..434f808351 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "command_buffer.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" /* L0 Command-buffer Extension Doc see: @@ -445,18 +446,18 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( IsUpdatable(Desc ? Desc->isUpdatable : false), IsProfilingEnabled(Desc ? Desc->enableProfiling : false), IsInOrderCmdList(IsInOrderCmdList) { - urContextRetain(Context); - urDeviceRetain(Device); + ur::level_zero::urContextRetain(Context); + ur::level_zero::urDeviceRetain(Device); } // The ur_exp_command_buffer_handle_t_ destructor releases all the memory // objects allocated for command_buffer management. ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { // Release the memory allocated to the Context stored in the command_buffer - urContextRelease(Context); + ur::level_zero::urContextRelease(Context); // Release the device - urDeviceRelease(Device); + ur::level_zero::urDeviceRelease(Device); // Release the memory allocated to the CommandList stored in the // command_buffer @@ -526,7 +527,7 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { for (auto &AssociatedKernel : KernelsList) { ReleaseIndirectMem(AssociatedKernel); - urKernelRelease(AssociatedKernel); + ur::level_zero::urKernelRelease(AssociatedKernel); } } @@ -537,16 +538,16 @@ ur_exp_command_buffer_command_handle_t_:: ur_kernel_handle_t Kernel = nullptr) : CommandBuffer(CommandBuffer), CommandId(CommandId), WorkDim(WorkDim), UserDefinedLocalSize(UserDefinedLocalSize), Kernel(Kernel) { - urCommandBufferRetainExp(CommandBuffer); + ur::level_zero::urCommandBufferRetainExp(CommandBuffer); if (Kernel) - urKernelRetain(Kernel); + ur::level_zero::urKernelRetain(Kernel); } ur_exp_command_buffer_command_handle_t_:: ~ur_exp_command_buffer_command_handle_t_() { - urCommandBufferReleaseExp(CommandBuffer); + ur::level_zero::urCommandBufferReleaseExp(CommandBuffer); if (Kernel) - urKernelRelease(Kernel); + ur::level_zero::urKernelRelease(Kernel); } void ur_exp_command_buffer_handle_t_::registerSyncPoint( @@ -583,7 +584,7 @@ ur_result_t ur_exp_command_buffer_handle_t_::getFenceForQueue( return UR_RESULT_SUCCESS; } -namespace { +namespace ur::level_zero { /** * Creates a L0 command list @@ -642,9 +643,8 @@ bool canBeInOrder(ur_context_handle_t Context, ? (CommandBufferDesc ? 
CommandBufferDesc->isInOrder : false) : false; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, const ur_exp_command_buffer_desc_t *CommandBufferDesc, ur_exp_command_buffer_handle_t *CommandBuffer) { @@ -716,13 +716,13 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferRetainExp(ur_exp_command_buffer_handle_t CommandBuffer) { CommandBuffer->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) { if (!CommandBuffer->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -731,7 +731,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL +ur_result_t urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { UR_ASSERT(CommandBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER); // It is not allowed to append to command list from multiple threads. @@ -775,8 +775,6 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { return UR_RESULT_SUCCESS; } -namespace { - /** * Sets the global offset for a kernel command that will be appended to the * command buffer. @@ -878,9 +876,8 @@ createCommandHandle(ur_exp_command_buffer_handle_t CommandBuffer, return UR_RESULT_SUCCESS; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( +ur_result_t urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, uint32_t WorkDim, const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, const size_t *LocalWorkSize, @@ -917,7 +914,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( // is in use. Once the event has been signaled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. 
- UR_CALL(urKernelRetain(Kernel)); + UR_CALL(ur::level_zero::urKernelRetain(Kernel)); if (Command && CommandBuffer->IsUpdatable) { UR_CALL(createCommandHandle(CommandBuffer, Kernel, WorkDim, LocalWorkSize, @@ -938,7 +935,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( +ur_result_t urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t CommandBuffer, void *Dst, const void *Src, size_t Size, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -953,7 +950,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( +ur_result_t urCommandBufferAppendMemBufferCopyExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t SrcMem, ur_mem_handle_t DstMem, size_t SrcOffset, size_t DstOffset, size_t Size, uint32_t NumSyncPointsInWaitList, @@ -983,7 +980,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( +ur_result_t urCommandBufferAppendMemBufferCopyRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t SrcMem, ur_mem_handle_t DstMem, ur_rect_offset_t SrcOrigin, ur_rect_offset_t DstOrigin, ur_rect_region_t Region, size_t SrcRowPitch, @@ -1016,7 +1013,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( +ur_result_t urCommandBufferAppendMemBufferWriteExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, size_t Offset, size_t Size, const void *Src, uint32_t NumSyncPointsInWaitList, @@ -1038,7 +1035,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( +ur_result_t urCommandBufferAppendMemBufferWriteRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset, ur_rect_region_t Region, size_t BufferRowPitch, size_t BufferSlicePitch, @@ -1063,7 +1060,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( +ur_result_t urCommandBufferAppendMemBufferReadExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, size_t Offset, size_t Size, void *Dst, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -1083,7 +1080,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( +ur_result_t urCommandBufferAppendMemBufferReadRectExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset, ur_rect_region_t Region, size_t BufferRowPitch, size_t BufferSlicePitch, @@ -1107,7 +1104,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL 
urCommandBufferAppendUSMPrefetchExp( +ur_result_t urCommandBufferAppendUSMPrefetchExp( ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, ur_usm_migration_flags_t Flags, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -1146,7 +1143,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( +ur_result_t urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_handle_t CommandBuffer, const void *Mem, size_t Size, ur_usm_advice_flags_t Advice, uint32_t NumSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *SyncPointWaitList, @@ -1208,7 +1205,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( +ur_result_t urCommandBufferAppendMemBufferFillExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_mem_handle_t Buffer, const void *Pattern, size_t PatternSize, size_t Offset, size_t Size, uint32_t NumSyncPointsInWaitList, @@ -1229,7 +1226,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( +ur_result_t urCommandBufferAppendUSMFillExp( ur_exp_command_buffer_handle_t CommandBuffer, void *Ptr, const void *Pattern, size_t PatternSize, size_t Size, uint32_t NumSyncPointsInWaitList, @@ -1243,8 +1240,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( Size, NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint); } -namespace { - /** * Gets an L0 command queue that supports the chosen engine. * @param[in] Queue The UR queue used to submit the command buffer. @@ -1364,9 +1359,8 @@ ur_result_t createUserEvent(ur_exp_command_buffer_handle_t CommandBuffer, return UR_RESULT_SUCCESS; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( +ur_result_t urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t UrQueue, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList, ur_event_handle_t *Event) { @@ -1432,13 +1426,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferRetainCommandExp( +ur_result_t urCommandBufferRetainCommandExp( ur_exp_command_buffer_command_handle_t Command) { Command->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( +ur_result_t urCommandBufferReleaseCommandExp( ur_exp_command_buffer_command_handle_t Command) { if (!Command->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -1447,8 +1441,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( return UR_RESULT_SUCCESS; } -namespace { - /** * Validates contents of the update command description. * @param[in] Command The command which is being updated. 
@@ -1758,9 +1750,8 @@ ur_result_t updateKernelCommand( return UR_RESULT_SUCCESS; } -} // namespace -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( +ur_result_t urCommandBufferUpdateKernelLaunchExp( ur_exp_command_buffer_command_handle_t Command, const ur_exp_command_buffer_update_kernel_launch_desc_t *CommandDesc) { UR_ASSERT(Command->Kernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -1791,10 +1782,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( - ur_exp_command_buffer_handle_t hCommandBuffer, - ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t +urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { @@ -1807,10 +1799,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( return UR_RESULT_ERROR_INVALID_ENUMERATION; } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( - ur_exp_command_buffer_command_handle_t Command, - ur_exp_command_buffer_command_info_t PropName, size_t PropSize, - void *PropValue, size_t *PropSizeRet) { +ur_result_t +urCommandBufferCommandGetInfoExp(ur_exp_command_buffer_command_handle_t Command, + ur_exp_command_buffer_command_info_t PropName, + size_t PropSize, void *PropValue, + size_t *PropSizeRet) { UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); switch (PropName) { @@ -1822,3 +1815,5 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( return UR_RESULT_ERROR_INVALID_ENUMERATION; } + +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/common.hpp b/source/adapters/level_zero/common.hpp index 59785afb82..610d3f7ba7 100644 --- a/source/adapters/level_zero/common.hpp +++ b/source/adapters/level_zero/common.hpp @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index abcd386c7b..32bf095006 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -18,7 +18,9 @@ #include "queue.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( +namespace ur::level_zero { + +ur_result_t urContextCreate( uint32_t DeviceCount, ///< [in] the number of devices given in phDevices const ur_device_handle_t *Devices, ///< [in][range(0, DeviceCount)] array of handle of devices. @@ -53,7 +55,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextRetain( +ur_result_t urContextRetain( ur_context_handle_t Context ///< [in] handle of the context to get a reference of. ) { @@ -61,7 +63,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextRelease( +ur_result_t urContextRelease( ur_context_handle_t Context ///< [in] handle of the context to release. 
) { ur_platform_handle_t Plt = Context->getPlatform(); @@ -85,7 +87,7 @@ static const bool UseMemcpy2DOperations = [] { return std::atoi(UseMemcpy2DOperationsFlag) > 0; }(); -UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( +ur_result_t urContextGetInfo( ur_context_handle_t Context, ///< [in] handle of the context ur_context_info_t ContextInfoType, ///< [in] type of the info to retrieve size_t PropSize, ///< [in] the number of bytes of memory pointed to by @@ -133,7 +135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( +ur_result_t urContextGetNativeHandle( ur_context_handle_t Context, ///< [in] handle of the context. ur_native_handle_t *NativeContext ///< [out] a pointer to the native ///< handle of the context. @@ -142,7 +144,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( +ur_result_t urContextCreateWithNativeHandle( ur_native_handle_t NativeContext, ///< [in] the native handle of the context. uint32_t NumDevices, const ur_device_handle_t *Devices, @@ -166,7 +168,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( +ur_result_t urContextSetExtendedDeleter( ur_context_handle_t Context, ///< [in] handle of the context. ur_context_extended_deleter_t Deleter, ///< [in] Function pointer to extended deleter. @@ -180,6 +182,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero ur_result_t ur_context_handle_t_::initialize() { diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index dc70a2470c..633824187a 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index d7dc022060..c42db65b16 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -10,13 +10,59 @@ #include "device.hpp" #include "adapter.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" #include "ur_util.hpp" #include #include #include -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet( +// UR_L0_USE_COPY_ENGINE can be set to an integer value, or +// a pair of integer values of the form "lower_index:upper_index". +// Here, the indices point to copy engines in a list of all available copy +// engines. +// This functions returns this pair of indices. +// If the user specifies only a single integer, a value of 0 indicates that +// the copy engines will not be used at all. A value of 1 indicates that all +// available copy engines can be used. +const std::pair +getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) { + const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE"); + const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); + static const char *EnvVar = UrRet ? UrRet : (PiRet ? PiRet : nullptr); + // If the environment variable is not set, no copy engines are used when + // immediate commandlists are being used. For standard commandlists all are + // used. 
+ if (!EnvVar) { + if (Device->ImmCommandListUsed) + return std::pair(0, 0); // Only main copy engine will be used. + return std::pair(0, INT_MAX); // All copy engines will be used. + } + std::string CopyEngineRange = EnvVar; + // Environment variable can be a single integer or a pair of integers + // separated by ":" + auto pos = CopyEngineRange.find(":"); + if (pos == std::string::npos) { + bool UseCopyEngine = (std::stoi(CopyEngineRange) != 0); + if (UseCopyEngine) + return std::pair(0, INT_MAX); // All copy engines can be used. + return std::pair(-1, -1); // No copy engines will be used. + } + int LowerCopyEngineIndex = std::stoi(CopyEngineRange.substr(0, pos)); + int UpperCopyEngineIndex = std::stoi(CopyEngineRange.substr(pos + 1)); + if ((LowerCopyEngineIndex > UpperCopyEngineIndex) || + (LowerCopyEngineIndex < -1) || (UpperCopyEngineIndex < -1)) { + logger::error("UR_L0_LEVEL_ZERO_USE_COPY_ENGINE: invalid value provided, " + "default set."); + LowerCopyEngineIndex = 0; + UpperCopyEngineIndex = INT_MAX; + } + return std::pair(LowerCopyEngineIndex, UpperCopyEngineIndex); +} + +namespace ur::level_zero { + +ur_result_t urDeviceGet( ur_platform_handle_t Platform, ///< [in] handle of the platform instance ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to @@ -1003,156 +1049,357 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( return UR_RESULT_SUCCESS; } -// UR_L0_USE_COPY_ENGINE can be set to an integer value, or -// a pair of integer values of the form "lower_index:upper_index". -// Here, the indices point to copy engines in a list of all available copy -// engines. -// This functions returns this pair of indices. -// If the user specifies only a single integer, a value of 0 indicates that -// the copy engines will not be used at all. A value of 1 indicates that all -// available copy engines can be used. -const std::pair -getRangeOfAllowedCopyEngines(const ur_device_handle_t &Device) { - const char *UrRet = std::getenv("UR_L0_USE_COPY_ENGINE"); - const char *PiRet = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); - static const char *EnvVar = UrRet ? UrRet : (PiRet ? PiRet : nullptr); - // If the environment variable is not set, no copy engines are used when - // immediate commandlists are being used. For standard commandlists all are - // used. - if (!EnvVar) { - if (Device->ImmCommandListUsed) - return std::pair(0, 0); // Only main copy engine will be used. - return std::pair(0, INT_MAX); // All copy engines will be used. - } - std::string CopyEngineRange = EnvVar; - // Environment variable can be a single integer or a pair of integers - // separated by ":" - auto pos = CopyEngineRange.find(":"); - if (pos == std::string::npos) { - bool UseCopyEngine = (std::stoi(CopyEngineRange) != 0); - if (UseCopyEngine) - return std::pair(0, INT_MAX); // All copy engines can be used. - return std::pair(-1, -1); // No copy engines will be used. 
- } - int LowerCopyEngineIndex = std::stoi(CopyEngineRange.substr(0, pos)); - int UpperCopyEngineIndex = std::stoi(CopyEngineRange.substr(pos + 1)); - if ((LowerCopyEngineIndex > UpperCopyEngineIndex) || - (LowerCopyEngineIndex < -1) || (UpperCopyEngineIndex < -1)) { - logger::error("UR_L0_LEVEL_ZERO_USE_COPY_ENGINE: invalid value provided, " - "default set."); - LowerCopyEngineIndex = 0; - UpperCopyEngineIndex = INT_MAX; - } - return std::pair(LowerCopyEngineIndex, UpperCopyEngineIndex); -} - bool CopyEngineRequested(const ur_device_handle_t &Device) { int LowerCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).first; int UpperCopyQueueIndex = getRangeOfAllowedCopyEngines(Device).second; return ((LowerCopyQueueIndex != -1) || (UpperCopyQueueIndex != -1)); } -// Whether immediate commandlists will be used for kernel launches and copies. -// The default is standard commandlists. Setting 1 or 2 specifies use of -// immediate commandlists. Note: when immediate commandlists are used then -// device-only events must be either AllHostVisible or OnDemandHostVisibleProxy. -// (See env var UR_L0_DEVICE_SCOPE_EVENTS). - -// Get value of immediate commandlists env var setting or -1 if unset -ur_device_handle_t_::ImmCmdlistMode -ur_device_handle_t_::useImmediateCommandLists() { - // If immediate commandlist setting is not explicitly set, then use the device - // default. - // TODO: confirm this is good once make_queue revert is added - static const int ImmediateCommandlistsSetting = [] { - const char *UrRet = std::getenv("UR_L0_USE_IMMEDIATE_COMMANDLISTS"); - const char *PiRet = - std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"); - const char *ImmediateCommandlistsSettingStr = - UrRet ? UrRet : (PiRet ? PiRet : nullptr); - if (!ImmediateCommandlistsSettingStr) - return -1; - return std::atoi(ImmediateCommandlistsSettingStr); - }(); +UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( + ur_device_handle_t Device, ///< [in] handle of the device to partition. + const ur_device_partition_properties_t + *Properties, ///< [in] Device partition properties. + uint32_t NumDevices, ///< [in] the number of sub-devices. + ur_device_handle_t + *OutDevices, ///< [out][optional][range(0, NumDevices)] array of handle + ///< of devices. If NumDevices is less than the number of + ///< sub-devices available, then the function shall only + ///< retrieve that number of sub-devices. + uint32_t *NumDevicesRet ///< [out][optional] pointer to the number of + ///< sub-devices the device can be partitioned into + ///< according to the partitioning property. +) { + // Other partitioning ways are not supported by Level Zero + UR_ASSERT(Properties->PropCount == 1, UR_RESULT_ERROR_INVALID_VALUE); + if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + if ((Properties->pProperties->value.affinity_domain != + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE && + Properties->pProperties->value.affinity_domain != + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + } else if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { + if (Properties->pProperties->value.affinity_domain != 0) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + } else { + return UR_RESULT_ERROR_INVALID_VALUE; + } - if (ImmediateCommandlistsSetting == -1) - // Change this to PerQueue as default after more testing. -#ifdef _WIN32 - return NotUsed; -#else - return isPVC() ? 
PerQueue : NotUsed; -#endif - switch (ImmediateCommandlistsSetting) { - case 0: - return NotUsed; - case 1: - return PerQueue; - case 2: - return PerThreadPerQueue; - default: - return NotUsed; + // Devices cache is normally created in piDevicesGet but still make + // sure that cache is populated. + // + auto Res = Device->Platform->populateDeviceCacheIfNeeded(); + if (Res != UR_RESULT_SUCCESS) { + return Res; } -} -bool ur_device_handle_t_::useRelaxedAllocationLimits() { - static const bool EnableRelaxedAllocationLimits = [] { - auto UrRet = ur_getenv("UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS"); - const bool RetVal = UrRet ? std::stoi(*UrRet) : 0; - return RetVal; - }(); + auto EffectiveNumDevices = [&]() -> decltype(Device->SubDevices.size()) { + if (Device->SubDevices.size() == 0) + return 0; - return EnableRelaxedAllocationLimits; -} + // Sub-Sub-Devices are partitioned by CSlices, not by affinity domain. + // However, if + // UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that + // still expose CSlices in partitioning by affinity domain for compatibility + // reasons. + if (Properties->pProperties->type == + UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN && + !ExposeCSliceInAffinityPartitioning) { + if (Device->isSubDevice()) { + return 0; + } + } + if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { + // Not a CSlice-based partitioning. + if (!Device->SubDevices[0]->isCCS()) { + return 0; + } + } -bool ur_device_handle_t_::useDriverInOrderLists() { - // Use in-order lists implementation from L0 driver instead - // of adapter's implementation. - static const bool UseDriverInOrderLists = [] { - const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS"); - if (!UrRet) - return false; - return std::atoi(UrRet) != 0; + return Device->SubDevices.size(); }(); - return UseDriverInOrderLists; -} - -ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, - int SubSubDeviceIndex) { - // Maintain various device properties cache. - // Note that we just describe here how to compute the data. - // The real initialization is upon first access. + // TODO: Consider support for partitioning to <= total sub-devices. + // Currently supported partitioning (by affinity domain/numa) would always + // partition to all sub-devices. // - auto ZeDevice = this->ZeDevice; - ZeDeviceProperties.Compute = [ZeDevice](ze_device_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &Properties)); - }; - - ZeDeviceComputeProperties.Compute = - [ZeDevice](ze_device_compute_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetComputeProperties, (ZeDevice, &Properties)); - }; + if (NumDevices != 0) + UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); - ZeDeviceIpVersionExt.Compute = - [ZeDevice](ze_device_ip_version_ext_t &Properties) { - ze_device_properties_t P; - P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; - P.pNext = (void *)&Properties; - ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P)); - }; + for (uint32_t I = 0; I < NumDevices; I++) { + auto prop = Properties->pProperties[0]; + if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + // In case the value is NEXT_PARTITIONABLE, we need to change it to the + // chosen domain. This will always be NUMA since that's the only domain + // supported by level zero. 
+ prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; + } + Device->SubDevices[I]->SubDeviceCreationProperty = prop; - ZeDeviceImageProperties.Compute = - [ZeDevice](ze_device_image_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetImageProperties, (ZeDevice, &Properties)); - }; + OutDevices[I] = Device->SubDevices[I]; + // reusing the same pi_device needs to increment the reference count + ur::level_zero::urDeviceRetain(OutDevices[I]); + } - ZeDeviceModuleProperties.Compute = - [ZeDevice](ze_device_module_properties_t &Properties) { - ZE_CALL_NOCHECK(zeDeviceGetModuleProperties, (ZeDevice, &Properties)); - }; + if (NumDevicesRet) { + *NumDevicesRet = EffectiveNumDevices; + } + return UR_RESULT_SUCCESS; +} - ZeDeviceMemoryProperties.Compute = - [ZeDevice]( +UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( + ur_device_handle_t + Device, ///< [in] handle of the device to select binary for. + const ur_device_binary_t + *Binaries, ///< [in] the array of binaries to select from. + uint32_t NumBinaries, ///< [in] the number of binaries passed in ppBinaries. + ///< Must greater than or equal to zero otherwise + ///< ::UR_RESULT_ERROR_INVALID_VALUE is returned. + uint32_t + *SelectedBinary ///< [out] the index of the selected binary in the input + ///< array of binaries. If a suitable binary was not + ///< found the function returns ${X}_INVALID_BINARY. +) { + std::ignore = Device; + // TODO: this is a bare-bones implementation for choosing a device image + // that would be compatible with the targeted device. An AOT-compiled + // image is preferred over SPIR-V for known devices (i.e. Intel devices) + // The implementation makes no effort to differentiate between multiple images + // for the given device, and simply picks the first one compatible. + // + // Real implementation will use the same mechanism OpenCL ICD dispatcher + // uses. Something like: + // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT); + // return context->dispatch->piextDeviceSelectIR( + // ctx, images, num_images, selected_image); + // where context->dispatch is set to the dispatch table provided by PI + // plugin for platform/device the ctx was created for. + + // Look for GEN binary, which we known can only be handled by Level-Zero now. + const char *BinaryTarget = + UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; // UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; + + uint32_t *SelectedBinaryInd = SelectedBinary; + + // Find the appropriate device image, fallback to spirv if not found + constexpr uint32_t InvalidInd = (std::numeric_limits::max)(); + uint32_t Spirv = InvalidInd; + + for (uint32_t i = 0; i < NumBinaries; ++i) { + if (strcmp(Binaries[i].pDeviceTargetSpec, BinaryTarget) == 0) { + *SelectedBinaryInd = i; + return UR_RESULT_SUCCESS; + } + if (strcmp(Binaries[i].pDeviceTargetSpec, + UR_DEVICE_BINARY_TARGET_SPIRV64) == 0) + Spirv = i; + } + // Points to a spirv image, if such indeed was found + if ((*SelectedBinaryInd = Spirv) != InvalidInd) + return UR_RESULT_SUCCESS; + + // No image can be loaded for the given device + return UR_RESULT_ERROR_INVALID_BINARY; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( + ur_device_handle_t Device, ///< [in] handle of the device. + ur_native_handle_t + *NativeDevice ///< [out] a pointer to the native handle of the device. 
+) { + *NativeDevice = reinterpret_cast(Device->ZeDevice); + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( + ur_native_handle_t NativeDevice, ///< [in] the native handle of the device. + ur_platform_handle_t Platform, ///< [in] handle of the platform instance + const ur_device_native_properties_t + *Properties, ///< [in][optional] pointer to native device properties + ///< struct. + ur_device_handle_t + *Device ///< [out] pointer to the handle of the device object created. +) { + std::ignore = Properties; + auto ZeDevice = ur_cast(NativeDevice); + + // The SYCL spec requires that the set of devices must remain fixed for the + // duration of the application's execution. We assume that we found all of the + // Level Zero devices when we initialized the platforms/devices cache, so the + // "NativeHandle" must already be in the cache. If it is not, this must not be + // a valid Level Zero device. + + ur_device_handle_t Dev = nullptr; + if (const auto *platforms = GlobalAdapter->PlatformCache->get_value()) { + for (const auto &p : *platforms) { + Dev = p->getDeviceFromNativeHandle(ZeDevice); + if (Dev) { + // Check that the input Platform, if was given, matches the found one. + UR_ASSERT(!Platform || Platform == p.get(), + UR_RESULT_ERROR_INVALID_PLATFORM); + break; + } + } + } else { + return GlobalAdapter->PlatformCache->get_error(); + } + + if (Dev == nullptr) + return UR_RESULT_ERROR_INVALID_VALUE; + + *Device = Dev; + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( + ur_device_handle_t Device, ///< [in] handle of the device instance + uint64_t *DeviceTimestamp, ///< [out][optional] pointer to the Device's + ///< global timestamp that correlates with the + ///< Host's global timestamp value + uint64_t *HostTimestamp ///< [out][optional] pointer to the Host's global + ///< timestamp that correlates with the Device's + ///< global timestamp value +) { + const uint64_t &ZeTimerResolution = + Device->ZeDeviceProperties->timerResolution; + const uint64_t TimestampMaxCount = Device->getTimestampMask(); + uint64_t DeviceClockCount, Dummy; + + ZE2UR_CALL(zeDeviceGetGlobalTimestamps, + (Device->ZeDevice, + HostTimestamp == nullptr ? &Dummy : HostTimestamp, + &DeviceClockCount)); + + if (DeviceTimestamp != nullptr) { + *DeviceTimestamp = + (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution; + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceRetain(ur_device_handle_t Device) { + // The root-device ref-count remains unchanged (always 1). + if (Device->isSubDevice()) { + Device->RefCount.increment(); + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urDeviceRelease(ur_device_handle_t Device) { + // Root devices are destroyed during the piTearDown process. + if (Device->isSubDevice()) { + if (Device->RefCount.decrementAndTest()) { + delete Device; + } + } + + return UR_RESULT_SUCCESS; +} +} // namespace ur::level_zero + +// Whether immediate commandlists will be used for kernel launches and copies. +// The default is standard commandlists. Setting 1 or 2 specifies use of +// immediate commandlists. Note: when immediate commandlists are used then +// device-only events must be either AllHostVisible or OnDemandHostVisibleProxy. +// (See env var UR_L0_DEVICE_SCOPE_EVENTS). 
+ +// Get value of immediate commandlists env var setting or -1 if unset +ur_device_handle_t_::ImmCmdlistMode +ur_device_handle_t_::useImmediateCommandLists() { + // If immediate commandlist setting is not explicitly set, then use the device + // default. + // TODO: confirm this is good once make_queue revert is added + static const int ImmediateCommandlistsSetting = [] { + const char *UrRet = std::getenv("UR_L0_USE_IMMEDIATE_COMMANDLISTS"); + const char *PiRet = + std::getenv("SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS"); + const char *ImmediateCommandlistsSettingStr = + UrRet ? UrRet : (PiRet ? PiRet : nullptr); + if (!ImmediateCommandlistsSettingStr) + return -1; + return std::atoi(ImmediateCommandlistsSettingStr); + }(); + + if (ImmediateCommandlistsSetting == -1) + // Change this to PerQueue as default after more testing. +#ifdef _WIN32 + return NotUsed; +#else + return isPVC() ? PerQueue : NotUsed; +#endif + switch (ImmediateCommandlistsSetting) { + case 0: + return NotUsed; + case 1: + return PerQueue; + case 2: + return PerThreadPerQueue; + default: + return NotUsed; + } +} + +bool ur_device_handle_t_::useRelaxedAllocationLimits() { + static const bool EnableRelaxedAllocationLimits = [] { + auto UrRet = ur_getenv("UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS"); + const bool RetVal = UrRet ? std::stoi(*UrRet) : 0; + return RetVal; + }(); + + return EnableRelaxedAllocationLimits; +} + +bool ur_device_handle_t_::useDriverInOrderLists() { + // Use in-order lists implementation from L0 driver instead + // of adapter's implementation. + static const bool UseDriverInOrderLists = [] { + const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS"); + if (!UrRet) + return false; + return std::atoi(UrRet) != 0; + }(); + + return UseDriverInOrderLists; +} + +ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, + int SubSubDeviceIndex) { + // Maintain various device properties cache. + // Note that we just describe here how to compute the data. + // The real initialization is upon first access. 
+ // + auto ZeDevice = this->ZeDevice; + ZeDeviceProperties.Compute = [ZeDevice](ze_device_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &Properties)); + }; + + ZeDeviceComputeProperties.Compute = + [ZeDevice](ze_device_compute_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetComputeProperties, (ZeDevice, &Properties)); + }; + + ZeDeviceIpVersionExt.Compute = + [ZeDevice](ze_device_ip_version_ext_t &Properties) { + ze_device_properties_t P; + P.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; + P.pNext = (void *)&Properties; + ZE_CALL_NOCHECK(zeDeviceGetProperties, (ZeDevice, &P)); + }; + + ZeDeviceImageProperties.Compute = + [ZeDevice](ze_device_image_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetImageProperties, (ZeDevice, &Properties)); + }; + + ZeDeviceModuleProperties.Compute = + [ZeDevice](ze_device_module_properties_t &Properties) { + ZE_CALL_NOCHECK(zeDeviceGetModuleProperties, (ZeDevice, &Properties)); + }; + + ZeDeviceMemoryProperties.Compute = + [ZeDevice]( std::pair>, std::vector>> &Properties) { @@ -1244,7 +1491,7 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, return UR_RESULT_ERROR_UNKNOWN; } - if (CopyEngineRequested((ur_device_handle_t)this)) { + if (ur::level_zero::CopyEngineRequested((ur_device_handle_t)this)) { for (uint32_t i = 0; i < numQueueGroups; i++) { if (((QueueGroupProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0) && @@ -1285,26 +1532,6 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, return UR_RESULT_SUCCESS; } -ur_result_t urDeviceRetain(ur_device_handle_t Device) { - - // The root-device ref-count remains unchanged (always 1). - if (Device->isSubDevice()) { - Device->RefCount.increment(); - } - return UR_RESULT_SUCCESS; -} - -ur_result_t urDeviceRelease(ur_device_handle_t Device) { - // Root devices are destroyed during the piTearDown process. - if (Device->isSubDevice()) { - if (Device->RefCount.decrementAndTest()) { - delete Device; - } - } - - return UR_RESULT_SUCCESS; -} - void ZeDriverVersionStringExtension::setZeDriverVersionString( ur_platform_handle_t_ *Platform) { // Check if Intel Driver Version String is available. If yes, save the API @@ -1372,227 +1599,3 @@ void ZeUSMImportExtension::doZeUSMRelease(ze_driver_handle_t DriverHandle, void *HostPtr) { ZE_CALL_NOCHECK(zexDriverReleaseImportedPointer, (DriverHandle, HostPtr)); } - -UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( - ur_device_handle_t Device, ///< [in] handle of the device to partition. - const ur_device_partition_properties_t - *Properties, ///< [in] Device partition properties. - uint32_t NumDevices, ///< [in] the number of sub-devices. - ur_device_handle_t - *OutDevices, ///< [out][optional][range(0, NumDevices)] array of handle - ///< of devices. If NumDevices is less than the number of - ///< sub-devices available, then the function shall only - ///< retrieve that number of sub-devices. - uint32_t *NumDevicesRet ///< [out][optional] pointer to the number of - ///< sub-devices the device can be partitioned into - ///< according to the partitioning property. 
-) { - // Other partitioning ways are not supported by Level Zero - UR_ASSERT(Properties->PropCount == 1, UR_RESULT_ERROR_INVALID_VALUE); - if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { - if ((Properties->pProperties->value.affinity_domain != - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE && - Properties->pProperties->value.affinity_domain != - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - } else if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { - if (Properties->pProperties->value.affinity_domain != 0) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - } else { - return UR_RESULT_ERROR_INVALID_VALUE; - } - - // Devices cache is normally created in piDevicesGet but still make - // sure that cache is populated. - // - auto Res = Device->Platform->populateDeviceCacheIfNeeded(); - if (Res != UR_RESULT_SUCCESS) { - return Res; - } - - auto EffectiveNumDevices = [&]() -> decltype(Device->SubDevices.size()) { - if (Device->SubDevices.size() == 0) - return 0; - - // Sub-Sub-Devices are partitioned by CSlices, not by affinity domain. - // However, if - // UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that - // still expose CSlices in partitioning by affinity domain for compatibility - // reasons. - if (Properties->pProperties->type == - UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN && - !ExposeCSliceInAffinityPartitioning) { - if (Device->isSubDevice()) { - return 0; - } - } - if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { - // Not a CSlice-based partitioning. - if (!Device->SubDevices[0]->isCCS()) { - return 0; - } - } - - return Device->SubDevices.size(); - }(); - - // TODO: Consider support for partitioning to <= total sub-devices. - // Currently supported partitioning (by affinity domain/numa) would always - // partition to all sub-devices. - // - if (NumDevices != 0) - UR_ASSERT(NumDevices == EffectiveNumDevices, UR_RESULT_ERROR_INVALID_VALUE); - - for (uint32_t I = 0; I < NumDevices; I++) { - auto prop = Properties->pProperties[0]; - if (prop.type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { - // In case the value is NEXT_PARTITIONABLE, we need to change it to the - // chosen domain. This will always be NUMA since that's the only domain - // supported by level zero. - prop.value.affinity_domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; - } - Device->SubDevices[I]->SubDeviceCreationProperty = prop; - - OutDevices[I] = Device->SubDevices[I]; - // reusing the same pi_device needs to increment the reference count - urDeviceRetain(OutDevices[I]); - } - - if (NumDevicesRet) { - *NumDevicesRet = EffectiveNumDevices; - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceSelectBinary( - ur_device_handle_t - Device, ///< [in] handle of the device to select binary for. - const ur_device_binary_t - *Binaries, ///< [in] the array of binaries to select from. - uint32_t NumBinaries, ///< [in] the number of binaries passed in ppBinaries. - ///< Must greater than or equal to zero otherwise - ///< ::UR_RESULT_ERROR_INVALID_VALUE is returned. - uint32_t - *SelectedBinary ///< [out] the index of the selected binary in the input - ///< array of binaries. If a suitable binary was not - ///< found the function returns ${X}_INVALID_BINARY. -) { - std::ignore = Device; - // TODO: this is a bare-bones implementation for choosing a device image - // that would be compatible with the targeted device. An AOT-compiled - // image is preferred over SPIR-V for known devices (i.e. 
Intel devices) - // The implementation makes no effort to differentiate between multiple images - // for the given device, and simply picks the first one compatible. - // - // Real implementation will use the same mechanism OpenCL ICD dispatcher - // uses. Something like: - // PI_VALIDATE_HANDLE_RETURN_HANDLE(ctx, PI_ERROR_INVALID_CONTEXT); - // return context->dispatch->piextDeviceSelectIR( - // ctx, images, num_images, selected_image); - // where context->dispatch is set to the dispatch table provided by PI - // plugin for platform/device the ctx was created for. - - // Look for GEN binary, which we known can only be handled by Level-Zero now. - const char *BinaryTarget = - UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; // UR_DEVICE_BINARY_TARGET_SPIRV64_GEN; - - uint32_t *SelectedBinaryInd = SelectedBinary; - - // Find the appropriate device image, fallback to spirv if not found - constexpr uint32_t InvalidInd = (std::numeric_limits::max)(); - uint32_t Spirv = InvalidInd; - - for (uint32_t i = 0; i < NumBinaries; ++i) { - if (strcmp(Binaries[i].pDeviceTargetSpec, BinaryTarget) == 0) { - *SelectedBinaryInd = i; - return UR_RESULT_SUCCESS; - } - if (strcmp(Binaries[i].pDeviceTargetSpec, - UR_DEVICE_BINARY_TARGET_SPIRV64) == 0) - Spirv = i; - } - // Points to a spirv image, if such indeed was found - if ((*SelectedBinaryInd = Spirv) != InvalidInd) - return UR_RESULT_SUCCESS; - - // No image can be loaded for the given device - return UR_RESULT_ERROR_INVALID_BINARY; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle( - ur_device_handle_t Device, ///< [in] handle of the device. - ur_native_handle_t - *NativeDevice ///< [out] a pointer to the native handle of the device. -) { - *NativeDevice = reinterpret_cast(Device->ZeDevice); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( - ur_native_handle_t NativeDevice, ///< [in] the native handle of the device. - ur_platform_handle_t Platform, ///< [in] handle of the platform instance - const ur_device_native_properties_t - *Properties, ///< [in][optional] pointer to native device properties - ///< struct. - ur_device_handle_t - *Device ///< [out] pointer to the handle of the device object created. -) { - std::ignore = Properties; - auto ZeDevice = ur_cast(NativeDevice); - - // The SYCL spec requires that the set of devices must remain fixed for the - // duration of the application's execution. We assume that we found all of the - // Level Zero devices when we initialized the platforms/devices cache, so the - // "NativeHandle" must already be in the cache. If it is not, this must not be - // a valid Level Zero device. - - ur_device_handle_t Dev = nullptr; - if (const auto *platforms = GlobalAdapter->PlatformCache->get_value()) { - for (const auto &p : *platforms) { - Dev = p->getDeviceFromNativeHandle(ZeDevice); - if (Dev) { - // Check that the input Platform, if was given, matches the found one. 
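// Caller-side view of the native-handle entry points in this hunk, written
// against the public UR API from ur_api.h. A minimal sketch: the adapter
// resolves the Level Zero handle against its platform/device cache, so the
// round trip is expected to yield the original ur_device_handle_t.
#include <ur_api.h>

static ur_result_t RoundTripDevice(ur_platform_handle_t Platform,
                                   ur_device_handle_t Device) {
  ur_native_handle_t Native = 0;
  ur_result_t Res = urDeviceGetNativeHandle(Device, &Native);
  if (Res != UR_RESULT_SUCCESS)
    return Res;
  ur_device_handle_t Again = nullptr;
  Res = urDeviceCreateWithNativeHandle(Native, Platform, nullptr, &Again);
  if (Res != UR_RESULT_SUCCESS)
    return Res;
  return Again == Device ? UR_RESULT_SUCCESS : UR_RESULT_ERROR_INVALID_DEVICE;
}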
- UR_ASSERT(!Platform || Platform == p.get(), - UR_RESULT_ERROR_INVALID_PLATFORM); - break; - } - } - } else { - return GlobalAdapter->PlatformCache->get_error(); - } - - if (Dev == nullptr) - return UR_RESULT_ERROR_INVALID_VALUE; - - *Device = Dev; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( - ur_device_handle_t Device, ///< [in] handle of the device instance - uint64_t *DeviceTimestamp, ///< [out][optional] pointer to the Device's - ///< global timestamp that correlates with the - ///< Host's global timestamp value - uint64_t *HostTimestamp ///< [out][optional] pointer to the Host's global - ///< timestamp that correlates with the Device's - ///< global timestamp value -) { - const uint64_t &ZeTimerResolution = - Device->ZeDeviceProperties->timerResolution; - const uint64_t TimestampMaxCount = Device->getTimestampMask(); - uint64_t DeviceClockCount, Dummy; - - ZE2UR_CALL(zeDeviceGetGlobalTimestamps, - (Device->ZeDevice, - HostTimestamp == nullptr ? &Dummy : HostTimestamp, - &DeviceClockCount)); - - if (DeviceTimestamp != nullptr) { - *DeviceTimestamp = - (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution; - } - - return UR_RESULT_SUCCESS; -} diff --git a/source/adapters/level_zero/device.hpp b/source/adapters/level_zero/device.hpp index 77ab9a50d8..8e26c08663 100644 --- a/source/adapters/level_zero/device.hpp +++ b/source/adapters/level_zero/device.hpp @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 33495f52b8..0d055919e0 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -18,6 +18,7 @@ #include "common.hpp" #include "event.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" void printZeEventList(const _ur_ze_event_list_t &UrZeEventList) { @@ -232,7 +233,7 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList, EventWaitList) && Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) { - UR_CALL(urEventRetain(Queue->LastCommandEvent)); + UR_CALL(ur::level_zero::urEventRetain(Queue->LastCommandEvent)); *Event = Queue->LastCommandEvent; return UR_RESULT_SUCCESS; } @@ -388,7 +389,9 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( +namespace ur::level_zero { + +ur_result_t urEventGetInfo( ur_event_handle_t Event, ///< [in] handle of the event object ur_event_info_t PropName, ///< [in] the name of the event property to query size_t PropValueSize, ///< [in] size in bytes of the event property value @@ -471,7 +474,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( +ur_result_t urEventGetProfilingInfo( ur_event_handle_t Event, ///< [in] handle of the event object ur_profiling_info_t PropName, ///< [in] the name of the profiling property to query @@ -661,117 +664,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( return UR_RESULT_SUCCESS; } -ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( - bool Blocking, ///< [in] blocking or non-blocking enqueue - uint32_t NumEventsInWaitList, ///< [in] size of the event wait list - const ur_event_handle_t - *EventWaitList, ///< 
[in][optional][range(0, numEventsInWaitList)] - ///< pointer to a list of events that must be complete - ///< before this command can be executed. If nullptr, - ///< the numEventsInWaitList must be 0, indicating - ///< that this command does not wait on any event to - ///< complete. - ur_event_handle_t - *OutEvent ///< [in,out] return an event object that identifies - ///< this particular command instance. -) { - auto Queue = this; - // Lock automatically releases when this goes out of scope. - std::scoped_lock lock(Queue->Mutex); - - ur_device_handle_t Device = Queue->Device; - - bool UseCopyEngine = false; - _ur_ze_event_list_t TmpWaitList; - UR_CALL(TmpWaitList.createAndRetainUrZeEventList( - NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine)); - - // Get a new command list to be used on this call - ur_command_list_ptr_t CommandList{}; - UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - /* AllowBatching */ false)); - - UR_CALL(createEventAndAssociateQueue( - Queue, OutEvent, UR_COMMAND_TIMESTAMP_RECORDING_EXP, CommandList, - /* IsInternal */ false, /* HostVisible */ true)); - ze_event_handle_t ZeEvent = (*OutEvent)->ZeEvent; - (*OutEvent)->WaitList = TmpWaitList; - - uint64_t DeviceStartTimestamp = 0; - UR_CALL(urDeviceGetGlobalTimestamps(Device, &DeviceStartTimestamp, nullptr)); - (*OutEvent)->RecordEventStartTimestamp = DeviceStartTimestamp; - - // Create a new entry in the queue's recordings. - Queue->EndTimeRecordings[*OutEvent] = - ur_queue_handle_legacy_t_::end_time_recording{}; - - ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp, - (CommandList->first, - &Queue->EndTimeRecordings[*OutEvent].RecordEventEndTimestamp, - ZeEvent, (*OutEvent)->WaitList.Length, - (*OutEvent)->WaitList.ZeEventList)); - - UR_CALL( - Queue->executeCommandList(CommandList, Blocking, /* OkToBatch */ false)); - - return UR_RESULT_SUCCESS; -} - -ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( - ze_event_handle_t &ZeHostVisibleEvent) { - auto UrQueue = Legacy(this->UrQueue); - - std::scoped_lock Lock(UrQueue->Mutex, - this->Mutex); - - if (!HostVisibleEvent) { - this->IsCreatingHostProxyEvent = true; - if (UrQueue->ZeEventsScope != OnDemandHostVisibleProxy) - die("getOrCreateHostVisibleEvent: missing host-visible event"); - - // Submit the command(s) signalling the proxy event to the queue. - // We have to first submit a wait for the device-only event for which this - // proxy is created. - // - // Get a new command list to be used on this call - - // We want to batch these commands to avoid extra submissions (costly) - bool OkToBatch = true; - - ur_command_list_ptr_t CommandList{}; - UR_CALL(UrQueue->Context->getAvailableCommandList( - UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch)) - - // Create a "proxy" host-visible event. 
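// The conversion urDeviceGetGlobalTimestamps applies to the raw device
// counter, pulled out as a standalone helper: mask to the device's valid
// timestamp bits, then scale by the timer resolution (treated here, as in the
// adapter code, as the tick period). A sketch with caller-supplied values.
#include <cstdint>

static uint64_t DeviceTicksToTime(uint64_t RawCounter, uint64_t TimestampMask,
                                  uint64_t TimerResolution) {
  // Equivalent to (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution.
  return (RawCounter & TimestampMask) * TimerResolution;
}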
- UR_CALL(createEventAndAssociateQueue( - UrQueue, &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList, - /* IsInternal */ false, /* IsMultiDevice */ false, - /* HostVisible */ true)); - - if (this->IsInnerBatchedEvent) { - ZE2UR_CALL(zeCommandListAppendBarrier, - (CommandList->first, ZeEvent, 0, nullptr)); - } else { - ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (CommandList->first, 1, &ZeEvent)); - } - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (CommandList->first, HostVisibleEvent->ZeEvent)); - - UR_CALL(UrQueue->executeCommandList(CommandList, false, OkToBatch)) - this->IsCreatingHostProxyEvent = false; - } - - ZeHostVisibleEvent = HostVisibleEvent->ZeEvent; - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urEventWait( - uint32_t NumEvents, ///< [in] number of events in the event list - const ur_event_handle_t - *EventWaitList ///< [in][range(0, numEvents)] pointer to a list of - ///< events to wait for completion +ur_result_t +urEventWait(uint32_t NumEvents, ///< [in] number of events in the event list + const ur_event_handle_t + *EventWaitList ///< [in][range(0, numEvents)] pointer to a list + ///< of events to wait for completion ) { for (uint32_t I = 0; I < NumEvents; I++) { auto e = EventWaitList[I]; @@ -859,8 +756,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventRetain( - ur_event_handle_t Event ///< [in] handle of the event object +ur_result_t +urEventRetain(ur_event_handle_t Event ///< [in] handle of the event object ) { Event->RefCountExternal++; Event->RefCount.increment(); @@ -868,8 +765,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventRelease( - ur_event_handle_t Event ///< [in] handle of the event object +ur_result_t +urEventRelease(ur_event_handle_t Event ///< [in] handle of the event object ) { Event->RefCountExternal--; UR_CALL(urEventReleaseInternal(Event)); @@ -877,7 +774,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( +ur_result_t urEventGetNativeHandle( ur_event_handle_t Event, ///< [in] handle of the event. ur_native_handle_t *NativeEvent ///< [out] a pointer to the native handle of the event. @@ -902,7 +799,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate( +ur_result_t urExtEventCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_event_handle_t *Event ///< [out] pointer to the handle of the event object created. @@ -915,7 +812,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( +ur_result_t urEventCreateWithNativeHandle( ur_native_handle_t NativeEvent, ///< [in] the native handle of the event. 
ur_context_handle_t Context, ///< [in] handle of the context object const ur_event_native_properties_t *Properties, @@ -965,7 +862,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback( +ur_result_t urEventSetCallback( ur_event_handle_t Event, ///< [in] handle of the event object ur_execution_info_t ExecStatus, ///< [in] execution status of the event ur_event_callback_t Notify, ///< [in] execution status of the event @@ -981,6 +878,115 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero + +ur_result_t ur_queue_handle_legacy_t_::enqueueTimestampRecordingExp( + bool Blocking, ///< [in] blocking or non-blocking enqueue + uint32_t NumEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating + ///< that this command does not wait on any event to + ///< complete. + ur_event_handle_t + *OutEvent ///< [in,out] return an event object that identifies + ///< this particular command instance. +) { + auto Queue = this; + // Lock automatically releases when this goes out of scope. + std::scoped_lock lock(Queue->Mutex); + + ur_device_handle_t Device = Queue->Device; + + bool UseCopyEngine = false; + _ur_ze_event_list_t TmpWaitList; + UR_CALL(TmpWaitList.createAndRetainUrZeEventList( + NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine)); + + // Get a new command list to be used on this call + ur_command_list_ptr_t CommandList{}; + UR_CALL(Queue->Context->getAvailableCommandList( + Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, + /* AllowBatching */ false)); + + UR_CALL(createEventAndAssociateQueue( + Queue, OutEvent, UR_COMMAND_TIMESTAMP_RECORDING_EXP, CommandList, + /* IsInternal */ false, /* HostVisible */ true)); + ze_event_handle_t ZeEvent = (*OutEvent)->ZeEvent; + (*OutEvent)->WaitList = TmpWaitList; + + uint64_t DeviceStartTimestamp = 0; + UR_CALL(ur::level_zero::urDeviceGetGlobalTimestamps( + Device, &DeviceStartTimestamp, nullptr)); + (*OutEvent)->RecordEventStartTimestamp = DeviceStartTimestamp; + + // Create a new entry in the queue's recordings. + Queue->EndTimeRecordings[*OutEvent] = + ur_queue_handle_legacy_t_::end_time_recording{}; + + ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp, + (CommandList->first, + &Queue->EndTimeRecordings[*OutEvent].RecordEventEndTimestamp, + ZeEvent, (*OutEvent)->WaitList.Length, + (*OutEvent)->WaitList.ZeEventList)); + + UR_CALL( + Queue->executeCommandList(CommandList, Blocking, /* OkToBatch */ false)); + + return UR_RESULT_SUCCESS; +} + +ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( + ze_event_handle_t &ZeHostVisibleEvent) { + auto UrQueue = Legacy(this->UrQueue); + + std::scoped_lock Lock(UrQueue->Mutex, + this->Mutex); + + if (!HostVisibleEvent) { + this->IsCreatingHostProxyEvent = true; + if (UrQueue->ZeEventsScope != OnDemandHostVisibleProxy) + die("getOrCreateHostVisibleEvent: missing host-visible event"); + + // Submit the command(s) signalling the proxy event to the queue. + // We have to first submit a wait for the device-only event for which this + // proxy is created. 
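// How the ur::level_zero entry points wrapped in this file are consumed once
// the adapter is built statically: the generated ur_interface_loader fills the
// dispatch tables with their addresses directly, instead of the loader
// resolving UR_APIEXPORT symbols at runtime. Simplified sketch with a
// hypothetical two-entry table; the real tables come from ur_ddi.h and the
// .mako-generated ur_interface_loader.cpp.
#include "ur_interface_loader.hpp"

struct MiniEventTable {
  ur_result_t (*pfnRetain)(ur_event_handle_t);
  ur_result_t (*pfnRelease)(ur_event_handle_t);
};

static void populateMiniEventTable(MiniEventTable &Table) {
  Table.pfnRetain = &ur::level_zero::urEventRetain;   // direct address
  Table.pfnRelease = &ur::level_zero::urEventRelease; // no dlopen/dlsym needed
}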
+ // + // Get a new command list to be used on this call + + // We want to batch these commands to avoid extra submissions (costly) + bool OkToBatch = true; + + ur_command_list_ptr_t CommandList{}; + UR_CALL(UrQueue->Context->getAvailableCommandList( + UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch)) + + // Create a "proxy" host-visible event. + UR_CALL(createEventAndAssociateQueue( + UrQueue, &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList, + /* IsInternal */ false, /* IsMultiDevice */ false, + /* HostVisible */ true)); + + if (this->IsInnerBatchedEvent) { + ZE2UR_CALL(zeCommandListAppendBarrier, + (CommandList->first, ZeEvent, 0, nullptr)); + } else { + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (CommandList->first, 1, &ZeEvent)); + } + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (CommandList->first, HostVisibleEvent->ZeEvent)); + + UR_CALL(UrQueue->executeCommandList(CommandList, false, OkToBatch)) + this->IsCreatingHostProxyEvent = false; + } + + ZeHostVisibleEvent = HostVisibleEvent->ZeEvent; + return UR_RESULT_SUCCESS; +} + ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { if (!Event->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -1156,7 +1162,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, // We've reset event data members above, now cleanup resources. if (AssociatedKernel) { ReleaseIndirectMem(AssociatedKernel); - UR_CALL(urKernelRelease(AssociatedKernel)); + UR_CALL(ur::level_zero::urKernelRelease(AssociatedKernel)); } if (AssociatedQueue) { @@ -1215,7 +1221,7 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, } if (DepEventKernel) { ReleaseIndirectMem(DepEventKernel); - UR_CALL(urKernelRelease(DepEventKernel)); + UR_CALL(ur::level_zero::urKernelRelease(DepEventKernel)); } UR_CALL(urEventReleaseInternal(DepEvent)); } diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index e99df2a272..d3a5499287 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp index c04a062218..8bc75c592c 100644 --- a/source/adapters/level_zero/image.cpp +++ b/source/adapters/level_zero/image.cpp @@ -14,6 +14,7 @@ #include "event.hpp" #include "logger/ur_logger.hpp" #include "sampler.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" typedef ze_result_t(ZE_APICALL *zeImageGetDeviceOffsetExp_pfn)( @@ -629,127 +630,6 @@ getImageFormatTypeAndSize(const ur_image_format_t *ImageFormat) { return {ZeImageFormatType, ZeImageFormatTypeSize}; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool, - size_t widthInBytes, size_t height, size_t elementSizeBytes, void **ppMem, - size_t *pResultPitch) { - std::shared_lock Lock(hContext->Mutex); - - UR_ASSERT(hContext && hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(widthInBytes != 0, UR_RESULT_ERROR_INVALID_USM_SIZE); - UR_ASSERT(ppMem && pResultPitch, UR_RESULT_ERROR_INVALID_NULL_POINTER); - - static std::once_flag InitFlag; - std::call_once(InitFlag, [&]() { - ze_driver_handle_t DriverHandle = hContext->getPlatform()->ZeDriver; - auto Result = zeDriverGetExtensionFunctionAddress( - DriverHandle, "zeMemGetPitchFor2dImage", - (void 
**)&zeMemGetPitchFor2dImageFunctionPtr); - if (Result != ZE_RESULT_SUCCESS) - logger::error( - "zeDriverGetExtensionFunctionAddress zeMemGetPitchFor2dImage " - "failed, err = {}", - Result); - }); - if (!zeMemGetPitchFor2dImageFunctionPtr) - return UR_RESULT_ERROR_INVALID_OPERATION; - - size_t Width = widthInBytes / elementSizeBytes; - size_t RowPitch; - ze_device_handle_t ZeDeviceTranslated; - ZE2UR_CALL(zelLoaderTranslateHandle, (ZEL_HANDLE_DEVICE, hDevice->ZeDevice, - (void **)&ZeDeviceTranslated)); - ZE2UR_CALL(zeMemGetPitchFor2dImageFunctionPtr, - (hContext->ZeContext, ZeDeviceTranslated, Width, height, - elementSizeBytes, &RowPitch)); - *pResultPitch = RowPitch; - - size_t Size = height * RowPitch; - UR_CALL(urUSMDeviceAlloc(hContext, hDevice, pUSMDesc, pool, Size, ppMem)); - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesUnsampledImageHandleDestroyExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_native_handle_t hImage) { - std::ignore = hContext; - std::ignore = hDevice; - std::ignore = hImage; - - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL -urBindlessImagesSampledImageHandleDestroyExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_native_handle_t hImage) { - // Sampled image is a combination of unsampled image and sampler. - // Sampler is released in urSamplerRelease. - return urBindlessImagesUnsampledImageHandleDestroyExp(hContext, hDevice, - hImage); -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, - ur_exp_image_mem_native_handle_t *phImageMem) { - std::shared_lock Lock(hContext->Mutex); - - UR_ASSERT(hContext && hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(pImageFormat && pImageDesc && phImageMem, - UR_RESULT_ERROR_INVALID_NULL_POINTER); - - ZeStruct ZeImageDesc; - UR_CALL(ur2zeImageDesc(pImageFormat, pImageDesc, ZeImageDesc)); - - ze_image_bindless_exp_desc_t ZeImageBindlessDesc; - ZeImageBindlessDesc.stype = ZE_STRUCTURE_TYPE_BINDLESS_IMAGE_EXP_DESC; - ZeImageBindlessDesc.pNext = nullptr; - ZeImageBindlessDesc.flags = ZE_IMAGE_BINDLESS_EXP_FLAG_BINDLESS; - ZeImageDesc.pNext = &ZeImageBindlessDesc; - - ze_image_handle_t ZeImage; - ZE2UR_CALL(zeImageCreate, - (hContext->ZeContext, hDevice->ZeDevice, &ZeImageDesc, &ZeImage)); - ZE2UR_CALL(zeContextMakeImageResident, - (hContext->ZeContext, hDevice->ZeDevice, ZeImage)); - UR_CALL(createUrMemFromZeImage(hContext, ZeImage, /*OwnZeMemHandle*/ true, - ZeImageDesc, phImageMem)); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hImageMem) { - std::ignore = hContext; - std::ignore = hDevice; - UR_CALL(urMemRelease(reinterpret_cast(hImageMem))); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hImageMem, - const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, - ur_exp_image_native_handle_t *phImage) { - UR_CALL(bindlessImagesCreateImpl(hContext, hDevice, hImageMem, pImageFormat, - pImageDesc, nullptr, phImage)); - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( - 
ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hImageMem, - const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, - ur_sampler_handle_t hSampler, ur_exp_image_native_handle_t *phImage) { - UR_CALL(bindlessImagesCreateImpl(hContext, hDevice, hImageMem, pImageFormat, - pImageDesc, hSampler, phImage)); - return UR_RESULT_SUCCESS; -} - ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp( void *pDst, void *pSrc, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_image_copy_flags_t imageCopyFlags, @@ -891,7 +771,132 @@ ur_result_t ur_queue_handle_legacy_t_::bindlessImagesImageCopyExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( +namespace ur::level_zero { + +ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t widthInBytes, + size_t height, size_t elementSizeBytes, + void **ppMem, size_t *pResultPitch) { + std::shared_lock Lock(hContext->Mutex); + + UR_ASSERT(hContext && hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(widthInBytes != 0, UR_RESULT_ERROR_INVALID_USM_SIZE); + UR_ASSERT(ppMem && pResultPitch, UR_RESULT_ERROR_INVALID_NULL_POINTER); + + static std::once_flag InitFlag; + std::call_once(InitFlag, [&]() { + ze_driver_handle_t DriverHandle = hContext->getPlatform()->ZeDriver; + auto Result = zeDriverGetExtensionFunctionAddress( + DriverHandle, "zeMemGetPitchFor2dImage", + (void **)&zeMemGetPitchFor2dImageFunctionPtr); + if (Result != ZE_RESULT_SUCCESS) + logger::error( + "zeDriverGetExtensionFunctionAddress zeMemGetPitchFor2dImage " + "failed, err = {}", + Result); + }); + if (!zeMemGetPitchFor2dImageFunctionPtr) + return UR_RESULT_ERROR_INVALID_OPERATION; + + size_t Width = widthInBytes / elementSizeBytes; + size_t RowPitch; + ze_device_handle_t ZeDeviceTranslated; + ZE2UR_CALL(zelLoaderTranslateHandle, (ZEL_HANDLE_DEVICE, hDevice->ZeDevice, + (void **)&ZeDeviceTranslated)); + ZE2UR_CALL(zeMemGetPitchFor2dImageFunctionPtr, + (hContext->ZeContext, ZeDeviceTranslated, Width, height, + elementSizeBytes, &RowPitch)); + *pResultPitch = RowPitch; + + size_t Size = height * RowPitch; + UR_CALL(ur::level_zero::urUSMDeviceAlloc(hContext, hDevice, pUSMDesc, pool, + Size, ppMem)); + + return UR_RESULT_SUCCESS; +} + +ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_native_handle_t hImage) { + std::ignore = hContext; + std::ignore = hDevice; + std::ignore = hImage; + + return UR_RESULT_SUCCESS; +} + +ur_result_t urBindlessImagesSampledImageHandleDestroyExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_native_handle_t hImage) { + // Sampled image is a combination of unsampled image and sampler. + // Sampler is released in urSamplerRelease. 
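// The once-only extension lookup urUSMPitchedAllocExp uses above, reduced to
// a standalone helper: resolve a driver extension entry point the first time
// it is needed and cache the result. The pointer is kept as void * here to
// avoid restating the extension's exact signature; the adapter casts it to its
// own pfn typedef as shown in this file.
#include <mutex>
#include <ze_api.h>

static void *resolveDriverExtension(ze_driver_handle_t Driver,
                                    const char *Name) {
  static std::once_flag Flag;
  static void *Fn = nullptr;
  std::call_once(Flag, [&] {
    // Returns ZE_RESULT_SUCCESS and a non-null pointer only if the driver
    // implements the named extension (e.g. "zeMemGetPitchFor2dImage").
    zeDriverGetExtensionFunctionAddress(Driver, Name, &Fn);
  });
  return Fn;
}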
+ return ur::level_zero::urBindlessImagesUnsampledImageHandleDestroyExp( + hContext, hDevice, hImage); +} + +ur_result_t urBindlessImagesImageAllocateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_mem_native_handle_t *phImageMem) { + std::shared_lock Lock(hContext->Mutex); + + UR_ASSERT(hContext && hDevice, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(pImageFormat && pImageDesc && phImageMem, + UR_RESULT_ERROR_INVALID_NULL_POINTER); + + ZeStruct ZeImageDesc; + UR_CALL(ur2zeImageDesc(pImageFormat, pImageDesc, ZeImageDesc)); + + ze_image_bindless_exp_desc_t ZeImageBindlessDesc; + ZeImageBindlessDesc.stype = ZE_STRUCTURE_TYPE_BINDLESS_IMAGE_EXP_DESC; + ZeImageBindlessDesc.pNext = nullptr; + ZeImageBindlessDesc.flags = ZE_IMAGE_BINDLESS_EXP_FLAG_BINDLESS; + ZeImageDesc.pNext = &ZeImageBindlessDesc; + + ze_image_handle_t ZeImage; + ZE2UR_CALL(zeImageCreate, + (hContext->ZeContext, hDevice->ZeDevice, &ZeImageDesc, &ZeImage)); + ZE2UR_CALL(zeContextMakeImageResident, + (hContext->ZeContext, hDevice->ZeDevice, ZeImage)); + UR_CALL(createUrMemFromZeImage(hContext, ZeImage, /*OwnZeMemHandle*/ true, + ZeImageDesc, phImageMem)); + return UR_RESULT_SUCCESS; +} + +ur_result_t +urBindlessImagesImageFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem) { + std::ignore = hContext; + std::ignore = hDevice; + UR_CALL(ur::level_zero::urMemRelease( + reinterpret_cast(hImageMem))); + return UR_RESULT_SUCCESS; +} + +ur_result_t urBindlessImagesUnsampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_native_handle_t *phImage) { + UR_CALL(bindlessImagesCreateImpl(hContext, hDevice, hImageMem, pImageFormat, + pImageDesc, nullptr, phImage)); + return UR_RESULT_SUCCESS; +} + +ur_result_t urBindlessImagesSampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_sampler_handle_t hSampler, ur_exp_image_native_handle_t *phImage) { + UR_CALL(bindlessImagesCreateImpl(hContext, hDevice, hImageMem, pImageFormat, + pImageDesc, hSampler, phImage)); + return UR_RESULT_SUCCESS; +} + +ur_result_t urBindlessImagesImageGetInfoExp( ur_context_handle_t, ur_exp_image_mem_native_handle_t hImageMem, ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet) { UR_ASSERT(hImageMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -941,7 +946,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( } } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( +ur_result_t urBindlessImagesMipmapGetLevelExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel, ur_exp_image_mem_native_handle_t *phImageMem) { @@ -955,13 +960,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_image_mem_native_handle_t hMem) { - return urBindlessImagesImageFreeExp(hContext, hDevice, hMem); +ur_result_t +urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext, + 
ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hMem) { + return ur::level_zero::urBindlessImagesImageFreeExp(hContext, hDevice, hMem); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( +ur_result_t urBindlessImagesImportExternalMemoryExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, ur_exp_external_mem_type_t memHandleType, ur_exp_interop_mem_desc_t *pInteropMemDesc, @@ -1019,7 +1025,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( +ur_result_t urBindlessImagesMapExternalArrayExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_interop_mem_handle_t hInteropMem, @@ -1054,9 +1060,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseInteropExp( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_exp_interop_mem_handle_t hInteropMem) { +ur_result_t +urBindlessImagesReleaseInteropExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_interop_mem_handle_t hInteropMem) { UR_ASSERT(hContext && hDevice && hInteropMem, UR_RESULT_ERROR_INVALID_NULL_HANDLE); @@ -1064,7 +1071,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseInteropExp( struct ur_ze_external_memory_data *externalMemoryData = reinterpret_cast(hInteropMem); - UR_CALL(urMemRelease(externalMemoryData->urMemoryHandle)); + UR_CALL(ur::level_zero::urMemRelease(externalMemoryData->urMemoryHandle)); switch (externalMemoryData->type) { case UR_ZE_EXTERNAL_OPAQUE_FD: @@ -1084,7 +1091,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesReleaseInteropExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( +ur_result_t urBindlessImagesImportExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_external_semaphore_type_t semHandleType, ur_exp_interop_semaphore_desc_t *pInteropSemaphoreDesc, @@ -1099,7 +1106,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesDestroyExternalSemaphoreExp( +ur_result_t urBindlessImagesDestroyExternalSemaphoreExp( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_exp_interop_semaphore_handle_t hInteropSemaphore) { std::ignore = hContext; @@ -1110,6 +1117,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesDestroyExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero + ur_result_t ur_queue_handle_legacy_t_::bindlessImagesWaitExternalSemaphoreExp( ur_exp_interop_semaphore_handle_t hSemaphore, bool hasValue, uint64_t waitValue, uint32_t numEventsInWaitList, diff --git a/source/adapters/level_zero/image.hpp b/source/adapters/level_zero/image.hpp index 618258601d..43f37fa757 100644 --- a/source/adapters/level_zero/image.hpp +++ b/source/adapters/level_zero/image.hpp @@ -10,7 +10,7 @@ #pragma once #include -#include +#include #include #include diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index af0dfdd0d5..9886e63ae2 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -11,30 +11,7 @@ #include "kernel.hpp" 
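// Shape of the declarations that the ur_interface_loader.hpp include added
// below brings in (generated from ur_interface_loader.hpp.mako): every entry
// point becomes an ordinary function in ur::level_zero instead of an exported
// UR_APIEXPORT symbol. Reduced here to two representative declarations; the
// generated header covers the whole API surface.
#include <ur_api.h>

namespace ur::level_zero {
ur_result_t urKernelRetain(ur_kernel_handle_t hKernel);
ur_result_t urKernelRelease(ur_kernel_handle_t hKernel);
} // namespace ur::level_zero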
#include "logger/ur_logger.hpp" #include "ur_api.h" -#include "ur_level_zero.hpp" - -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( - ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t workDim, - [[maybe_unused]] const size_t *pGlobalWorkOffset, - const size_t *pGlobalWorkSize, size_t *pSuggestedLocalWorkSize) { - UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - UR_ASSERT(pSuggestedLocalWorkSize != nullptr, - UR_RESULT_ERROR_INVALID_NULL_POINTER); - - uint32_t LocalWorkSize[3]; - size_t GlobalWorkSize3D[3]{1, 1, 1}; - std::copy(pGlobalWorkSize, pGlobalWorkSize + workDim, GlobalWorkSize3D); - - ze_kernel_handle_t ZeKernel{}; - UR_CALL(getZeKernel(Legacy(hQueue), hKernel, &ZeKernel)); - - UR_CALL(getSuggestedLocalWorkSize(Legacy(hQueue), ZeKernel, GlobalWorkSize3D, - LocalWorkSize)); - - std::copy(LocalWorkSize, LocalWorkSize + workDim, pSuggestedLocalWorkSize); - return UR_RESULT_SUCCESS; -} +#include "ur_interface_loader.hpp" ur_result_t getZeKernel(ur_queue_handle_legacy_t hQueue, ur_kernel_handle_t hKernel, @@ -264,7 +241,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunch( // is in use. Once the event has been signalled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. - UR_CALL(urKernelRetain(Kernel)); + UR_CALL(ur::level_zero::urKernelRetain(Kernel)); // Add to list of kernels to be submitted if (IndirectAccessTrackingEnabled) @@ -528,7 +505,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueCooperativeKernelLaunchExp( // is in use. Once the event has been signalled, the code in // CleanupCompletedEvent(Event) will do a urKernelRelease to update the // reference count on the kernel, using the kernel saved in CommandData. - UR_CALL(urKernelRetain(Kernel)); + UR_CALL(ur::level_zero::urKernelRetain(Kernel)); // Add to list of kernels to be submitted if (IndirectAccessTrackingEnabled) @@ -673,7 +650,32 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableRead( EventWaitList, Event, PreferCopyEngine); } -UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate( +namespace ur::level_zero { + +ur_result_t urKernelGetSuggestedLocalWorkSize( + ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t workDim, + [[maybe_unused]] const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, size_t *pSuggestedLocalWorkSize) { + UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(pSuggestedLocalWorkSize != nullptr, + UR_RESULT_ERROR_INVALID_NULL_POINTER); + + uint32_t LocalWorkSize[3]; + size_t GlobalWorkSize3D[3]{1, 1, 1}; + std::copy(pGlobalWorkSize, pGlobalWorkSize + workDim, GlobalWorkSize3D); + + ze_kernel_handle_t ZeKernel{}; + UR_CALL(getZeKernel(Legacy(hQueue), hKernel, &ZeKernel)); + + UR_CALL(getSuggestedLocalWorkSize(Legacy(hQueue), ZeKernel, GlobalWorkSize3D, + LocalWorkSize)); + + std::copy(LocalWorkSize, LocalWorkSize + workDim, pSuggestedLocalWorkSize); + return UR_RESULT_SUCCESS; +} + +ur_result_t urKernelCreate( ur_program_handle_t Program, ///< [in] handle of the program instance const char *KernelName, ///< [in] pointer to null-terminated string. 
ur_kernel_handle_t @@ -729,7 +731,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( +ur_result_t urKernelSetArgValue( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] size_t ArgSize, ///< [in] size of argument type @@ -770,7 +772,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal( +ur_result_t urKernelSetArgLocal( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] size_t ArgSize, ///< [in] size of the local buffer to be allocated by the @@ -780,12 +782,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal( ) { std::ignore = Properties; - UR_CALL(urKernelSetArgValue(Kernel, ArgIndex, ArgSize, nullptr, nullptr)); + UR_CALL(ur::level_zero::urKernelSetArgValue(Kernel, ArgIndex, ArgSize, + nullptr, nullptr)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo( +ur_result_t urKernelGetInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object ur_kernel_info_t ParamName, ///< [in] name of the Kernel property to query size_t PropSize, ///< [in] the size of the Kernel property value. @@ -846,7 +849,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo( +ur_result_t urKernelGetGroupInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object ur_device_handle_t Device, ///< [in] handle of the Device object ur_kernel_group_info_t @@ -927,7 +930,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo( +ur_result_t urKernelGetSubGroupInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the Kernel object ur_device_handle_t Device, ///< [in] handle of the Device object ur_kernel_sub_group_info_t @@ -958,7 +961,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSubGroupInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain( +ur_result_t urKernelRetain( ur_kernel_handle_t Kernel ///< [in] handle for the Kernel to retain ) { Kernel->RefCount.increment(); @@ -966,7 +969,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( +ur_result_t urKernelRelease( ur_kernel_handle_t Kernel ///< [in] handle for the Kernel to release ) { if (!Kernel->RefCount.decrementAndTest()) @@ -983,7 +986,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( } Kernel->ZeKernelMap.clear(); if (IndirectAccessTrackingEnabled) { - UR_CALL(urContextRelease(KernelProgram->Context)); + UR_CALL(ur::level_zero::urContextRelease(KernelProgram->Context)); } // do a release on the program this kernel was part of without delete of the // program handle @@ -994,7 +997,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( +ur_result_t urKernelSetArgPointer( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_pointer_properties_t @@ -1006,12 +1009,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( std::ignore = Properties; // 
KernelSetArgValue is expecting a pointer to the argument - UR_CALL(urKernelSetArgValue(Kernel, ArgIndex, sizeof(const void *), nullptr, - &ArgValue)); + UR_CALL(ur::level_zero::urKernelSetArgValue( + Kernel, ArgIndex, sizeof(const void *), nullptr, &ArgValue)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( +ur_result_t urKernelSetExecInfo( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object ur_kernel_exec_info_t PropName, ///< [in] name of the execution attribute size_t PropSize, ///< [in] size in byte the attribute value @@ -1057,7 +1060,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( +ur_result_t urKernelSetArgSampler( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_sampler_properties_t @@ -1072,7 +1075,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( +ur_result_t urKernelSetArgMemObj( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] const ur_kernel_arg_mem_obj_properties_t @@ -1110,7 +1113,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( +ur_result_t urKernelGetNativeHandle( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel. ur_native_handle_t *NativeKernel ///< [out] a pointer to the native handle of the kernel. @@ -1121,7 +1124,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( +ur_result_t urKernelSuggestMaxCooperativeGroupCountExp( ur_kernel_handle_t hKernel, size_t localWorkSize, size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet) { (void)localWorkSize; @@ -1134,7 +1137,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( +ur_result_t urKernelCreateWithNativeHandle( ur_native_handle_t NativeKernel, ///< [in] the native handle of the kernel. ur_context_handle_t Context, ///< [in] handle of the context object ur_program_handle_t Program, @@ -1167,13 +1170,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( return UR_RESULT_SUCCESS; } +ur_result_t urKernelSetSpecializationConstants( + ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object + uint32_t Count, ///< [in] the number of elements in the pSpecConstants array + const ur_specialization_constant_info_t + *SpecConstants ///< [in] array of specialization constant value + ///< descriptions +) { + std::ignore = Kernel; + std::ignore = Count; + std::ignore = SpecConstants; + logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), + "{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +} // namespace ur::level_zero + ur_result_t ur_kernel_handle_t_::initialize() { // Retain the program and context to show it's used by this kernel. 
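// Caller-side view of the argument forwarders above: urKernelSetArgPointer
// receives the USM pointer value itself, and the adapter forwards it to
// urKernelSetArgValue as a value of size sizeof(const void *). Minimal sketch
// against the public API; the kernel and USM allocation are assumed to exist.
#include <ur_api.h>

static ur_result_t setUsmPointerArg(ur_kernel_handle_t Kernel, uint32_t Index,
                                    void *UsmPtr) {
  return urKernelSetArgPointer(Kernel, Index, /*pProperties=*/nullptr, UsmPtr);
}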
- UR_CALL(urProgramRetain(Program)); + UR_CALL(ur::level_zero::urProgramRetain(Program)); if (IndirectAccessTrackingEnabled) // TODO: do piContextRetain without the guard - UR_CALL(urContextRetain(Program->Context)); + UR_CALL(ur::level_zero::urContextRetain(Program->Context)); // Set up how to obtain kernel properties when needed. ZeKernelProperties.Compute = [this](ze_kernel_properties_t &Properties) { @@ -1193,21 +1213,6 @@ ur_result_t ur_kernel_handle_t_::initialize() { return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetSpecializationConstants( - ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object - uint32_t Count, ///< [in] the number of elements in the pSpecConstants array - const ur_specialization_constant_info_t - *SpecConstants ///< [in] array of specialization constant value - ///< descriptions -) { - std::ignore = Kernel; - std::ignore = Count; - std::ignore = SpecConstants; - logger::error(logger::LegacyMessage("[UR][L0] {} function not implemented!"), - "{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunchCustomExp( ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList, @@ -1224,4 +1229,4 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueKernelLaunchCustomExp( std::ignore = phEventWaitList; std::ignore = phEvent; return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} +} \ No newline at end of file diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 95650a7b94..d327b2b790 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -18,6 +18,7 @@ #include "image.hpp" #include "logger/ur_logger.hpp" #include "queue.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" // Default to using compute engine for fill operation, but allow to @@ -302,7 +303,7 @@ static ur_result_t ZeHostMemAllocHelper(void **ResultPtr, // indirect access, that is why explicitly retain context to be sure // that it is released after all memory allocations in this context are // released. - UR_CALL(urContextRetain(UrContext)); + UR_CALL(ur::level_zero::urContextRetain(UrContext)); } ZeStruct ZeDesc; @@ -951,10 +952,10 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemBufferMap( if (Buffer->OnHost) { // Wait on incoming events before doing the copy if (NumEventsInWaitList > 0) - UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); + UR_CALL(ur::level_zero::urEventWait(NumEventsInWaitList, EventWaitList)); if (Queue->isInOrderQueue()) - UR_CALL(urQueueFinish(Queue)); + UR_CALL(ur::level_zero::urQueueFinish(Queue)); // Lock automatically releases when this goes out of scope. 
std::scoped_lock Guard(Buffer->Mutex); @@ -1107,10 +1108,10 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueMemUnmap( if (Buffer->OnHost) { // Wait on incoming events before doing the copy if (NumEventsInWaitList > 0) - UR_CALL(urEventWait(NumEventsInWaitList, EventWaitList)); + UR_CALL(ur::level_zero::urEventWait(NumEventsInWaitList, EventWaitList)); if (Queue->isInOrderQueue()) - UR_CALL(urQueueFinish(Queue)); + UR_CALL(ur::level_zero::urQueueFinish(Queue)); char *ZeHandleDst; UR_CALL(Buffer->getZeHandle(ZeHandleDst, ur_mem_handle_t_::write_only, @@ -1475,7 +1476,9 @@ static ur_result_t ur2zeImageDesc(const ur_image_format_t *ImageFormat, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( +namespace ur::level_zero { + +ur_result_t urMemImageCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_mem_flags_t Flags, ///< [in] allocation and usage information flags const ur_image_format_t @@ -1524,7 +1527,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( +ur_result_t urMemImageCreateWithNativeHandle( ur_native_handle_t NativeMem, ///< [in] the native handle to the memory. ur_context_handle_t Context, ///< [in] handle of the context object. [[maybe_unused]] const ur_image_format_t @@ -1552,7 +1555,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( +ur_result_t urMemBufferCreate( ur_context_handle_t Context, ///< [in] handle of the context object ur_mem_flags_t Flags, ///< [in] allocation and usage information flags size_t Size, ///< [in] size in bytes of the memory object to be allocated @@ -1646,14 +1649,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemRetain( +ur_result_t urMemRetain( ur_mem_handle_t Mem ///< [in] handle of the memory object to get access ) { Mem->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemRelease( +ur_result_t urMemRelease( ur_mem_handle_t Mem ///< [in] handle of the memory object to release ) { if (!Mem->RefCount.decrementAndTest()) @@ -1679,7 +1682,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( +ur_result_t urMemBufferPartition( ur_mem_handle_t Buffer, ///< [in] handle of the buffer object to allocate from ur_mem_flags_t Flags, ///< [in] allocation and usage information flags @@ -1715,7 +1718,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( +ur_result_t urMemGetNativeHandle( ur_mem_handle_t Mem, ///< [in] handle of the mem. ur_device_handle_t, ///< [in] handle of the device. ur_native_handle_t @@ -1729,7 +1732,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( +ur_result_t urMemBufferCreateWithNativeHandle( ur_native_handle_t NativeMem, ///< [in] the native handle to the memory. ur_context_handle_t Context, ///< [in] handle of the context object. 
const ur_mem_native_properties_t @@ -1796,7 +1799,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( ContextsLock.lock(); // Retain context to be sure that it is released after all memory // allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); Context->MemAllocs.emplace(std::piecewise_construct, std::forward_as_tuple(Ptr), @@ -1832,7 +1835,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo( +ur_result_t urMemGetInfo( ur_mem_handle_t Memory, ///< [in] handle to the memory object being queried. ur_mem_info_t MemInfoType, ///< [in] type of the info to retrieve. size_t PropSize, ///< [in] the number of bytes of memory pointed to by @@ -1868,7 +1871,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo( +ur_result_t urMemImageGetInfo( ur_mem_handle_t Memory, ///< [in] handle to the image object being queried. ur_image_info_t ImgInfoType, ///< [in] type of image info to retrieve. size_t PropSize, ///< [in] the number of bytes of memory pointer to by @@ -1890,6 +1893,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero // If indirect access tracking is enabled then performs reference counting, // otherwise just calls zeMemAllocDevice. @@ -1910,7 +1914,7 @@ static ur_result_t ZeDeviceMemAllocHelper(void **ResultPtr, // indirect access, that is why explicitly retain context to be sure // that it is released after all memory allocations in this context are // released. 
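// Caller-side sketch of the urMemGetInfo query path defined earlier in this
// file, using the usual UR info protocol: pass a correctly sized buffer and
// optionally receive the required size. Assumes an existing buffer handle.
#include <ur_api.h>

static ur_result_t queryBufferSize(ur_mem_handle_t Buffer, size_t *SizeOut) {
  return urMemGetInfo(Buffer, UR_MEM_INFO_SIZE, sizeof(*SizeOut), SizeOut,
                      /*pPropSizeRet=*/nullptr);
}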
- UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } ze_device_mem_alloc_desc_t ZeDesc = {}; @@ -1970,8 +1974,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - UR_CALL(urUSMHostAlloc(UrContext, &USMDesc, Pool, Size, - reinterpret_cast(&ZeHandle))); + UR_CALL(ur::level_zero::urUSMHostAlloc( + UrContext, &USMDesc, Pool, Size, + reinterpret_cast(&ZeHandle))); } else { HostAllocation.ReleaseAction = allocation_t::free_native; UR_CALL(ZeHostMemAllocHelper(reinterpret_cast(&ZeHandle), @@ -2029,8 +2034,9 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - UR_CALL(urUSMDeviceAlloc(UrContext, Device, &USMDesc, Pool, Size, - reinterpret_cast(&ZeHandle))); + UR_CALL(ur::level_zero::urUSMDeviceAlloc( + UrContext, Device, &USMDesc, Pool, Size, + reinterpret_cast(&ZeHandle))); } else { Allocation.ReleaseAction = allocation_t::free_native; UR_CALL(ZeDeviceMemAllocHelper(reinterpret_cast(&ZeHandle), @@ -2093,8 +2099,8 @@ ur_result_t _ur_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode, ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - UR_CALL( - urUSMHostAlloc(UrContext, &USMDesc, Pool, Size, &ZeHandleHost)); + UR_CALL(ur::level_zero::urUSMHostAlloc(UrContext, &USMDesc, Pool, + Size, &ZeHandleHost)); } else { HostAllocation.ReleaseAction = allocation_t::free_native; UR_CALL(ZeHostMemAllocHelper(&ZeHandleHost, UrContext, Size)); diff --git a/source/adapters/level_zero/memory.hpp b/source/adapters/level_zero/memory.hpp index b590165947..58b88e7310 100644 --- a/source/adapters/level_zero/memory.hpp +++ b/source/adapters/level_zero/memory.hpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/physical_mem.cpp b/source/adapters/level_zero/physical_mem.cpp index d4d9792f24..e7bb498859 100644 --- a/source/adapters/level_zero/physical_mem.cpp +++ b/source/adapters/level_zero/physical_mem.cpp @@ -14,7 +14,9 @@ #include "device.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( +namespace ur::level_zero { + +ur_result_t urPhysicalMemCreate( ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, [[maybe_unused]] const ur_physical_mem_properties_t *pProperties, ur_physical_mem_handle_t *phPhysicalMem) { @@ -35,14 +37,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urPhysicalMemCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem) { hPhysicalMem->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { +ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { if (!hPhysicalMem->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -52,3 +52,4 @@ urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem) { return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index 10b8ef5756..f2becabff2 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -12,7 +12,9 @@ #include 
"adapter.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( +namespace ur::level_zero { + +ur_result_t urPlatformGet( ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ///< [in] the number of platforms to be added to ///< phPlatforms. If phPlatforms is not NULL, then @@ -47,7 +49,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo( +ur_result_t urPlatformGetInfo( ur_platform_handle_t Platform, ///< [in] handle of the platform ur_platform_info_t ParamName, ///< [in] type of the info to retrieve size_t Size, ///< [in] the number of bytes pointed to by pPlatformInfo. @@ -101,7 +103,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetApiVersion( +ur_result_t urPlatformGetApiVersion( ur_platform_handle_t Driver, ///< [in] handle of the platform ur_api_version_t *Version ///< [out] api version ) { @@ -110,7 +112,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetApiVersion( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( +ur_result_t urPlatformGetNativeHandle( ur_platform_handle_t Platform, ///< [in] handle of the platform. ur_native_handle_t *NativePlatform ///< [out] a pointer to the native ///< handle of the platform. @@ -120,7 +122,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( +ur_result_t urPlatformCreateWithNativeHandle( ur_native_handle_t NativePlatform, ///< [in] the native handle of the platform. ur_adapter_handle_t, @@ -135,12 +137,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( uint32_t NumPlatforms = 0; ur_adapter_handle_t AdapterHandle = GlobalAdapter; - UR_CALL(urPlatformGet(&AdapterHandle, 1, 0, nullptr, &NumPlatforms)); + UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, 0, nullptr, + &NumPlatforms)); if (NumPlatforms) { std::vector Platforms(NumPlatforms); - UR_CALL(urPlatformGet(&AdapterHandle, 1, NumPlatforms, Platforms.data(), - nullptr)); + UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, NumPlatforms, + Platforms.data(), nullptr)); // The SYCL spec requires that the set of platforms must remain fixed for // the duration of the application's execution. We assume that we found all @@ -158,6 +161,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( return UR_RESULT_ERROR_INVALID_VALUE; } +// Returns plugin specific backend option. +// Current support is only for optimization options. +// Return '-ze-opt-disable' for frontend_option = -O0. +// Return '-ze-opt-level=2' for frontend_option = -O1, -O2 or -O3. +// Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for +// frontend_option=-ftarget-compile-fast. +ur_result_t urPlatformGetBackendOption( + ur_platform_handle_t Platform, ///< [in] handle of the platform instance. + const char *FrontendOption, ///< [in] string containing the frontend option. + const char * + *PlatformOption ///< [out] returns the correct platform specific + ///< compiler option based on the frontend option. 
+) { + std::ignore = Platform; + using namespace std::literals; + if (FrontendOption == nullptr) { + return UR_RESULT_SUCCESS; + } + if (FrontendOption == ""sv) { + *PlatformOption = ""; + return UR_RESULT_SUCCESS; + } + if (FrontendOption == "-O0"sv) { + *PlatformOption = "-ze-opt-disable"; + return UR_RESULT_SUCCESS; + } + if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv || + FrontendOption == "-O3"sv) { + *PlatformOption = "-ze-opt-level=2"; + return UR_RESULT_SUCCESS; + } + if (FrontendOption == "-ftarget-compile-fast"sv) { + *PlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; + return UR_RESULT_SUCCESS; + } + return UR_RESULT_ERROR_INVALID_VALUE; +} +} // namespace ur::level_zero ur_result_t ur_platform_handle_t_::initialize() { ZE2UR_CALL(zeDriverGetApiVersion, (ZeDriver, &ZeApiVersion)); ZeDriverApiVersion = std::to_string(ZE_MAJOR_VERSION(ZeApiVersion)) + "." + @@ -435,41 +476,3 @@ ur_result_t ur_platform_handle_t_::populateDeviceCacheIfNeeded() { DeviceCachePopulated = true; return UR_RESULT_SUCCESS; } - -// Returns plugin specific backend option. -// Current support is only for optimization options. -// Return '-ze-opt-disable' for frontend_option = -O0. -// Return '-ze-opt-level=2' for frontend_option = -O1, -O2 or -O3. -// Return '-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'' for -// frontend_option=-ftarget-compile-fast. -UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetBackendOption( - ur_platform_handle_t Platform, ///< [in] handle of the platform instance. - const char *FrontendOption, ///< [in] string containing the frontend option. - const char * - *PlatformOption ///< [out] returns the correct platform specific - ///< compiler option based on the frontend option. -) { - std::ignore = Platform; - using namespace std::literals; - if (FrontendOption == nullptr) { - return UR_RESULT_SUCCESS; - } - if (FrontendOption == ""sv) { - *PlatformOption = ""; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-O0"sv) { - *PlatformOption = "-ze-opt-disable"; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-O1"sv || FrontendOption == "-O2"sv || - FrontendOption == "-O3"sv) { - *PlatformOption = "-ze-opt-level=2"; - return UR_RESULT_SUCCESS; - } - if (FrontendOption == "-ftarget-compile-fast"sv) { - *PlatformOption = "-igc_opts 'PartitionUnit=1,SubroutineThreshold=50000'"; - return UR_RESULT_SUCCESS; - } - return UR_RESULT_ERROR_INVALID_VALUE; -} diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index 26c75aef31..9e1b176f65 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -10,6 +10,7 @@ #include "program.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" extern "C" { @@ -48,7 +49,9 @@ checkUnresolvedSymbols(ze_module_handle_t ZeModule, } } // extern "C" -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( +namespace ur::level_zero { + +ur_result_t urProgramCreateWithIL( ur_context_handle_t Context, ///< [in] handle of the context instance const void *IL, ///< [in] pointer to IL binary. size_t Length, ///< [in] length of `pIL` in bytes. 
@@ -71,7 +74,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( +ur_result_t urProgramCreateWithBinary( ur_context_handle_t Context, ///< [in] handle of the context instance ur_device_handle_t Device, ///< [in] handle to device associated with binary. @@ -108,17 +111,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild( +ur_result_t urProgramBuild( ur_context_handle_t Context, ///< [in] handle of the context instance. ur_program_handle_t Program, ///< [in] Handle of the program to build. const char *Options ///< [in][optional] pointer to build options ///< null-terminated string. ) { - return urProgramBuildExp(Program, Context->Devices.size(), - Context->Devices.data(), Options); + return ur::level_zero::urProgramBuildExp(Program, Context->Devices.size(), + Context->Devices.data(), Options); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( +ur_result_t urProgramBuildExp( ur_program_handle_t hProgram, ///< [in] Handle of the program to build. uint32_t numDevices, ///< [in] number of devices ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to @@ -221,7 +224,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp( return Result; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( +ur_result_t urProgramCompileExp( ur_program_handle_t hProgram, ///< [in][out] handle of the program to compile. uint32_t numDevices, ///< [in] number of devices @@ -232,10 +235,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompileExp( ) { std::ignore = numDevices; std::ignore = phDevices; - return urProgramCompile(hProgram->Context, hProgram, pOptions); + return ur::level_zero::urProgramCompile(hProgram->Context, hProgram, + pOptions); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( +ur_result_t urProgramCompile( ur_context_handle_t Context, ///< [in] handle of the context instance. ur_program_handle_t Program, ///< [in][out] handle of the program to compile. @@ -274,7 +278,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramLink( +ur_result_t urProgramLink( ur_context_handle_t Context, ///< [in] handle of the context instance. uint32_t Count, ///< [in] number of program handles in `phPrograms`. const ur_program_handle_t *Programs, ///< [in][range(0, count)] pointer to @@ -284,12 +288,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLink( ur_program_handle_t *Program ///< [out] pointer to handle of program object created. ) { - return urProgramLinkExp(Context, Context->Devices.size(), - Context->Devices.data(), Count, Programs, Options, - Program); + return ur::level_zero::urProgramLinkExp(Context, Context->Devices.size(), + Context->Devices.data(), Count, + Programs, Options, Program); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( +ur_result_t urProgramLinkExp( ur_context_handle_t hContext, ///< [in] handle of the context instance. 
uint32_t numDevices, ///< [in] number of devices ur_device_handle_t *phDevices, ///< [in][range(0, numDevices)] pointer to @@ -475,14 +479,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramLinkExp( return UrResult; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramRetain( +ur_result_t urProgramRetain( ur_program_handle_t Program ///< [in] handle for the Program to retain ) { Program->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramRelease( +ur_result_t urProgramRelease( ur_program_handle_t Program ///< [in] handle for the Program to release ) { if (!Program->RefCount.decrementAndTest()) @@ -519,7 +523,7 @@ static bool is_in_separated_string(const std::string &str, char delimiter, return false; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( +ur_result_t urProgramGetFunctionPointer( ur_device_handle_t Device, ///< [in] handle of the device to retrieve pointer for. ur_program_handle_t @@ -559,12 +563,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( if (ZeResult == ZE_RESULT_ERROR_INVALID_ARGUMENT) { size_t Size; *FunctionPointerRet = 0; - UR_CALL(urProgramGetInfo(Program, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, - &Size)); + UR_CALL(ur::level_zero::urProgramGetInfo( + Program, UR_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &Size)); std::string ClResult(Size, ' '); - UR_CALL(urProgramGetInfo(Program, UR_PROGRAM_INFO_KERNEL_NAMES, - ClResult.size(), &ClResult[0], nullptr)); + UR_CALL(ur::level_zero::urProgramGetInfo( + Program, UR_PROGRAM_INFO_KERNEL_NAMES, ClResult.size(), &ClResult[0], + nullptr)); // Get rid of the null terminator and search for kernel_name // If function can be found return error code to indicate it @@ -584,7 +589,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( return ze2urResult(ZeResult); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( +ur_result_t urProgramGetGlobalVariablePointer( ur_device_handle_t Device, ///< [in] handle of the device to retrieve the pointer for. ur_program_handle_t @@ -611,7 +616,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( return ze2urResult(ZeResult); } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo( +ur_result_t urProgramGetInfo( ur_program_handle_t Program, ///< [in] handle of the Program object ur_program_info_t PropName, ///< [in] name of the Program property to query size_t PropSize, ///< [in] the size of the Program property. 
@@ -727,7 +732,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo( +ur_result_t urProgramGetBuildInfo( ur_program_handle_t Program, ///< [in] handle of the Program object ur_device_handle_t Device, ///< [in] handle of the Device object ur_program_build_info_t @@ -805,7 +810,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstant( +ur_result_t urProgramSetSpecializationConstant( ur_program_handle_t Program, ///< [in] handle of the Program object uint32_t SpecId, ///< [in] specification constant Id size_t SpecSize, ///< [in] size of the specialization constant value @@ -820,7 +825,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstant( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( +ur_result_t urProgramGetNativeHandle( ur_program_handle_t Program, ///< [in] handle of the program. ur_native_handle_t *NativeProgram ///< [out] a pointer to the native ///< handle of the program. @@ -841,7 +846,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( +ur_result_t urProgramCreateWithNativeHandle( ur_native_handle_t NativeProgram, ///< [in] the native handle of the program. ur_context_handle_t Context, ///< [in] handle of the context instance @@ -871,6 +876,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( return UR_RESULT_SUCCESS; } +ur_result_t urProgramSetSpecializationConstants( + ur_program_handle_t Program, ///< [in] handle of the Program object + uint32_t Count, ///< [in] the number of elements in the pSpecConstants array + const ur_specialization_constant_info_t + *SpecConstants ///< [in][range(0, count)] array of specialization + ///< constant value descriptions +) { + std::scoped_lock Guard(Program->Mutex); + + // Remember the value of this specialization constant until the program is + // built. Note that we only save the pointer to the buffer that contains the + // value. The caller is responsible for maintaining storage for this buffer. + // + // NOTE: SpecSize is unused in Level Zero, the size is known from SPIR-V by + // SpecID. + for (uint32_t SpecIt = 0; SpecIt < Count; SpecIt++) { + uint32_t SpecId = SpecConstants[SpecIt].id; + Program->SpecConstants[SpecId] = SpecConstants[SpecIt].pValue; + } + return UR_RESULT_SUCCESS; +} + +} // namespace ur::level_zero + ur_program_handle_t_::~ur_program_handle_t_() { if (!resourcesReleased) { ur_release_program_resources(true); @@ -905,25 +934,3 @@ void ur_program_handle_t_::ur_release_program_resources(bool deletion) { resourcesReleased = true; } } - -UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( - ur_program_handle_t Program, ///< [in] handle of the Program object - uint32_t Count, ///< [in] the number of elements in the pSpecConstants array - const ur_specialization_constant_info_t - *SpecConstants ///< [in][range(0, count)] array of specialization - ///< constant value descriptions -) { - std::scoped_lock Guard(Program->Mutex); - - // Remember the value of this specialization constant until the program is - // built. Note that we only save the pointer to the buffer that contains the - // value. The caller is responsible for maintaining storage for this buffer. 
-  //
-  // NOTE: SpecSize is unused in Level Zero, the size is known from SPIR-V by
-  // SpecID.
-  for (uint32_t SpecIt = 0; SpecIt < Count; SpecIt++) {
-    uint32_t SpecId = SpecConstants[SpecIt].id;
-    Program->SpecConstants[SpecId] = SpecConstants[SpecIt].pValue;
-  }
-  return UR_RESULT_SUCCESS;
-}
diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp
index ec7e2d5299..f1247ad5a1 100644
--- a/source/adapters/level_zero/queue.cpp
+++ b/source/adapters/level_zero/queue.cpp
@@ -19,7 +19,7 @@
 #include "common.hpp"
 #include "event.hpp"
 #include "queue.hpp"
-#include "ur_api.h"
+#include "ur_interface_loader.hpp"
 #include "ur_level_zero.hpp"
 #include "ur_util.hpp"
 #include "ze_api.h"
@@ -469,7 +469,9 @@ static bool doEagerInit = [] {
   return EagerInit ? std::atoi(EagerInit) != 0 : false;
 }();
 
-UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(
+namespace ur::level_zero {
+
+ur_result_t urQueueCreate(
     ur_context_handle_t Context, ///< [in] handle of the context object
     ur_device_handle_t Device,   ///< [in] handle of the device object
     const ur_queue_properties_t
@@ -584,6 +586,100 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(
   return UR_RESULT_SUCCESS;
 }
 
+ur_result_t urQueueCreateWithNativeHandle(
+    ur_native_handle_t NativeQueue, ///< [in] the native handle of the queue.
+    ur_context_handle_t Context,    ///< [in] handle of the context object
+    ur_device_handle_t Device,      ///
+    const ur_queue_native_properties_t *NativeProperties, ///
+    ur_queue_handle_t
+        *RetQueue ///< [out] pointer to the handle of the queue object created.
+) {
+  bool OwnNativeHandle = false;
+  ur_queue_flags_t Flags{};
+  int32_t NativeHandleDesc{};
+
+  if (NativeProperties) {
+    OwnNativeHandle = NativeProperties->isNativeHandleOwned;
+    void *pNext = NativeProperties->pNext;
+    while (pNext) {
+      const ur_base_properties_t *extendedProperties =
+          reinterpret_cast<const ur_base_properties_t *>(pNext);
+      if (extendedProperties->stype == UR_STRUCTURE_TYPE_QUEUE_PROPERTIES) {
+        const ur_queue_properties_t *UrProperties =
+            reinterpret_cast<const ur_queue_properties_t *>(extendedProperties);
+        Flags = UrProperties->flags;
+      } else if (extendedProperties->stype ==
+                 UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC) {
+        const ur_queue_native_desc_t *UrNativeDesc =
+            reinterpret_cast<const ur_queue_native_desc_t *>(
+                extendedProperties);
+        if (UrNativeDesc->pNativeData)
+          NativeHandleDesc =
+              *(reinterpret_cast<int32_t *>((UrNativeDesc->pNativeData)));
+      }
+      pNext = extendedProperties->pNext;
+    }
+  }
+
+  // Get the device handle from first device in the platform
+  // Maybe this is not completely correct.
+  uint32_t NumEntries = 1;
+  ur_platform_handle_t Platform{};
+  ur_adapter_handle_t AdapterHandle = GlobalAdapter;
+  UR_CALL(ur::level_zero::urPlatformGet(&AdapterHandle, 1, NumEntries,
+                                        &Platform, nullptr));
+
+  ur_device_handle_t UrDevice = Device;
+  if (UrDevice == nullptr) {
+    UR_CALL(ur::level_zero::urDeviceGet(Platform, UR_DEVICE_TYPE_GPU,
+                                        NumEntries, &UrDevice, nullptr));
+  }
+
+  // The NativeHandleDesc has value if the native handle is an immediate
+  // command list.
+  if (NativeHandleDesc == 1) {
+    std::vector<ze_command_queue_handle_t> ComputeQueues{nullptr};
+    std::vector<ze_command_queue_handle_t> CopyQueues;
+
+    try {
+      ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_(
+          ComputeQueues, CopyQueues, Context, UrDevice, OwnNativeHandle, Flags);
+      *RetQueue = reinterpret_cast<ur_queue_handle_t>(Queue);
+    } catch (const std::bad_alloc &) {
+      return UR_RESULT_ERROR_OUT_OF_RESOURCES;
+    } catch (...) {
+      return UR_RESULT_ERROR_UNKNOWN;
+    }
+    auto &InitialGroup =
+        Legacy(*RetQueue)->ComputeQueueGroupsByTID.begin()->second;
+    InitialGroup.setImmCmdList(Legacy(*RetQueue),
+                               ur_cast<ze_command_list_handle_t>(NativeQueue));
+  } else {
+    auto ZeQueue = ur_cast<ze_command_queue_handle_t>(NativeQueue);
+    // Assume this is the "0" index queue in the compute command-group.
+    std::vector<ze_command_queue_handle_t> ZeQueues{ZeQueue};
+
+    // TODO: see what we can do to correctly initialize PI queue for
+    // compute vs. copy Level-Zero queue. Currently we will send
+    // all commands to the "ZeQueue".
+    std::vector<ze_command_queue_handle_t> ZeroCopyQueues;
+
+    try {
+      ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_(
+          ZeQueues, ZeroCopyQueues, Context, UrDevice, OwnNativeHandle, Flags);
+      *RetQueue = reinterpret_cast<ur_queue_handle_t>(Queue);
+    } catch (const std::bad_alloc &) {
+      return UR_RESULT_ERROR_OUT_OF_RESOURCES;
+    } catch (...) {
+      return UR_RESULT_ERROR_UNKNOWN;
+    }
+  }
+  Legacy(*RetQueue)->UsingImmCmdLists = (NativeHandleDesc == 1);
+
+  return UR_RESULT_SUCCESS;
+}
+} // namespace ur::level_zero
+
 ur_result_t ur_queue_handle_legacy_t_::queueRetain() {
   auto Queue = this;
@@ -753,98 +849,6 @@ void ur_queue_handle_legacy_t_::ur_queue_group_t::setImmCmdList(
           .first);
 }
 
-UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle(
-    ur_native_handle_t NativeQueue, ///< [in] the native handle of the queue.
-    ur_context_handle_t Context,    ///< [in] handle of the context object
-    ur_device_handle_t Device,      ///
-    const ur_queue_native_properties_t *NativeProperties, ///
-    ur_queue_handle_t
-        *RetQueue ///< [out] pointer to the handle of the queue object created.
-) {
-  bool OwnNativeHandle = false;
-  ur_queue_flags_t Flags{};
-  int32_t NativeHandleDesc{};
-
-  if (NativeProperties) {
-    OwnNativeHandle = NativeProperties->isNativeHandleOwned;
-    void *pNext = NativeProperties->pNext;
-    while (pNext) {
-      const ur_base_properties_t *extendedProperties =
-          reinterpret_cast<const ur_base_properties_t *>(pNext);
-      if (extendedProperties->stype == UR_STRUCTURE_TYPE_QUEUE_PROPERTIES) {
-        const ur_queue_properties_t *UrProperties =
-            reinterpret_cast<const ur_queue_properties_t *>(extendedProperties);
-        Flags = UrProperties->flags;
-      } else if (extendedProperties->stype ==
-                 UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC) {
-        const ur_queue_native_desc_t *UrNativeDesc =
-            reinterpret_cast<const ur_queue_native_desc_t *>(
-                extendedProperties);
-        if (UrNativeDesc->pNativeData)
-          NativeHandleDesc =
-              *(reinterpret_cast<int32_t *>((UrNativeDesc->pNativeData)));
-      }
-      pNext = extendedProperties->pNext;
-    }
-  }
-
-  // Get the device handle from first device in the platform
-  // Maybe this is not completely correct.
-  uint32_t NumEntries = 1;
-  ur_platform_handle_t Platform{};
-  ur_adapter_handle_t AdapterHandle = GlobalAdapter;
-  UR_CALL(urPlatformGet(&AdapterHandle, 1, NumEntries, &Platform, nullptr));
-
-  ur_device_handle_t UrDevice = Device;
-  if (UrDevice == nullptr) {
-    UR_CALL(urDeviceGet(Platform, UR_DEVICE_TYPE_GPU, NumEntries, &UrDevice,
-                        nullptr));
-  }
-
-  // The NativeHandleDesc has value if if the native handle is an immediate
-  // command list.
-  if (NativeHandleDesc == 1) {
-    std::vector<ze_command_queue_handle_t> ComputeQueues{nullptr};
-    std::vector<ze_command_queue_handle_t> CopyQueues;
-
-    try {
-      ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_(
-          ComputeQueues, CopyQueues, Context, UrDevice, OwnNativeHandle, Flags);
-      *RetQueue = reinterpret_cast<ur_queue_handle_t>(Queue);
-    } catch (const std::bad_alloc &) {
-      return UR_RESULT_ERROR_OUT_OF_RESOURCES;
-    } catch (...)
{ - return UR_RESULT_ERROR_UNKNOWN; - } - auto &InitialGroup = - Legacy(*RetQueue)->ComputeQueueGroupsByTID.begin()->second; - InitialGroup.setImmCmdList(Legacy(*RetQueue), - ur_cast(NativeQueue)); - } else { - auto ZeQueue = ur_cast(NativeQueue); - // Assume this is the "0" index queue in the compute command-group. - std::vector ZeQueues{ZeQueue}; - - // TODO: see what we can do to correctly initialize PI queue for - // compute vs. copy Level-Zero queue. Currently we will send - // all commands to the "ZeQueue". - std::vector ZeroCopyQueues; - - try { - ur_queue_handle_t_ *Queue = new ur_queue_handle_legacy_t_( - ZeQueues, ZeroCopyQueues, Context, UrDevice, OwnNativeHandle, Flags); - *RetQueue = reinterpret_cast(Queue); - } catch (const std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - } - Legacy(*RetQueue)->UsingImmCmdLists = (NativeHandleDesc == 1); - - return UR_RESULT_SUCCESS; -} - ur_result_t ur_queue_handle_legacy_t_::queueFinish() { auto Queue = this; if (Queue->UsingImmCmdLists) { @@ -1918,7 +1922,7 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_legacy_t Queue, // event will not be waited/released by SYCL RT, so it must be destroyed by // EventRelease in resetCommandList. if (!IsInternal) - UR_CALL(urEventRetain(*Event)); + UR_CALL(ur::level_zero::urEventRetain(*Event)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index d06bbc5820..a86f1d9bb0 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/queue_api.cpp b/source/adapters/level_zero/queue_api.cpp index 622000a07f..fd8e13a15a 100644 --- a/source/adapters/level_zero/queue_api.cpp +++ b/source/adapters/level_zero/queue_api.cpp @@ -14,31 +14,30 @@ ur_queue_handle_t_::~ur_queue_handle_t_() {} -UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, - ur_queue_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { +namespace ur::level_zero { +ur_result_t urQueueGetInfo(ur_queue_handle_t hQueue, ur_queue_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { return hQueue->queueGetInfo(propName, propSize, pPropValue, pPropSizeRet); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRetain(ur_queue_handle_t hQueue) { +ur_result_t urQueueRetain(ur_queue_handle_t hQueue) { return hQueue->queueRetain(); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) { +ur_result_t urQueueRelease(ur_queue_handle_t hQueue) { return hQueue->queueRelease(); } -UR_APIEXPORT ur_result_t UR_APICALL -urQueueGetNativeHandle(ur_queue_handle_t hQueue, ur_queue_native_desc_t *pDesc, - ur_native_handle_t *phNativeQueue) { +ur_result_t urQueueGetNativeHandle(ur_queue_handle_t hQueue, + ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue) { return hQueue->queueGetNativeHandle(pDesc, phNativeQueue); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { +ur_result_t urQueueFinish(ur_queue_handle_t hQueue) { return hQueue->queueFinish(); } -UR_APIEXPORT ur_result_t UR_APICALL urQueueFlush(ur_queue_handle_t hQueue) { +ur_result_t urQueueFlush(ur_queue_handle_t hQueue) { return hQueue->queueFlush(); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( +ur_result_t urEnqueueKernelLaunch( ur_queue_handle_t hQueue, 
ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, @@ -47,27 +46,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( - ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueEventsWait(ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueEventsWait(numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( +ur_result_t urEnqueueEventsWaitWithBarrier( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { return hQueue->enqueueEventsWaitWithBarrier(numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, - size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemBufferRead(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemBufferRead(hBuffer, blockingRead, offset, size, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( +ur_result_t urEnqueueMemBufferWrite( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -75,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( +ur_result_t urEnqueueMemBufferReadRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -87,7 +89,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( +ur_result_t urEnqueueMemBufferWriteRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, @@ -99,16 +101,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, - ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t 
*phEvent) { +ur_result_t urEnqueueMemBufferCopy(ur_queue_handle_t hQueue, + ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemBufferCopy(hBufferSrc, hBufferDst, srcOffset, dstOffset, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( +ur_result_t urEnqueueMemBufferCopyRect( ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, @@ -120,16 +124,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, const void *pPattern, - size_t patternSize, size_t offset, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemBufferFill(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, + size_t offset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemBufferFill(hBuffer, pPattern, patternSize, offset, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( +ur_result_t urEnqueueMemImageRead( ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, @@ -138,7 +144,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( hImage, blockingRead, origin, region, rowPitch, slicePitch, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( +ur_result_t urEnqueueMemImageWrite( ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, @@ -147,78 +153,85 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( hImage, blockingWrite, origin, region, rowPitch, slicePitch, pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( - ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, - ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, - ur_rect_offset_t dstOrigin, ur_rect_region_t region, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t +urEnqueueMemImageCopy(ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, + ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemImageCopy(hImageSrc, hImageDst, srcOrigin, dstOrigin, region, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingMap, - ur_map_flags_t mapFlags, size_t offset, size_t size, - uint32_t 
numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent, void **ppRetMap) { +ur_result_t urEnqueueMemBufferMap(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, void **ppRetMap) { return hQueue->enqueueMemBufferMap(hBuffer, blockingMap, mapFlags, offset, size, numEventsInWaitList, phEventWaitList, phEvent, ppRetMap); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( - ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueMemUnmap(ur_queue_handle_t hQueue, ur_mem_handle_t hMem, + void *pMappedPtr, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueMemUnmap(hMem, pMappedPtr, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( - ur_queue_handle_t hQueue, void *pMem, size_t patternSize, - const void *pPattern, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMFill(ur_queue_handle_t hQueue, void *pMem, + size_t patternSize, const void *pPattern, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMFill(pMem, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( - ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, - size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMMemcpy(ur_queue_handle_t hQueue, bool blocking, + void *pDst, const void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMMemcpy(blocking, pDst, pSrc, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( - ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMPrefetch(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMPrefetch(pMem, size, flags, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL -urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, size_t size, - ur_usm_advice_flags_t advice, ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMAdvise(pMem, size, advice, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( - ur_queue_handle_t hQueue, void *pMem, size_t pitch, size_t patternSize, - const void *pPattern, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t 
urEnqueueUSMFill2D(ur_queue_handle_t hQueue, void *pMem, + size_t pitch, size_t patternSize, + const void *pPattern, size_t width, + size_t height, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMFill2D(pMem, pitch, patternSize, pPattern, width, height, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( - ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, - const void *pSrc, size_t srcPitch, size_t width, size_t height, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueUSMMemcpy2D(ur_queue_handle_t hQueue, bool blocking, + void *pDst, size_t dstPitch, const void *pSrc, + size_t srcPitch, size_t width, size_t height, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueUSMMemcpy2D(blocking, pDst, dstPitch, pSrc, srcPitch, width, height, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( +ur_result_t urEnqueueDeviceGlobalVariableWrite( ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, bool blockingWrite, size_t count, size_t offset, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, @@ -227,7 +240,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( hProgram, name, blockingWrite, count, offset, pSrc, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( +ur_result_t urEnqueueDeviceGlobalVariableRead( ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, bool blockingRead, size_t count, size_t offset, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, @@ -236,25 +249,29 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( hProgram, name, blockingRead, count, offset, pDst, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pDst, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueReadHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueReadHostPipe(hProgram, pipe_symbol, blocking, pDst, size, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( - ur_queue_handle_t hQueue, ur_program_handle_t hProgram, - const char *pipe_symbol, bool blocking, void *pSrc, size_t size, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { +ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { return hQueue->enqueueWriteHostPipe(hProgram, pipe_symbol, blocking, pSrc, size, numEventsInWaitList, phEventWaitList, phEvent); } 
-UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( +ur_result_t urBindlessImagesImageCopyExp( ur_queue_handle_t hQueue, void *pDst, void *pSrc, const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset, @@ -266,7 +283,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( dstOffset, copyExtent, hostExtent, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( +ur_result_t urBindlessImagesWaitExternalSemaphoreExp( ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -274,7 +291,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( hSemaphore, hasWaitValue, waitValue, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( +ur_result_t urBindlessImagesSignalExternalSemaphoreExp( ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { @@ -282,7 +299,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( hSemaphore, hasSignalValue, signalValue, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( +ur_result_t urEnqueueCooperativeKernelLaunchExp( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, @@ -291,13 +308,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( +ur_result_t urEnqueueTimestampRecordingExp( ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { return hQueue->enqueueTimestampRecordingExp(blocking, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( +ur_result_t urEnqueueKernelLaunchCustomExp( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numPropsInLaunchPropList, @@ -309,7 +326,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( numPropsInLaunchPropList, launchPropList, numEventsInWaitList, phEventWaitList, phEvent); } -UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( +ur_result_t urEnqueueNativeCommandExp( ur_queue_handle_t hQueue, ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, @@ -320,3 +337,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( pfnNativeEnqueue, data, numMemsInMemList, phMemList, pProperties, numEventsInWaitList, phEventWaitList, phEvent); } +} // namespace ur::level_zero \ No newline at end of file diff --git a/source/adapters/level_zero/sampler.cpp b/source/adapters/level_zero/sampler.cpp index 54ca1b6672..d48e6aeede 
100644 --- a/source/adapters/level_zero/sampler.cpp +++ b/source/adapters/level_zero/sampler.cpp @@ -12,7 +12,9 @@ #include "logger/ur_logger.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreate( +namespace ur::level_zero { + +ur_result_t urSamplerCreate( ur_context_handle_t Context, ///< [in] handle of the context object const ur_sampler_desc_t *Props, ///< [in] specifies a list of sampler property names and their @@ -109,17 +111,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreate( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerRetain( - ur_sampler_handle_t - Sampler ///< [in] handle of the sampler object to get access +ur_result_t +urSamplerRetain(ur_sampler_handle_t + Sampler ///< [in] handle of the sampler object to get access ) { Sampler->RefCount.increment(); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease( - ur_sampler_handle_t - Sampler ///< [in] handle of the sampler object to release +ur_result_t +urSamplerRelease(ur_sampler_handle_t + Sampler ///< [in] handle of the sampler object to release ) { if (!Sampler->RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; @@ -133,7 +135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerRelease( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo( +ur_result_t urSamplerGetInfo( ur_sampler_handle_t Sampler, ///< [in] handle of the sampler object ur_sampler_info_t PropName, ///< [in] name of the sampler property to query size_t PropValueSize, ///< [in] size in bytes of the sampler property value @@ -152,7 +154,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( +ur_result_t urSamplerGetNativeHandle( ur_sampler_handle_t Sampler, ///< [in] handle of the sampler. ur_native_handle_t *NativeSampler ///< [out] a pointer to the native ///< handle of the sampler. @@ -164,7 +166,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( +ur_result_t urSamplerCreateWithNativeHandle( ur_native_handle_t NativeSampler, ///< [in] the native handle of the sampler. ur_context_handle_t Context, ///< [in] handle of the context object @@ -182,3 +184,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( "{} function not implemented!", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index fb03d23c00..36549b500c 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -1,19 +1,19 @@ -//===--------- ur_interface_loader.cpp - Level Zero Adapter----------------===// +//===--------- ur_interface_loader.cpp - Level Zero Adapter ------------===// // -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2024 Intel Corporation // // Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM // Exceptions. 
See LICENSE.TXT // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - #include #include -namespace { +#include "ur_interface_loader.hpp" -ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { +static ur_result_t validateProcInputs(ur_api_version_t version, + void *pDdiTable) { if (nullptr == pDdiTable) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } @@ -24,472 +24,590 @@ ur_result_t validateProcInputs(ur_api_version_t version, void *pDdiTable) { } return UR_RESULT_SUCCESS; } -} // namespace -#if defined(__cplusplus) +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +namespace ur::level_zero { +#elif defined(__cplusplus) extern "C" { #endif -UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_global_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( + ur_api_version_t version, ur_global_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnAdapterGet = urAdapterGet; - pDdiTable->pfnAdapterRelease = urAdapterRelease; - pDdiTable->pfnAdapterRetain = urAdapterRetain; - pDdiTable->pfnAdapterGetLastError = urAdapterGetLastError; - pDdiTable->pfnAdapterGetInfo = urAdapterGetInfo; - return retVal; + pDdiTable->pfnAdapterGet = ur::level_zero::urAdapterGet; + pDdiTable->pfnAdapterRelease = ur::level_zero::urAdapterRelease; + pDdiTable->pfnAdapterRetain = ur::level_zero::urAdapterRetain; + pDdiTable->pfnAdapterGetLastError = ur::level_zero::urAdapterGetLastError; + pDdiTable->pfnAdapterGetInfo = ur::level_zero::urAdapterGetInfo; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_context_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( + ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urContextCreate; - pDdiTable->pfnRetain = urContextRetain; - pDdiTable->pfnRelease = urContextRelease; - pDdiTable->pfnGetInfo = urContextGetInfo; - pDdiTable->pfnGetNativeHandle = urContextGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urContextCreateWithNativeHandle; - pDdiTable->pfnSetExtendedDeleter = urContextSetExtendedDeleter; + pDdiTable->pfnUnsampledImageHandleDestroyExp = + ur::level_zero::urBindlessImagesUnsampledImageHandleDestroyExp; + pDdiTable->pfnSampledImageHandleDestroyExp = + ur::level_zero::urBindlessImagesSampledImageHandleDestroyExp; + pDdiTable->pfnImageAllocateExp = + ur::level_zero::urBindlessImagesImageAllocateExp; + pDdiTable->pfnImageFreeExp = ur::level_zero::urBindlessImagesImageFreeExp; + pDdiTable->pfnUnsampledImageCreateExp = + ur::level_zero::urBindlessImagesUnsampledImageCreateExp; + pDdiTable->pfnSampledImageCreateExp = + ur::level_zero::urBindlessImagesSampledImageCreateExp; + pDdiTable->pfnImageCopyExp = 
ur::level_zero::urBindlessImagesImageCopyExp; + pDdiTable->pfnImageGetInfoExp = + ur::level_zero::urBindlessImagesImageGetInfoExp; + pDdiTable->pfnMipmapGetLevelExp = + ur::level_zero::urBindlessImagesMipmapGetLevelExp; + pDdiTable->pfnMipmapFreeExp = ur::level_zero::urBindlessImagesMipmapFreeExp; + pDdiTable->pfnImportExternalMemoryExp = + ur::level_zero::urBindlessImagesImportExternalMemoryExp; + pDdiTable->pfnMapExternalArrayExp = + ur::level_zero::urBindlessImagesMapExternalArrayExp; + pDdiTable->pfnReleaseInteropExp = + ur::level_zero::urBindlessImagesReleaseInteropExp; + pDdiTable->pfnImportExternalSemaphoreExp = + ur::level_zero::urBindlessImagesImportExternalSemaphoreExp; + pDdiTable->pfnDestroyExternalSemaphoreExp = + ur::level_zero::urBindlessImagesDestroyExternalSemaphoreExp; + pDdiTable->pfnWaitExternalSemaphoreExp = + ur::level_zero::urBindlessImagesWaitExternalSemaphoreExp; + pDdiTable->pfnSignalExternalSemaphoreExp = + ur::level_zero::urBindlessImagesSignalExternalSemaphoreExp; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_enqueue_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( + ur_api_version_t version, ur_command_buffer_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnKernelLaunch = urEnqueueKernelLaunch; - pDdiTable->pfnEventsWait = urEnqueueEventsWait; - pDdiTable->pfnEventsWaitWithBarrier = urEnqueueEventsWaitWithBarrier; - pDdiTable->pfnMemBufferRead = urEnqueueMemBufferRead; - pDdiTable->pfnMemBufferWrite = urEnqueueMemBufferWrite; - pDdiTable->pfnMemBufferReadRect = urEnqueueMemBufferReadRect; - pDdiTable->pfnMemBufferWriteRect = urEnqueueMemBufferWriteRect; - pDdiTable->pfnMemBufferCopy = urEnqueueMemBufferCopy; - pDdiTable->pfnMemBufferCopyRect = urEnqueueMemBufferCopyRect; - pDdiTable->pfnMemBufferFill = urEnqueueMemBufferFill; - pDdiTable->pfnMemImageRead = urEnqueueMemImageRead; - pDdiTable->pfnMemImageWrite = urEnqueueMemImageWrite; - pDdiTable->pfnMemImageCopy = urEnqueueMemImageCopy; - pDdiTable->pfnMemBufferMap = urEnqueueMemBufferMap; - pDdiTable->pfnMemUnmap = urEnqueueMemUnmap; - pDdiTable->pfnUSMFill = urEnqueueUSMFill; - pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy; - pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch; - pDdiTable->pfnUSMAdvise = urEnqueueUSMAdvise; - pDdiTable->pfnUSMFill2D = urEnqueueUSMFill2D; - pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D; - pDdiTable->pfnDeviceGlobalVariableWrite = urEnqueueDeviceGlobalVariableWrite; - pDdiTable->pfnDeviceGlobalVariableRead = urEnqueueDeviceGlobalVariableRead; - - return retVal; + pDdiTable->pfnCreateExp = ur::level_zero::urCommandBufferCreateExp; + pDdiTable->pfnRetainExp = ur::level_zero::urCommandBufferRetainExp; + pDdiTable->pfnReleaseExp = ur::level_zero::urCommandBufferReleaseExp; + pDdiTable->pfnFinalizeExp = ur::level_zero::urCommandBufferFinalizeExp; + pDdiTable->pfnAppendKernelLaunchExp = + ur::level_zero::urCommandBufferAppendKernelLaunchExp; + pDdiTable->pfnAppendUSMMemcpyExp = + ur::level_zero::urCommandBufferAppendUSMMemcpyExp; + pDdiTable->pfnAppendUSMFillExp = + ur::level_zero::urCommandBufferAppendUSMFillExp; + 
pDdiTable->pfnAppendMemBufferCopyExp = + ur::level_zero::urCommandBufferAppendMemBufferCopyExp; + pDdiTable->pfnAppendMemBufferWriteExp = + ur::level_zero::urCommandBufferAppendMemBufferWriteExp; + pDdiTable->pfnAppendMemBufferReadExp = + ur::level_zero::urCommandBufferAppendMemBufferReadExp; + pDdiTable->pfnAppendMemBufferCopyRectExp = + ur::level_zero::urCommandBufferAppendMemBufferCopyRectExp; + pDdiTable->pfnAppendMemBufferWriteRectExp = + ur::level_zero::urCommandBufferAppendMemBufferWriteRectExp; + pDdiTable->pfnAppendMemBufferReadRectExp = + ur::level_zero::urCommandBufferAppendMemBufferReadRectExp; + pDdiTable->pfnAppendMemBufferFillExp = + ur::level_zero::urCommandBufferAppendMemBufferFillExp; + pDdiTable->pfnAppendUSMPrefetchExp = + ur::level_zero::urCommandBufferAppendUSMPrefetchExp; + pDdiTable->pfnAppendUSMAdviseExp = + ur::level_zero::urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnEnqueueExp = ur::level_zero::urCommandBufferEnqueueExp; + pDdiTable->pfnRetainCommandExp = + ur::level_zero::urCommandBufferRetainCommandExp; + pDdiTable->pfnReleaseCommandExp = + ur::level_zero::urCommandBufferReleaseCommandExp; + pDdiTable->pfnUpdateKernelLaunchExp = + ur::level_zero::urCommandBufferUpdateKernelLaunchExp; + pDdiTable->pfnGetInfoExp = ur::level_zero::urCommandBufferGetInfoExp; + pDdiTable->pfnCommandGetInfoExp = + ur::level_zero::urCommandBufferCommandGetInfoExp; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_event_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( + ur_api_version_t version, ur_context_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGetInfo = urEventGetInfo; - pDdiTable->pfnGetProfilingInfo = urEventGetProfilingInfo; - pDdiTable->pfnWait = urEventWait; - pDdiTable->pfnRetain = urEventRetain; - pDdiTable->pfnRelease = urEventRelease; - pDdiTable->pfnGetNativeHandle = urEventGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urEventCreateWithNativeHandle; - pDdiTable->pfnSetCallback = urEventSetCallback; - - return retVal; + + pDdiTable->pfnCreate = ur::level_zero::urContextCreate; + pDdiTable->pfnRetain = ur::level_zero::urContextRetain; + pDdiTable->pfnRelease = ur::level_zero::urContextRelease; + pDdiTable->pfnGetInfo = ur::level_zero::urContextGetInfo; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urContextGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urContextCreateWithNativeHandle; + pDdiTable->pfnSetExtendedDeleter = + ur::level_zero::urContextSetExtendedDeleter; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_kernel_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( + ur_api_version_t version, ur_enqueue_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urKernelCreate; - pDdiTable->pfnGetInfo = 
urKernelGetInfo; - pDdiTable->pfnGetGroupInfo = urKernelGetGroupInfo; - pDdiTable->pfnGetSubGroupInfo = urKernelGetSubGroupInfo; - pDdiTable->pfnRetain = urKernelRetain; - pDdiTable->pfnRelease = urKernelRelease; - pDdiTable->pfnGetNativeHandle = urKernelGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urKernelCreateWithNativeHandle; - pDdiTable->pfnSetArgValue = urKernelSetArgValue; - pDdiTable->pfnSetArgLocal = urKernelSetArgLocal; - pDdiTable->pfnSetArgPointer = urKernelSetArgPointer; - pDdiTable->pfnSetExecInfo = urKernelSetExecInfo; - pDdiTable->pfnSetArgSampler = urKernelSetArgSampler; - pDdiTable->pfnSetArgMemObj = urKernelSetArgMemObj; - pDdiTable->pfnSetSpecializationConstants = urKernelSetSpecializationConstants; - pDdiTable->pfnGetSuggestedLocalWorkSize = urKernelGetSuggestedLocalWorkSize; - return retVal; + + pDdiTable->pfnKernelLaunch = ur::level_zero::urEnqueueKernelLaunch; + pDdiTable->pfnEventsWait = ur::level_zero::urEnqueueEventsWait; + pDdiTable->pfnEventsWaitWithBarrier = + ur::level_zero::urEnqueueEventsWaitWithBarrier; + pDdiTable->pfnMemBufferRead = ur::level_zero::urEnqueueMemBufferRead; + pDdiTable->pfnMemBufferWrite = ur::level_zero::urEnqueueMemBufferWrite; + pDdiTable->pfnMemBufferReadRect = ur::level_zero::urEnqueueMemBufferReadRect; + pDdiTable->pfnMemBufferWriteRect = + ur::level_zero::urEnqueueMemBufferWriteRect; + pDdiTable->pfnMemBufferCopy = ur::level_zero::urEnqueueMemBufferCopy; + pDdiTable->pfnMemBufferCopyRect = ur::level_zero::urEnqueueMemBufferCopyRect; + pDdiTable->pfnMemBufferFill = ur::level_zero::urEnqueueMemBufferFill; + pDdiTable->pfnMemImageRead = ur::level_zero::urEnqueueMemImageRead; + pDdiTable->pfnMemImageWrite = ur::level_zero::urEnqueueMemImageWrite; + pDdiTable->pfnMemImageCopy = ur::level_zero::urEnqueueMemImageCopy; + pDdiTable->pfnMemBufferMap = ur::level_zero::urEnqueueMemBufferMap; + pDdiTable->pfnMemUnmap = ur::level_zero::urEnqueueMemUnmap; + pDdiTable->pfnUSMFill = ur::level_zero::urEnqueueUSMFill; + pDdiTable->pfnUSMMemcpy = ur::level_zero::urEnqueueUSMMemcpy; + pDdiTable->pfnUSMPrefetch = ur::level_zero::urEnqueueUSMPrefetch; + pDdiTable->pfnUSMAdvise = ur::level_zero::urEnqueueUSMAdvise; + pDdiTable->pfnUSMFill2D = ur::level_zero::urEnqueueUSMFill2D; + pDdiTable->pfnUSMMemcpy2D = ur::level_zero::urEnqueueUSMMemcpy2D; + pDdiTable->pfnDeviceGlobalVariableWrite = + ur::level_zero::urEnqueueDeviceGlobalVariableWrite; + pDdiTable->pfnDeviceGlobalVariableRead = + ur::level_zero::urEnqueueDeviceGlobalVariableRead; + pDdiTable->pfnReadHostPipe = ur::level_zero::urEnqueueReadHostPipe; + pDdiTable->pfnWriteHostPipe = ur::level_zero::urEnqueueWriteHostPipe; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetMemProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_mem_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( + ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnImageCreate = urMemImageCreate; - pDdiTable->pfnBufferCreate = urMemBufferCreate; - pDdiTable->pfnRetain = urMemRetain; - pDdiTable->pfnRelease = urMemRelease; - pDdiTable->pfnBufferPartition = urMemBufferPartition; - pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle; - 
pDdiTable->pfnBufferCreateWithNativeHandle = - urMemBufferCreateWithNativeHandle; - pDdiTable->pfnImageCreateWithNativeHandle = urMemImageCreateWithNativeHandle; - pDdiTable->pfnGetInfo = urMemGetInfo; - pDdiTable->pfnImageGetInfo = urMemImageGetInfo; - return retVal; + pDdiTable->pfnKernelLaunchCustomExp = + ur::level_zero::urEnqueueKernelLaunchCustomExp; + pDdiTable->pfnCooperativeKernelLaunchExp = + ur::level_zero::urEnqueueCooperativeKernelLaunchExp; + pDdiTable->pfnTimestampRecordingExp = + ur::level_zero::urEnqueueTimestampRecordingExp; + pDdiTable->pfnNativeCommandExp = ur::level_zero::urEnqueueNativeCommandExp; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_platform_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( + ur_api_version_t version, ur_event_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGet = urPlatformGet; - pDdiTable->pfnGetInfo = urPlatformGetInfo; - pDdiTable->pfnGetNativeHandle = urPlatformGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urPlatformCreateWithNativeHandle; - pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion; - pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption; - - return retVal; -} -UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_program_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { + pDdiTable->pfnGetInfo = ur::level_zero::urEventGetInfo; + pDdiTable->pfnGetProfilingInfo = ur::level_zero::urEventGetProfilingInfo; + pDdiTable->pfnWait = ur::level_zero::urEventWait; + pDdiTable->pfnRetain = ur::level_zero::urEventRetain; + pDdiTable->pfnRelease = ur::level_zero::urEventRelease; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urEventGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urEventCreateWithNativeHandle; + pDdiTable->pfnSetCallback = ur::level_zero::urEventSetCallback; + + return result; +} - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( + ur_api_version_t version, ur_kernel_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreateWithIL = urProgramCreateWithIL; - pDdiTable->pfnCreateWithBinary = urProgramCreateWithBinary; - pDdiTable->pfnBuild = urProgramBuild; - pDdiTable->pfnCompile = urProgramCompile; - pDdiTable->pfnLink = urProgramLink; - pDdiTable->pfnRetain = urProgramRetain; - pDdiTable->pfnRelease = urProgramRelease; - pDdiTable->pfnGetFunctionPointer = urProgramGetFunctionPointer; - pDdiTable->pfnGetGlobalVariablePointer = urProgramGetGlobalVariablePointer; - pDdiTable->pfnGetInfo = urProgramGetInfo; - pDdiTable->pfnGetBuildInfo = urProgramGetBuildInfo; + + pDdiTable->pfnCreate = ur::level_zero::urKernelCreate; + pDdiTable->pfnGetInfo = ur::level_zero::urKernelGetInfo; + pDdiTable->pfnGetGroupInfo = ur::level_zero::urKernelGetGroupInfo; + pDdiTable->pfnGetSubGroupInfo = ur::level_zero::urKernelGetSubGroupInfo; + pDdiTable->pfnRetain = 
ur::level_zero::urKernelRetain; + pDdiTable->pfnRelease = ur::level_zero::urKernelRelease; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urKernelGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urKernelCreateWithNativeHandle; + pDdiTable->pfnGetSuggestedLocalWorkSize = + ur::level_zero::urKernelGetSuggestedLocalWorkSize; + pDdiTable->pfnSetArgValue = ur::level_zero::urKernelSetArgValue; + pDdiTable->pfnSetArgLocal = ur::level_zero::urKernelSetArgLocal; + pDdiTable->pfnSetArgPointer = ur::level_zero::urKernelSetArgPointer; + pDdiTable->pfnSetExecInfo = ur::level_zero::urKernelSetExecInfo; + pDdiTable->pfnSetArgSampler = ur::level_zero::urKernelSetArgSampler; + pDdiTable->pfnSetArgMemObj = ur::level_zero::urKernelSetArgMemObj; pDdiTable->pfnSetSpecializationConstants = - urProgramSetSpecializationConstants; - pDdiTable->pfnGetNativeHandle = urProgramGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urProgramCreateWithNativeHandle; + ur::level_zero::urKernelSetSpecializationConstants; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_queue_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( + ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGetInfo = urQueueGetInfo; - pDdiTable->pfnCreate = urQueueCreate; - pDdiTable->pfnRetain = urQueueRetain; - pDdiTable->pfnRelease = urQueueRelease; - pDdiTable->pfnGetNativeHandle = urQueueGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urQueueCreateWithNativeHandle; - pDdiTable->pfnFinish = urQueueFinish; - pDdiTable->pfnFlush = urQueueFlush; + pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = + ur::level_zero::urKernelSuggestMaxCooperativeGroupCountExp; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_sampler_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL +urGetMemProcAddrTable(ur_api_version_t version, ur_mem_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urSamplerCreate; - pDdiTable->pfnRetain = urSamplerRetain; - pDdiTable->pfnRelease = urSamplerRelease; - pDdiTable->pfnGetInfo = urSamplerGetInfo; - pDdiTable->pfnGetNativeHandle = urSamplerGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urSamplerCreateWithNativeHandle; - - return retVal; + + pDdiTable->pfnImageCreate = ur::level_zero::urMemImageCreate; + pDdiTable->pfnBufferCreate = ur::level_zero::urMemBufferCreate; + pDdiTable->pfnRetain = ur::level_zero::urMemRetain; + pDdiTable->pfnRelease = ur::level_zero::urMemRelease; + pDdiTable->pfnBufferPartition = ur::level_zero::urMemBufferPartition; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urMemGetNativeHandle; + pDdiTable->pfnBufferCreateWithNativeHandle = + ur::level_zero::urMemBufferCreateWithNativeHandle; + 
pDdiTable->pfnImageCreateWithNativeHandle = + ur::level_zero::urMemImageCreateWithNativeHandle; + pDdiTable->pfnGetInfo = ur::level_zero::urMemGetInfo; + pDdiTable->pfnImageGetInfo = ur::level_zero::urMemImageGetInfo; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_usm_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( + ur_api_version_t version, ur_physical_mem_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnHostAlloc = urUSMHostAlloc; - pDdiTable->pfnDeviceAlloc = urUSMDeviceAlloc; - pDdiTable->pfnSharedAlloc = urUSMSharedAlloc; - pDdiTable->pfnFree = urUSMFree; - pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo; - pDdiTable->pfnPoolCreate = urUSMPoolCreate; - pDdiTable->pfnPoolRetain = urUSMPoolRetain; - pDdiTable->pfnPoolRelease = urUSMPoolRelease; - pDdiTable->pfnPoolGetInfo = urUSMPoolGetInfo; - - return retVal; + pDdiTable->pfnCreate = ur::level_zero::urPhysicalMemCreate; + pDdiTable->pfnRetain = ur::level_zero::urPhysicalMemRetain; + pDdiTable->pfnRelease = ur::level_zero::urPhysicalMemRelease; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_device_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( + ur_api_version_t version, ur_platform_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnGet = urDeviceGet; - pDdiTable->pfnGetInfo = urDeviceGetInfo; - pDdiTable->pfnRetain = urDeviceRetain; - pDdiTable->pfnRelease = urDeviceRelease; - pDdiTable->pfnPartition = urDevicePartition; - pDdiTable->pfnSelectBinary = urDeviceSelectBinary; - pDdiTable->pfnGetNativeHandle = urDeviceGetNativeHandle; - pDdiTable->pfnCreateWithNativeHandle = urDeviceCreateWithNativeHandle; - pDdiTable->pfnGetGlobalTimestamps = urDeviceGetGlobalTimestamps; - - return retVal; + + pDdiTable->pfnGet = ur::level_zero::urPlatformGet; + pDdiTable->pfnGetInfo = ur::level_zero::urPlatformGetInfo; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urPlatformGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urPlatformCreateWithNativeHandle; + pDdiTable->pfnGetApiVersion = ur::level_zero::urPlatformGetApiVersion; + pDdiTable->pfnGetBackendOption = ur::level_zero::urPlatformGetBackendOption; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_command_buffer_exp_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( + ur_api_version_t version, ur_program_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return 
result; } - pDdiTable->pfnCreateExp = urCommandBufferCreateExp; - pDdiTable->pfnRetainExp = urCommandBufferRetainExp; - pDdiTable->pfnReleaseExp = urCommandBufferReleaseExp; - pDdiTable->pfnFinalizeExp = urCommandBufferFinalizeExp; - pDdiTable->pfnAppendKernelLaunchExp = urCommandBufferAppendKernelLaunchExp; - pDdiTable->pfnAppendUSMMemcpyExp = urCommandBufferAppendUSMMemcpyExp; - pDdiTable->pfnAppendUSMFillExp = urCommandBufferAppendUSMFillExp; - pDdiTable->pfnAppendMemBufferCopyExp = urCommandBufferAppendMemBufferCopyExp; - pDdiTable->pfnAppendMemBufferCopyRectExp = - urCommandBufferAppendMemBufferCopyRectExp; - pDdiTable->pfnAppendMemBufferReadExp = urCommandBufferAppendMemBufferReadExp; - pDdiTable->pfnAppendMemBufferReadRectExp = - urCommandBufferAppendMemBufferReadRectExp; - pDdiTable->pfnAppendMemBufferWriteExp = - urCommandBufferAppendMemBufferWriteExp; - pDdiTable->pfnAppendMemBufferWriteRectExp = - urCommandBufferAppendMemBufferWriteRectExp; - pDdiTable->pfnAppendUSMPrefetchExp = urCommandBufferAppendUSMPrefetchExp; - pDdiTable->pfnAppendUSMAdviseExp = urCommandBufferAppendUSMAdviseExp; - pDdiTable->pfnAppendMemBufferFillExp = urCommandBufferAppendMemBufferFillExp; - pDdiTable->pfnEnqueueExp = urCommandBufferEnqueueExp; - pDdiTable->pfnUpdateKernelLaunchExp = urCommandBufferUpdateKernelLaunchExp; - pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; - pDdiTable->pfnCommandGetInfoExp = urCommandBufferCommandGetInfoExp; - pDdiTable->pfnReleaseCommandExp = urCommandBufferReleaseCommandExp; - pDdiTable->pfnRetainCommandExp = urCommandBufferRetainCommandExp; - - return retVal; + + pDdiTable->pfnCreateWithIL = ur::level_zero::urProgramCreateWithIL; + pDdiTable->pfnCreateWithBinary = ur::level_zero::urProgramCreateWithBinary; + pDdiTable->pfnBuild = ur::level_zero::urProgramBuild; + pDdiTable->pfnCompile = ur::level_zero::urProgramCompile; + pDdiTable->pfnLink = ur::level_zero::urProgramLink; + pDdiTable->pfnRetain = ur::level_zero::urProgramRetain; + pDdiTable->pfnRelease = ur::level_zero::urProgramRelease; + pDdiTable->pfnGetFunctionPointer = + ur::level_zero::urProgramGetFunctionPointer; + pDdiTable->pfnGetGlobalVariablePointer = + ur::level_zero::urProgramGetGlobalVariablePointer; + pDdiTable->pfnGetInfo = ur::level_zero::urProgramGetInfo; + pDdiTable->pfnGetBuildInfo = ur::level_zero::urProgramGetBuildInfo; + pDdiTable->pfnSetSpecializationConstants = + ur::level_zero::urProgramSetSpecializationConstants; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urProgramGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urProgramCreateWithNativeHandle; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( - ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( + ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnEnablePeerAccessExp = urUsmP2PEnablePeerAccessExp; - pDdiTable->pfnDisablePeerAccessExp = urUsmP2PDisablePeerAccessExp; - pDdiTable->pfnPeerAccessGetInfoExp = urUsmP2PPeerAccessGetInfoExp; - return retVal; + pDdiTable->pfnBuildExp = ur::level_zero::urProgramBuildExp; + pDdiTable->pfnCompileExp = ur::level_zero::urProgramCompileExp; + pDdiTable->pfnLinkExp = ur::level_zero::urProgramLinkExp; + + 
return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( - ur_api_version_t version, ur_bindless_images_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( + ur_api_version_t version, ur_queue_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnUnsampledImageHandleDestroyExp = - urBindlessImagesUnsampledImageHandleDestroyExp; - pDdiTable->pfnSampledImageHandleDestroyExp = - urBindlessImagesSampledImageHandleDestroyExp; - pDdiTable->pfnImageAllocateExp = urBindlessImagesImageAllocateExp; - pDdiTable->pfnImageFreeExp = urBindlessImagesImageFreeExp; - pDdiTable->pfnUnsampledImageCreateExp = - urBindlessImagesUnsampledImageCreateExp; - pDdiTable->pfnSampledImageCreateExp = urBindlessImagesSampledImageCreateExp; - pDdiTable->pfnImageCopyExp = urBindlessImagesImageCopyExp; - pDdiTable->pfnImageGetInfoExp = urBindlessImagesImageGetInfoExp; - pDdiTable->pfnMipmapGetLevelExp = urBindlessImagesMipmapGetLevelExp; - pDdiTable->pfnMipmapFreeExp = urBindlessImagesMipmapFreeExp; - pDdiTable->pfnImportExternalMemoryExp = - urBindlessImagesImportExternalMemoryExp; - pDdiTable->pfnMapExternalArrayExp = urBindlessImagesMapExternalArrayExp; - pDdiTable->pfnReleaseInteropExp = urBindlessImagesReleaseInteropExp; - pDdiTable->pfnImportExternalSemaphoreExp = - urBindlessImagesImportExternalSemaphoreExp; - pDdiTable->pfnDestroyExternalSemaphoreExp = - urBindlessImagesDestroyExternalSemaphoreExp; - pDdiTable->pfnWaitExternalSemaphoreExp = - urBindlessImagesWaitExternalSemaphoreExp; - pDdiTable->pfnSignalExternalSemaphoreExp = - urBindlessImagesSignalExternalSemaphoreExp; - return UR_RESULT_SUCCESS; + + pDdiTable->pfnGetInfo = ur::level_zero::urQueueGetInfo; + pDdiTable->pfnCreate = ur::level_zero::urQueueCreate; + pDdiTable->pfnRetain = ur::level_zero::urQueueRetain; + pDdiTable->pfnRelease = ur::level_zero::urQueueRelease; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urQueueGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urQueueCreateWithNativeHandle; + pDdiTable->pfnFinish = ur::level_zero::urQueueFinish; + pDdiTable->pfnFlush = ur::level_zero::urQueueFlush; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( - ur_api_version_t version, ur_usm_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( + ur_api_version_t version, ur_sampler_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnPitchedAllocExp = urUSMPitchedAllocExp; - pDdiTable->pfnImportExp = urUSMImportExp; - pDdiTable->pfnReleaseExp = urUSMReleaseExp; - return UR_RESULT_SUCCESS; + + pDdiTable->pfnCreate = ur::level_zero::urSamplerCreate; + pDdiTable->pfnRetain = ur::level_zero::urSamplerRetain; + pDdiTable->pfnRelease = ur::level_zero::urSamplerRelease; + pDdiTable->pfnGetInfo = ur::level_zero::urSamplerGetInfo; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urSamplerGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urSamplerCreateWithNativeHandle; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_virtual_mem_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if 
(UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL +urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnFree = urVirtualMemFree; - pDdiTable->pfnGetInfo = urVirtualMemGetInfo; - pDdiTable->pfnGranularityGetInfo = urVirtualMemGranularityGetInfo; - pDdiTable->pfnMap = urVirtualMemMap; - pDdiTable->pfnReserve = urVirtualMemReserve; - pDdiTable->pfnSetAccess = urVirtualMemSetAccess; - pDdiTable->pfnUnmap = urVirtualMemUnmap; - - return retVal; + pDdiTable->pfnHostAlloc = ur::level_zero::urUSMHostAlloc; + pDdiTable->pfnDeviceAlloc = ur::level_zero::urUSMDeviceAlloc; + pDdiTable->pfnSharedAlloc = ur::level_zero::urUSMSharedAlloc; + pDdiTable->pfnFree = ur::level_zero::urUSMFree; + pDdiTable->pfnGetMemAllocInfo = ur::level_zero::urUSMGetMemAllocInfo; + pDdiTable->pfnPoolCreate = ur::level_zero::urUSMPoolCreate; + pDdiTable->pfnPoolRetain = ur::level_zero::urUSMPoolRetain; + pDdiTable->pfnPoolRelease = ur::level_zero::urUSMPoolRelease; + pDdiTable->pfnPoolGetInfo = ur::level_zero::urUSMPoolGetInfo; + + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_physical_mem_dditable_t - *pDdiTable ///< [in,out] pointer to table of DDI function pointers -) { - auto retVal = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != retVal) { - return retVal; +UR_APIEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( + ur_api_version_t version, ur_usm_exp_dditable_t *pDdiTable) { + auto result = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != result) { + return result; } - pDdiTable->pfnCreate = urPhysicalMemCreate; - pDdiTable->pfnRelease = urPhysicalMemRelease; - pDdiTable->pfnRetain = urPhysicalMemRetain; + pDdiTable->pfnPitchedAllocExp = ur::level_zero::urUSMPitchedAllocExp; + pDdiTable->pfnImportExp = ur::level_zero::urUSMImportExp; + pDdiTable->pfnReleaseExp = ur::level_zero::urUSMReleaseExp; - return retVal; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( - ur_api_version_t version, ur_enqueue_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( + ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnCooperativeKernelLaunchExp = - urEnqueueCooperativeKernelLaunchExp; - pDdiTable->pfnTimestampRecordingExp = urEnqueueTimestampRecordingExp; - pDdiTable->pfnNativeCommandExp = urEnqueueNativeCommandExp; + pDdiTable->pfnEnablePeerAccessExp = + ur::level_zero::urUsmP2PEnablePeerAccessExp; + pDdiTable->pfnDisablePeerAccessExp = + ur::level_zero::urUsmP2PDisablePeerAccessExp; + pDdiTable->pfnPeerAccessGetInfoExp = + ur::level_zero::urUsmP2PPeerAccessGetInfoExp; - return UR_RESULT_SUCCESS; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( - ur_api_version_t version, ur_kernel_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( + ur_api_version_t version, ur_virtual_mem_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnSuggestMaxCooperativeGroupCountExp = - urKernelSuggestMaxCooperativeGroupCountExp; + 
pDdiTable->pfnGranularityGetInfo = + ur::level_zero::urVirtualMemGranularityGetInfo; + pDdiTable->pfnReserve = ur::level_zero::urVirtualMemReserve; + pDdiTable->pfnFree = ur::level_zero::urVirtualMemFree; + pDdiTable->pfnMap = ur::level_zero::urVirtualMemMap; + pDdiTable->pfnUnmap = ur::level_zero::urVirtualMemUnmap; + pDdiTable->pfnSetAccess = ur::level_zero::urVirtualMemSetAccess; + pDdiTable->pfnGetInfo = ur::level_zero::urVirtualMemGetInfo; - return UR_RESULT_SUCCESS; + return result; } -UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( - ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { +UR_APIEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( + ur_api_version_t version, ur_device_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnBuildExp = urProgramBuildExp; - pDdiTable->pfnCompileExp = urProgramCompileExp; - pDdiTable->pfnLinkExp = urProgramLinkExp; - - return UR_RESULT_SUCCESS; + pDdiTable->pfnGet = ur::level_zero::urDeviceGet; + pDdiTable->pfnGetInfo = ur::level_zero::urDeviceGetInfo; + pDdiTable->pfnRetain = ur::level_zero::urDeviceRetain; + pDdiTable->pfnRelease = ur::level_zero::urDeviceRelease; + pDdiTable->pfnPartition = ur::level_zero::urDevicePartition; + pDdiTable->pfnSelectBinary = ur::level_zero::urDeviceSelectBinary; + pDdiTable->pfnGetNativeHandle = ur::level_zero::urDeviceGetNativeHandle; + pDdiTable->pfnCreateWithNativeHandle = + ur::level_zero::urDeviceCreateWithNativeHandle; + pDdiTable->pfnGetGlobalTimestamps = + ur::level_zero::urDeviceGetGlobalTimestamps; + + return result; } -#if defined(__cplusplus) + +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +} // namespace ur::level_zero +#elif defined(__cplusplus) } // extern "C" #endif + +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +namespace ur::level_zero { +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { + if (ddi == nullptr) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + ur_result_t result; + + result = ur::level_zero::urGetGlobalProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Global); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetBindlessImagesExpProcAddrTable( + UR_API_VERSION_CURRENT, &ddi->BindlessImagesExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetCommandBufferExpProcAddrTable( + UR_API_VERSION_CURRENT, &ddi->CommandBufferExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetContextProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Context); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetEnqueueProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Enqueue); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->EnqueueExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetEventProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Event); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Kernel); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->KernelExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = + ur::level_zero::urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Mem); + if (result != UR_RESULT_SUCCESS) + return result; + result = 
ur::level_zero::urGetPhysicalMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->PhysicalMem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetPlatformProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Platform); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetProgramProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Program); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->ProgramExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Queue); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Sampler); + if (result != UR_RESULT_SUCCESS) + return result; + result = + ur::level_zero::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &ddi->USM); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetUSMExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->USMExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetUsmP2PExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->UsmP2PExp); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetVirtualMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->VirtualMem); + if (result != UR_RESULT_SUCCESS) + return result; + result = ur::level_zero::urGetDeviceProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Device); + if (result != UR_RESULT_SUCCESS) + return result; + + return result; +} +} // namespace ur::level_zero +#endif diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp new file mode 100644 index 0000000000..9133cc97ca --- /dev/null +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -0,0 +1,703 @@ +//===--------- ur_interface_loader.hpp - Level Zero Adapter ------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include +#include + +namespace ur::level_zero { +ur_result_t urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, + uint32_t *pNumAdapters); +ur_result_t urAdapterRelease(ur_adapter_handle_t hAdapter); +ur_result_t urAdapterRetain(ur_adapter_handle_t hAdapter); +ur_result_t urAdapterGetLastError(ur_adapter_handle_t hAdapter, + const char **ppMessage, int32_t *pError); +ur_result_t urAdapterGetInfo(ur_adapter_handle_t hAdapter, + ur_adapter_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urPlatformGet(ur_adapter_handle_t *phAdapters, uint32_t NumAdapters, + uint32_t NumEntries, + ur_platform_handle_t *phPlatforms, + uint32_t *pNumPlatforms); +ur_result_t urPlatformGetInfo(ur_platform_handle_t hPlatform, + ur_platform_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urPlatformGetApiVersion(ur_platform_handle_t hPlatform, + ur_api_version_t *pVersion); +ur_result_t urPlatformGetNativeHandle(ur_platform_handle_t hPlatform, + ur_native_handle_t *phNativePlatform); +ur_result_t urPlatformCreateWithNativeHandle( + ur_native_handle_t hNativePlatform, ur_adapter_handle_t hAdapter, + const ur_platform_native_properties_t *pProperties, + ur_platform_handle_t *phPlatform); +ur_result_t urPlatformGetBackendOption(ur_platform_handle_t hPlatform, + const char *pFrontendOption, + const char **ppPlatformOption); +ur_result_t urDeviceGet(ur_platform_handle_t hPlatform, + ur_device_type_t DeviceType, uint32_t NumEntries, + ur_device_handle_t *phDevices, uint32_t *pNumDevices); +ur_result_t urDeviceGetInfo(ur_device_handle_t hDevice, + ur_device_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urDeviceRetain(ur_device_handle_t hDevice); +ur_result_t urDeviceRelease(ur_device_handle_t hDevice); +ur_result_t +urDevicePartition(ur_device_handle_t hDevice, + const ur_device_partition_properties_t *pProperties, + uint32_t NumDevices, ur_device_handle_t *phSubDevices, + uint32_t *pNumDevicesRet); +ur_result_t urDeviceSelectBinary(ur_device_handle_t hDevice, + const ur_device_binary_t *pBinaries, + uint32_t NumBinaries, + uint32_t *pSelectedBinary); +ur_result_t urDeviceGetNativeHandle(ur_device_handle_t hDevice, + ur_native_handle_t *phNativeDevice); +ur_result_t +urDeviceCreateWithNativeHandle(ur_native_handle_t hNativeDevice, + ur_platform_handle_t hPlatform, + const ur_device_native_properties_t *pProperties, + ur_device_handle_t *phDevice); +ur_result_t urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice, + uint64_t *pDeviceTimestamp, + uint64_t *pHostTimestamp); +ur_result_t urContextCreate(uint32_t DeviceCount, + const ur_device_handle_t *phDevices, + const ur_context_properties_t *pProperties, + ur_context_handle_t *phContext); +ur_result_t urContextRetain(ur_context_handle_t hContext); +ur_result_t urContextRelease(ur_context_handle_t hContext); +ur_result_t urContextGetInfo(ur_context_handle_t hContext, + ur_context_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urContextGetNativeHandle(ur_context_handle_t hContext, + ur_native_handle_t *phNativeContext); +ur_result_t urContextCreateWithNativeHandle( + ur_native_handle_t hNativeContext, uint32_t numDevices, + const ur_device_handle_t *phDevices, + const ur_context_native_properties_t *pProperties, + 
ur_context_handle_t *phContext); +ur_result_t +urContextSetExtendedDeleter(ur_context_handle_t hContext, + ur_context_extended_deleter_t pfnDeleter, + void *pUserData); +ur_result_t urMemImageCreate(ur_context_handle_t hContext, ur_mem_flags_t flags, + const ur_image_format_t *pImageFormat, + const ur_image_desc_t *pImageDesc, void *pHost, + ur_mem_handle_t *phMem); +ur_result_t urMemBufferCreate(ur_context_handle_t hContext, + ur_mem_flags_t flags, size_t size, + const ur_buffer_properties_t *pProperties, + ur_mem_handle_t *phBuffer); +ur_result_t urMemRetain(ur_mem_handle_t hMem); +ur_result_t urMemRelease(ur_mem_handle_t hMem); +ur_result_t urMemBufferPartition(ur_mem_handle_t hBuffer, ur_mem_flags_t flags, + ur_buffer_create_type_t bufferCreateType, + const ur_buffer_region_t *pRegion, + ur_mem_handle_t *phMem); +ur_result_t urMemGetNativeHandle(ur_mem_handle_t hMem, + ur_device_handle_t hDevice, + ur_native_handle_t *phNativeMem); +ur_result_t urMemBufferCreateWithNativeHandle( + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem); +ur_result_t urMemImageCreateWithNativeHandle( + ur_native_handle_t hNativeMem, ur_context_handle_t hContext, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem); +ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urMemImageGetInfo(ur_mem_handle_t hMemory, ur_image_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urSamplerCreate(ur_context_handle_t hContext, + const ur_sampler_desc_t *pDesc, + ur_sampler_handle_t *phSampler); +ur_result_t urSamplerRetain(ur_sampler_handle_t hSampler); +ur_result_t urSamplerRelease(ur_sampler_handle_t hSampler); +ur_result_t urSamplerGetInfo(ur_sampler_handle_t hSampler, + ur_sampler_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urSamplerGetNativeHandle(ur_sampler_handle_t hSampler, + ur_native_handle_t *phNativeSampler); +ur_result_t urSamplerCreateWithNativeHandle( + ur_native_handle_t hNativeSampler, ur_context_handle_t hContext, + const ur_sampler_native_properties_t *pProperties, + ur_sampler_handle_t *phSampler); +ur_result_t urUSMHostAlloc(ur_context_handle_t hContext, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); +ur_result_t urUSMDeviceAlloc(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); +ur_result_t urUSMSharedAlloc(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t size, + void **ppMem); +ur_result_t urUSMFree(ur_context_handle_t hContext, void *pMem); +ur_result_t urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, + ur_usm_alloc_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urUSMPoolCreate(ur_context_handle_t hContext, + ur_usm_pool_desc_t *pPoolDesc, + ur_usm_pool_handle_t *ppPool); +ur_result_t urUSMPoolRetain(ur_usm_pool_handle_t pPool); +ur_result_t urUSMPoolRelease(ur_usm_pool_handle_t pPool); +ur_result_t urUSMPoolGetInfo(ur_usm_pool_handle_t hPool, + ur_usm_pool_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t 
urVirtualMemGranularityGetInfo( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_virtual_mem_granularity_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, + const void *pStart, size_t size, + void **ppStart); +ur_result_t urVirtualMemFree(ur_context_handle_t hContext, const void *pStart, + size_t size); +ur_result_t urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, + size_t size, ur_physical_mem_handle_t hPhysicalMem, + size_t offset, ur_virtual_mem_access_flags_t flags); +ur_result_t urVirtualMemUnmap(ur_context_handle_t hContext, const void *pStart, + size_t size); +ur_result_t urVirtualMemSetAccess(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_access_flags_t flags); +ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urPhysicalMemCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, size_t size, + const ur_physical_mem_properties_t *pProperties, + ur_physical_mem_handle_t *phPhysicalMem); +ur_result_t urPhysicalMemRetain(ur_physical_mem_handle_t hPhysicalMem); +ur_result_t urPhysicalMemRelease(ur_physical_mem_handle_t hPhysicalMem); +ur_result_t urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL, + size_t length, + const ur_program_properties_t *pProperties, + ur_program_handle_t *phProgram); +ur_result_t urProgramCreateWithBinary( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + const uint8_t *pBinary, const ur_program_properties_t *pProperties, + ur_program_handle_t *phProgram); +ur_result_t urProgramBuild(ur_context_handle_t hContext, + ur_program_handle_t hProgram, const char *pOptions); +ur_result_t urProgramCompile(ur_context_handle_t hContext, + ur_program_handle_t hProgram, + const char *pOptions); +ur_result_t urProgramLink(ur_context_handle_t hContext, uint32_t count, + const ur_program_handle_t *phPrograms, + const char *pOptions, ur_program_handle_t *phProgram); +ur_result_t urProgramRetain(ur_program_handle_t hProgram); +ur_result_t urProgramRelease(ur_program_handle_t hProgram); +ur_result_t urProgramGetFunctionPointer(ur_device_handle_t hDevice, + ur_program_handle_t hProgram, + const char *pFunctionName, + void **ppFunctionPointer); +ur_result_t urProgramGetGlobalVariablePointer( + ur_device_handle_t hDevice, ur_program_handle_t hProgram, + const char *pGlobalVariableName, size_t *pGlobalVariableSizeRet, + void **ppGlobalVariablePointerRet); +ur_result_t urProgramGetInfo(ur_program_handle_t hProgram, + ur_program_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urProgramGetBuildInfo(ur_program_handle_t hProgram, + ur_device_handle_t hDevice, + ur_program_build_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urProgramSetSpecializationConstants( + ur_program_handle_t hProgram, uint32_t count, + const ur_specialization_constant_info_t *pSpecConstants); +ur_result_t urProgramGetNativeHandle(ur_program_handle_t hProgram, + ur_native_handle_t *phNativeProgram); +ur_result_t urProgramCreateWithNativeHandle( + ur_native_handle_t hNativeProgram, ur_context_handle_t hContext, + const ur_program_native_properties_t *pProperties, + ur_program_handle_t *phProgram); +ur_result_t urKernelCreate(ur_program_handle_t hProgram, + const char 
*pKernelName, + ur_kernel_handle_t *phKernel); +ur_result_t urKernelSetArgValue( + ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, + const ur_kernel_arg_value_properties_t *pProperties, const void *pArgValue); +ur_result_t +urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, + size_t argSize, + const ur_kernel_arg_local_properties_t *pProperties); +ur_result_t urKernelGetInfo(ur_kernel_handle_t hKernel, + ur_kernel_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urKernelGetGroupInfo(ur_kernel_handle_t hKernel, + ur_device_handle_t hDevice, + ur_kernel_group_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, + ur_device_handle_t hDevice, + ur_kernel_sub_group_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urKernelRetain(ur_kernel_handle_t hKernel); +ur_result_t urKernelRelease(ur_kernel_handle_t hKernel); +ur_result_t +urKernelSetArgPointer(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_pointer_properties_t *pProperties, + const void *pArgValue); +ur_result_t +urKernelSetExecInfo(ur_kernel_handle_t hKernel, ur_kernel_exec_info_t propName, + size_t propSize, + const ur_kernel_exec_info_properties_t *pProperties, + const void *pPropValue); +ur_result_t +urKernelSetArgSampler(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_sampler_properties_t *pProperties, + ur_sampler_handle_t hArgValue); +ur_result_t +urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_mem_obj_properties_t *pProperties, + ur_mem_handle_t hArgValue); +ur_result_t urKernelSetSpecializationConstants( + ur_kernel_handle_t hKernel, uint32_t count, + const ur_specialization_constant_info_t *pSpecConstants); +ur_result_t urKernelGetNativeHandle(ur_kernel_handle_t hKernel, + ur_native_handle_t *phNativeKernel); +ur_result_t +urKernelCreateWithNativeHandle(ur_native_handle_t hNativeKernel, + ur_context_handle_t hContext, + ur_program_handle_t hProgram, + const ur_kernel_native_properties_t *pProperties, + ur_kernel_handle_t *phKernel); +ur_result_t urKernelGetSuggestedLocalWorkSize(ur_kernel_handle_t hKernel, + ur_queue_handle_t hQueue, + uint32_t numWorkDim, + const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, + size_t *pSuggestedLocalWorkSize); +ur_result_t urQueueGetInfo(ur_queue_handle_t hQueue, ur_queue_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urQueueCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_queue_properties_t *pProperties, + ur_queue_handle_t *phQueue); +ur_result_t urQueueRetain(ur_queue_handle_t hQueue); +ur_result_t urQueueRelease(ur_queue_handle_t hQueue); +ur_result_t urQueueGetNativeHandle(ur_queue_handle_t hQueue, + ur_queue_native_desc_t *pDesc, + ur_native_handle_t *phNativeQueue); +ur_result_t urQueueCreateWithNativeHandle( + ur_native_handle_t hNativeQueue, ur_context_handle_t hContext, + ur_device_handle_t hDevice, const ur_queue_native_properties_t *pProperties, + ur_queue_handle_t *phQueue); +ur_result_t urQueueFinish(ur_queue_handle_t hQueue); +ur_result_t urQueueFlush(ur_queue_handle_t hQueue); +ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urEventGetProfilingInfo(ur_event_handle_t hEvent, + ur_profiling_info_t 
propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urEventWait(uint32_t numEvents, + const ur_event_handle_t *phEventWaitList); +ur_result_t urEventRetain(ur_event_handle_t hEvent); +ur_result_t urEventRelease(ur_event_handle_t hEvent); +ur_result_t urEventGetNativeHandle(ur_event_handle_t hEvent, + ur_native_handle_t *phNativeEvent); +ur_result_t +urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, + ur_context_handle_t hContext, + const ur_event_native_properties_t *pProperties, + ur_event_handle_t *phEvent); +ur_result_t urEventSetCallback(ur_event_handle_t hEvent, + ur_execution_info_t execStatus, + ur_event_callback_t pfnNotify, void *pUserData); +ur_result_t urEnqueueKernelLaunch( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueEventsWait(ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueEventsWaitWithBarrier( + ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferRead(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingRead, + size_t offset, size_t size, void *pDst, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferReadRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferWriteRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, + ur_rect_offset_t bufferOrigin, ur_rect_offset_t hostOrigin, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferCopy(ur_queue_handle_t hQueue, + ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, size_t srcOffset, + size_t dstOffset, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferCopyRect( + ur_queue_handle_t hQueue, ur_mem_handle_t hBufferSrc, + ur_mem_handle_t hBufferDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferFill(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, + size_t offset, size_t size, + 
uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemImageRead( + ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingRead, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemImageWrite( + ur_queue_handle_t hQueue, ur_mem_handle_t hImage, bool blockingWrite, + ur_rect_offset_t origin, ur_rect_region_t region, size_t rowPitch, + size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t +urEnqueueMemImageCopy(ur_queue_handle_t hQueue, ur_mem_handle_t hImageSrc, + ur_mem_handle_t hImageDst, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueMemBufferMap(ur_queue_handle_t hQueue, + ur_mem_handle_t hBuffer, bool blockingMap, + ur_map_flags_t mapFlags, size_t offset, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, void **ppRetMap); +ur_result_t urEnqueueMemUnmap(ur_queue_handle_t hQueue, ur_mem_handle_t hMem, + void *pMappedPtr, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMFill(ur_queue_handle_t hQueue, void *pMem, + size_t patternSize, const void *pPattern, + size_t size, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMMemcpy(ur_queue_handle_t hQueue, bool blocking, + void *pDst, const void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMPrefetch(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_migration_flags_t flags, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMAdvise(ur_queue_handle_t hQueue, const void *pMem, + size_t size, ur_usm_advice_flags_t advice, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMFill2D(ur_queue_handle_t hQueue, void *pMem, + size_t pitch, size_t patternSize, + const void *pPattern, size_t width, + size_t height, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMMemcpy2D(ur_queue_handle_t hQueue, bool blocking, + void *pDst, size_t dstPitch, const void *pSrc, + size_t srcPitch, size_t width, size_t height, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueDeviceGlobalVariableWrite( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingWrite, size_t count, size_t offset, const void *pSrc, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueDeviceGlobalVariableRead( + ur_queue_handle_t hQueue, ur_program_handle_t hProgram, const char *name, + bool blockingRead, size_t count, size_t offset, void *pDst, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t 
urEnqueueReadHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pDst, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, + ur_program_handle_t hProgram, + const char *pipe_symbol, bool blocking, + void *pSrc, size_t size, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_usm_desc_t *pUSMDesc, + ur_usm_pool_handle_t pool, size_t widthInBytes, + size_t height, size_t elementSizeBytes, + void **ppMem, size_t *pResultPitch); +ur_result_t urBindlessImagesUnsampledImageHandleDestroyExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_native_handle_t hImage); +ur_result_t urBindlessImagesSampledImageHandleDestroyExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_native_handle_t hImage); +ur_result_t urBindlessImagesImageAllocateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_mem_native_handle_t *phImageMem); +ur_result_t +urBindlessImagesImageFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem); +ur_result_t urBindlessImagesUnsampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_native_handle_t *phImage); +ur_result_t urBindlessImagesSampledImageCreateExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_sampler_handle_t hSampler, ur_exp_image_native_handle_t *phImage); +ur_result_t urBindlessImagesImageCopyExp( + ur_queue_handle_t hQueue, void *pDst, void *pSrc, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_image_copy_flags_t imageCopyFlags, ur_rect_offset_t srcOffset, + ur_rect_offset_t dstOffset, ur_rect_region_t copyExtent, + ur_rect_region_t hostExtent, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urBindlessImagesImageGetInfoExp( + ur_context_handle_t hContext, ur_exp_image_mem_native_handle_t hImageMem, + ur_image_info_t propName, void *pPropValue, size_t *pPropSizeRet); +ur_result_t urBindlessImagesMipmapGetLevelExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hImageMem, uint32_t mipmapLevel, + ur_exp_image_mem_native_handle_t *phImageMem); +ur_result_t +urBindlessImagesMipmapFreeExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_image_mem_native_handle_t hMem); +ur_result_t urBindlessImagesImportExternalMemoryExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, + ur_exp_external_mem_type_t memHandleType, + ur_exp_interop_mem_desc_t *pInteropMemDesc, + ur_exp_interop_mem_handle_t *phInteropMem); +ur_result_t urBindlessImagesMapExternalArrayExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + const ur_image_format_t *pImageFormat, const ur_image_desc_t *pImageDesc, + ur_exp_interop_mem_handle_t 
hInteropMem, + ur_exp_image_mem_native_handle_t *phImageMem); +ur_result_t +urBindlessImagesReleaseInteropExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_exp_interop_mem_handle_t hInteropMem); +ur_result_t urBindlessImagesImportExternalSemaphoreExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_external_semaphore_type_t semHandleType, + ur_exp_interop_semaphore_desc_t *pInteropSemaphoreDesc, + ur_exp_interop_semaphore_handle_t *phInteropSemaphore); +ur_result_t urBindlessImagesDestroyExternalSemaphoreExp( + ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_exp_interop_semaphore_handle_t hInteropSemaphore); +ur_result_t urBindlessImagesWaitExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, + bool hasWaitValue, uint64_t waitValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urBindlessImagesSignalExternalSemaphoreExp( + ur_queue_handle_t hQueue, ur_exp_interop_semaphore_handle_t hSemaphore, + bool hasSignalValue, uint64_t signalValue, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t +urCommandBufferCreateExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_exp_command_buffer_desc_t *pCommandBufferDesc, + ur_exp_command_buffer_handle_t *phCommandBuffer); +ur_result_t +urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer); +ur_result_t +urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer); +ur_result_t +urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer); +ur_result_t urCommandBufferAppendKernelLaunchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, + uint32_t workDim, const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint, + ur_exp_command_buffer_command_handle_t *phCommand); +ur_result_t urCommandBufferAppendUSMMemcpyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, + size_t size, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendUSMFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, void *pMemory, + const void *pPattern, size_t patternSize, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferCopyExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, size_t srcOffset, size_t dstOffset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferWriteExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + size_t offset, size_t size, const void *pSrc, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferReadExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t 
hBuffer, + size_t offset, size_t size, void *pDst, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferCopyRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hSrcMem, + ur_mem_handle_t hDstMem, ur_rect_offset_t srcOrigin, + ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, + size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferWriteRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pSrc, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferReadRectExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + ur_rect_offset_t bufferOffset, ur_rect_offset_t hostOffset, + ur_rect_region_t region, size_t bufferRowPitch, size_t bufferSlicePitch, + size_t hostRowPitch, size_t hostSlicePitch, void *pDst, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendMemBufferFillExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_mem_handle_t hBuffer, + const void *pPattern, size_t patternSize, size_t offset, size_t size, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendUSMPrefetchExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, + size_t size, ur_usm_migration_flags_t flags, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferAppendUSMAdviseExp( + ur_exp_command_buffer_handle_t hCommandBuffer, const void *pMemory, + size_t size, ur_usm_advice_flags_t advice, uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); +ur_result_t urCommandBufferEnqueueExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urCommandBufferRetainCommandExp( + ur_exp_command_buffer_command_handle_t hCommand); +ur_result_t urCommandBufferReleaseCommandExp( + ur_exp_command_buffer_command_handle_t hCommand); +ur_result_t urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch); +ur_result_t +urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urCommandBufferCommandGetInfoExp( + ur_exp_command_buffer_command_handle_t hCommand, + ur_exp_command_buffer_command_info_t propName, size_t 
propSize, + void *pPropValue, size_t *pPropSizeRet); +ur_result_t urEnqueueCooperativeKernelLaunchExp( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urKernelSuggestMaxCooperativeGroupCountExp( + ur_kernel_handle_t hKernel, size_t localWorkSize, + size_t dynamicSharedMemorySize, uint32_t *pGroupCountRet); +ur_result_t urEnqueueTimestampRecordingExp( + ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueKernelLaunchCustomExp( + ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numPropsInLaunchPropList, + const ur_exp_launch_property_t *launchPropList, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +ur_result_t urProgramBuildExp(ur_program_handle_t hProgram, uint32_t numDevices, + ur_device_handle_t *phDevices, + const char *pOptions); +ur_result_t urProgramCompileExp(ur_program_handle_t hProgram, + uint32_t numDevices, + ur_device_handle_t *phDevices, + const char *pOptions); +ur_result_t urProgramLinkExp(ur_context_handle_t hContext, uint32_t numDevices, + ur_device_handle_t *phDevices, uint32_t count, + const ur_program_handle_t *phPrograms, + const char *pOptions, + ur_program_handle_t *phProgram); +ur_result_t urUSMImportExp(ur_context_handle_t hContext, void *pMem, + size_t size); +ur_result_t urUSMReleaseExp(ur_context_handle_t hContext, void *pMem); +ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice); +ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice); +ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice, + ur_exp_peer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet); +ur_result_t urEnqueueNativeCommandExp( + ur_queue_handle_t hQueue, + ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, + uint32_t numMemsInMemList, const ur_mem_handle_t *phMemList, + const ur_exp_enqueue_native_command_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); +#endif +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/ur_level_zero.hpp b/source/adapters/level_zero/ur_level_zero.hpp index 096ae076f9..36965c5d58 100644 --- a/source/adapters/level_zero/ur_level_zero.hpp +++ b/source/adapters/level_zero/ur_level_zero.hpp @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 2f05bfaa57..f68ecd386e 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -17,6 +17,7 @@ #include "usm.hpp" #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include "ur_level_zero.hpp" #include @@ -295,7 +296,9 @@ static ur_result_t USMHostAllocImpl(void **ResultPtr, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( +namespace 
ur::level_zero { + +ur_result_t urUSMHostAlloc( ur_context_handle_t Context, ///< [in] handle of the context object const ur_usm_desc_t *USMDesc, ///< [in][optional] USM memory allocation descriptor @@ -330,7 +333,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( // We are going to defer memory release if there are kernels with indirect // access, that is why explicitly retain context to be sure that it is // released after all memory allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } else { ContextLock.lock(); } @@ -363,7 +366,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( +ur_result_t urUSMDeviceAlloc( ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, ///< [in] handle of the device object const ur_usm_desc_t @@ -401,7 +404,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( // We are going to defer memory release if there are kernels with indirect // access, that is why explicitly retain context to be sure that it is // released after all memory allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } else { ContextLock.lock(); } @@ -439,7 +442,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMDeviceAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( +ur_result_t urUSMSharedAlloc( ur_context_handle_t Context, ///< [in] handle of the context object ur_device_handle_t Device, ///< [in] handle of the device object const ur_usm_desc_t @@ -500,7 +503,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( // We are going to defer memory release if there are kernels with indirect // access, that is why explicitly retain context to be sure that it is // released after all memory allocations in this context are released. - UR_CALL(urContextRetain(Context)); + UR_CALL(ur::level_zero::urContextRetain(Context)); } umf_memory_pool_handle_t hPoolInternal = nullptr; @@ -542,9 +545,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMSharedAlloc( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( - ur_context_handle_t Context, ///< [in] handle of the context object - void *Mem ///< [in] pointer to USM memory object +ur_result_t +urUSMFree(ur_context_handle_t Context, ///< [in] handle of the context object + void *Mem ///< [in] pointer to USM memory object ) { ur_platform_handle_t Plt = Context->getPlatform(); @@ -554,7 +557,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree( return USMFreeHelper(Context, Mem); } -UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( +ur_result_t urUSMGetMemAllocInfo( ur_context_handle_t Context, ///< [in] handle of the context object const void *Ptr, ///< [in] pointer to USM memory object ur_usm_alloc_info_t @@ -654,6 +657,103 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( return UR_RESULT_SUCCESS; } +ur_result_t urUSMPoolCreate( + ur_context_handle_t Context, ///< [in] handle of the context object + ur_usm_pool_desc_t + *PoolDesc, ///< [in] pointer to USM pool descriptor. 
Can be chained with + ///< ::ur_usm_pool_limits_desc_t + ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool +) { + + try { + *Pool = reinterpret_cast<ur_usm_pool_handle_t>( + new ur_usm_pool_handle_t_(Context, PoolDesc)); + + std::shared_lock<ur_shared_mutex> ContextLock(Context->Mutex); + Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool); + + } catch (const UsmAllocationException &Ex) { + return Ex.getError(); + } + return UR_RESULT_SUCCESS; +} + +ur_result_t +urUSMPoolRetain(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool +) { + Pool->RefCount.increment(); + return UR_RESULT_SUCCESS; +} + +ur_result_t +urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool +) { + if (Pool->RefCount.decrementAndTest()) { + std::shared_lock<ur_shared_mutex> ContextLock(Pool->Context->Mutex); + Pool->Context->UsmPoolHandles.remove(Pool); + delete Pool; + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urUSMPoolGetInfo( + ur_usm_pool_handle_t Pool, ///< [in] handle of the USM memory pool + ur_usm_pool_info_t PropName, ///< [in] name of the pool property to query + size_t PropSize, ///< [in] size in bytes of the pool property value provided + void *PropValue, ///< [out][typename(propName, propSize)] value of the pool + ///< property + size_t *PropSizeRet ///< [out] size in bytes returned in pool property value +) { + UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); + + switch (PropName) { + case UR_USM_POOL_INFO_REFERENCE_COUNT: { + return ReturnValue(Pool->RefCount.load()); + } + case UR_USM_POOL_INFO_CONTEXT: { + return ReturnValue(Pool->Context); + } + default: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + } +} + +ur_result_t urUSMImportExp(ur_context_handle_t Context, void *HostPtr, + size_t Size) { + UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); + + // Promote the host ptr to USM host memory. + if (ZeUSMImport.Supported && HostPtr != nullptr) { + // Query memory type of the host pointer + ze_device_handle_t ZeDeviceHandle; + ZeStruct<ze_memory_allocation_properties_t> ZeMemoryAllocationProperties; + ZE2UR_CALL(zeMemGetAllocProperties, + (Context->ZeContext, HostPtr, &ZeMemoryAllocationProperties, + &ZeDeviceHandle)); + + // If not shared of any type, we can import the ptr + if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) { + // Promote the host ptr to USM host memory + ze_driver_handle_t driverHandle = + Context->getPlatform()->ZeDriverHandleExpTranslated; + ZeUSMImport.doZeUSMImport(driverHandle, HostPtr, Size); + } + } + return UR_RESULT_SUCCESS; +} + +ur_result_t urUSMReleaseExp(ur_context_handle_t Context, void *HostPtr) { + UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); + + // Release the imported memory. + if (ZeUSMImport.Supported && HostPtr != nullptr) + ZeUSMImport.doZeUSMRelease( + Context->getPlatform()->ZeDriverHandleExpTranslated, HostPtr); + return UR_RESULT_SUCCESS; +} +} // namespace ur::level_zero + + static ur_result_t USMFreeImpl(ur_context_handle_t Context, void *Ptr) { auto ZeResult = ZE_CALL_NOCHECK(zeMemFree, (Context->ZeContext, Ptr)); // Handle When the driver is already released @@ -868,68 +968,6 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, } } -UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( - ur_context_handle_t Context, ///< [in] handle of the context object - ur_usm_pool_desc_t - *PoolDesc, ///< [in] pointer to USM pool descriptor. 
Can be chained with - ///< ::ur_usm_pool_limits_desc_t - ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool -) { - - try { - *Pool = reinterpret_cast<ur_usm_pool_handle_t>( - new ur_usm_pool_handle_t_(Context, PoolDesc)); - - std::shared_lock<ur_shared_mutex> ContextLock(Context->Mutex); - Context->UsmPoolHandles.insert(Context->UsmPoolHandles.cend(), *Pool); - - } catch (const UsmAllocationException &Ex) { - return Ex.getError(); - } - return UR_RESULT_SUCCESS; -} - -ur_result_t -urUSMPoolRetain(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool -) { - Pool->RefCount.increment(); - return UR_RESULT_SUCCESS; -} - -ur_result_t -urUSMPoolRelease(ur_usm_pool_handle_t Pool ///< [in] pointer to USM memory pool -) { - if (Pool->RefCount.decrementAndTest()) { - std::shared_lock<ur_shared_mutex> ContextLock(Pool->Context->Mutex); - Pool->Context->UsmPoolHandles.remove(Pool); - delete Pool; - } - return UR_RESULT_SUCCESS; -} - -ur_result_t urUSMPoolGetInfo( - ur_usm_pool_handle_t Pool, ///< [in] handle of the USM memory pool - ur_usm_pool_info_t PropName, ///< [in] name of the pool property to query - size_t PropSize, ///< [in] size in bytes of the pool property value provided - void *PropValue, ///< [out][typename(propName, propSize)] value of the pool - ///< property - size_t *PropSizeRet ///< [out] size in bytes returned in pool property value -) { - UrReturnHelper ReturnValue(PropSize, PropValue, PropSizeRet); - - switch (PropName) { - case UR_USM_POOL_INFO_REFERENCE_COUNT: { - return ReturnValue(Pool->RefCount.load()); - } - case UR_USM_POOL_INFO_CONTEXT: { - return ReturnValue(Pool->Context); - } - default: { - return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; - } - } -} - // If indirect access tracking is not enabled then this functions just performs // zeMemFree. If indirect access tracking is enabled then reference counting is // performed. @@ -1012,38 +1050,3 @@ ur_result_t USMFreeHelper(ur_context_handle_t Context, void *Ptr, UR_CALL(ContextReleaseHelper(Context)); return umf2urResult(umfRet); } - -UR_APIEXPORT ur_result_t UR_APICALL urUSMImportExp(ur_context_handle_t Context, - void *HostPtr, size_t Size) { - UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); - - // Promote the host ptr to USM host memory. - if (ZeUSMImport.Supported && HostPtr != nullptr) { - // Query memory type of the host pointer - ze_device_handle_t ZeDeviceHandle; - ZeStruct<ze_memory_allocation_properties_t> ZeMemoryAllocationProperties; - ZE2UR_CALL(zeMemGetAllocProperties, - (Context->ZeContext, HostPtr, &ZeMemoryAllocationProperties, - &ZeDeviceHandle)); - - // If not shared of any type, we can import the ptr - if (ZeMemoryAllocationProperties.type == ZE_MEMORY_TYPE_UNKNOWN) { - // Promote the host ptr to USM host memory - ze_driver_handle_t driverHandle = - Context->getPlatform()->ZeDriverHandleExpTranslated; - ZeUSMImport.doZeUSMImport(driverHandle, HostPtr, Size); - } - } - return UR_RESULT_SUCCESS; -} - -UR_APIEXPORT ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t Context, - void *HostPtr) { - UR_ASSERT(Context, UR_RESULT_ERROR_INVALID_CONTEXT); - - // Release the imported memory. 
- if (ZeUSMImport.Supported && HostPtr != nullptr) - ZeUSMImport.doZeUSMRelease( - Context->getPlatform()->ZeDriverHandleExpTranslated, HostPtr); - return UR_RESULT_SUCCESS; -} diff --git a/source/adapters/level_zero/usm_p2p.cpp b/source/adapters/level_zero/usm_p2p.cpp index 2b81828423..6e701aa803 100644 --- a/source/adapters/level_zero/usm_p2p.cpp +++ b/source/adapters/level_zero/usm_p2p.cpp @@ -11,8 +11,10 @@ #include "logger/ur_logger.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +namespace ur::level_zero { + +ur_result_t urUsmP2PEnablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { std::ignore = commandDevice; std::ignore = peerDevice; @@ -21,8 +23,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice) { +ur_result_t urUsmP2PDisablePeerAccessExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice) { std::ignore = commandDevice; std::ignore = peerDevice; @@ -31,10 +33,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( - ur_device_handle_t commandDevice, ur_device_handle_t peerDevice, - ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { +ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, + ur_device_handle_t peerDevice, + ur_exp_peer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); @@ -69,3 +72,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return ReturnValue(propertyValue); } +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/command_list_cache.hpp b/source/adapters/level_zero/v2/command_list_cache.hpp index adda56418b..1b60dda2f3 100644 --- a/source/adapters/level_zero/v2/command_list_cache.hpp +++ b/source/adapters/level_zero/v2/command_list_cache.hpp @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include "common.hpp" diff --git a/source/adapters/level_zero/virtual_mem.cpp b/source/adapters/level_zero/virtual_mem.cpp index e3b90121a1..e89899ded7 100644 --- a/source/adapters/level_zero/virtual_mem.cpp +++ b/source/adapters/level_zero/virtual_mem.cpp @@ -15,7 +15,9 @@ #include "physical_mem.hpp" #include "ur_level_zero.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( +namespace ur::level_zero { + +ur_result_t urVirtualMemGranularityGetInfo( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_virtual_mem_granularity_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { @@ -39,24 +41,24 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urVirtualMemReserve(ur_context_handle_t hContext, const void *pStart, - size_t size, void **ppStart) { +ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, + const void *pStart, size_t size, + void **ppStart) { ZE2UR_CALL(zeVirtualMemReserve, (hContext->ZeContext, pStart, size, ppStart)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemFree( - ur_context_handle_t hContext, const void *pStart, size_t 
size) { +ur_result_t urVirtualMemFree(ur_context_handle_t hContext, const void *pStart, + size_t size) { ZE2UR_CALL(zeVirtualMemFree, (hContext->ZeContext, pStart, size)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, - size_t size, ur_virtual_mem_access_flags_t flags) { +ur_result_t urVirtualMemSetAccess(ur_context_handle_t hContext, + const void *pStart, size_t size, + ur_virtual_mem_access_flags_t flags) { ze_memory_access_attribute_t AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_NONE; if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_READWRITE; @@ -69,10 +71,10 @@ urVirtualMemSetAccess(ur_context_handle_t hContext, const void *pStart, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL -urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size, - ur_physical_mem_handle_t hPhysicalMem, size_t offset, - ur_virtual_mem_access_flags_t flags) { +ur_result_t urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, + size_t size, ur_physical_mem_handle_t hPhysicalMem, + size_t offset, + ur_virtual_mem_access_flags_t flags) { ze_memory_access_attribute_t AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_NONE; if (flags & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) AccessAttr = ZE_MEMORY_ACCESS_ATTRIBUTE_READWRITE; @@ -86,17 +88,18 @@ urVirtualMemMap(ur_context_handle_t hContext, const void *pStart, size_t size, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemUnmap( - ur_context_handle_t hContext, const void *pStart, size_t size) { +ur_result_t urVirtualMemUnmap(ur_context_handle_t hContext, const void *pStart, + size_t size) { ZE2UR_CALL(zeVirtualMemUnmap, (hContext->ZeContext, pStart, size)); return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( - ur_context_handle_t hContext, const void *pStart, - [[maybe_unused]] size_t size, ur_virtual_mem_info_t propName, - size_t propSize, void *pPropValue, size_t *pPropSizeRet) { +ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext, + const void *pStart, + [[maybe_unused]] size_t size, + ur_virtual_mem_info_t propName, size_t propSize, + void *pPropValue, size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { @@ -119,3 +122,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGetInfo( return UR_RESULT_SUCCESS; } +} // namespace ur::level_zero diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index 0b4a62cc5f..07b0b9c6ed 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -55,6 +55,13 @@ target_link_libraries(ur_loader PRIVATE ${PROJECT_NAME}::headers ) +if(UR_STATIC_ADAPTER_L0) + target_link_libraries(ur_loader PRIVATE + ur_adapter_level_zero + ) + target_compile_definitions(ur_loader PUBLIC UR_STATIC_ADAPTER_LEVEL_ZERO) +endif() + if(UR_ENABLE_TRACING) target_link_libraries(ur_loader PRIVATE ${TARGET_XPTI}) target_include_directories(ur_loader PRIVATE ${xpti_SOURCE_DIR}/include) diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 1ae7aed535..7e148f78b8 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -8587,6 +8587,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if 
(platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8645,6 +8650,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8735,6 +8745,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8829,6 +8844,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8890,6 +8910,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -8973,6 +8998,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9035,6 +9065,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9096,6 +9131,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9167,6 +9207,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9223,6 +9268,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetMemProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9287,6 +9337,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == 
nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9344,6 +9399,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9405,6 +9465,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9476,6 +9541,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9533,6 +9603,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9594,6 +9669,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9653,6 +9733,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9714,6 +9799,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9769,6 +9859,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9829,6 +9924,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if (platform.initStatus != UR_RESULT_SUCCESS) { continue; } @@ -9891,6 +9991,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( // Load the device-platform DDI tables for (auto &platform : ur_loader::getContext()->platforms) { + // statically linked adapter inside of the loader + if (platform.handle == nullptr) { + continue; + } + if 
(platform.initStatus != UR_RESULT_SUCCESS) { continue; } diff --git a/source/loader/ur_loader.cpp b/source/loader/ur_loader.cpp index bfc9da3e50..0164f8b00e 100644 --- a/source/loader/ur_loader.cpp +++ b/source/loader/ur_loader.cpp @@ -8,12 +8,22 @@ * */ #include "ur_loader.hpp" +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +#include "adapters/level_zero/ur_interface_loader.hpp" +#endif namespace ur_loader { /////////////////////////////////////////////////////////////////////////////// context_t *getContext() { return context_t::get_direct(); } /////////////////////////////////////////////////////////////////////////////// +context_t::context_t() { +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO + auto &level_zero = platforms.emplace_back(nullptr); + ur::level_zero::urAdapterGetDdiTables(&level_zero.dditable.ur); +#endif +} + ur_result_t context_t::init() { for (const auto &adapterPaths : adapter_registry) { for (const auto &path : adapterPaths) { diff --git a/source/loader/ur_loader.hpp b/source/loader/ur_loader.hpp index e2bdc7495d..0345510de8 100644 --- a/source/loader/ur_loader.hpp +++ b/source/loader/ur_loader.hpp @@ -31,6 +31,8 @@ using platform_vector_t = std::vector<platform_t>; class context_t : public AtomicSingleton<context_t> { public: + context_t(); + ur_api_version_t version = UR_API_VERSION_CURRENT; platform_vector_t platforms;
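
For readers who want to see the embedding mechanism in isolation: the loader constructor registers the statically linked adapter as a platform with a null library handle and fills its DDI table directly, and the generated urGet*ProcAddrTable loops skip such platforms because there is nothing to dlopen or dlsym. The self-contained C++ sketch below models that flow under simplified assumptions; the names ddi_table_t, pfnDoWork, and staticAdapterGetDdiTables are illustrative stand-ins, not the real loader or adapter API.

// Illustrative model only: simplified stand-ins for the loader's platform
// vector, DDI table, and the statically linked adapter's table getter.
#include <cstdio>
#include <vector>

struct ddi_table_t {
    int (*pfnDoWork)(int) = nullptr; // stand-in for one DDI entry point
};

struct platform_t {
    void *handle = nullptr; // null => adapter is statically linked into the loader
    ddi_table_t dditable;
};

// Plays the role of ur::level_zero::urAdapterGetDdiTables: fill the table
// with direct function pointers instead of dlsym-resolved ones.
static int staticAdapterDoWork(int x) { return x * 2; }
static void staticAdapterGetDdiTables(ddi_table_t *ddi) {
    ddi->pfnDoWork = &staticAdapterDoWork;
}

int main() {
    std::vector<platform_t> platforms;

    // Loader construction: embed the statically linked adapter up front,
    // mirroring what the context_t constructor does in the patch.
    auto &staticPlatform = platforms.emplace_back();
    staticAdapterGetDdiTables(&staticPlatform.dditable);

    // Generated proc-addr-table loop: platforms with a null handle are
    // skipped because their tables were already populated at construction;
    // dynamically loaded platforms would be resolved via dlsym here.
    for (auto &platform : platforms) {
        if (platform.handle == nullptr) {
            continue; // statically linked adapter inside of the loader
        }
        // dlsym(platform.handle, "urGetXxxProcAddrTable") would go here.
    }

    std::printf("static adapter result: %d\n", platforms[0].dditable.pfnDoWork(21));
    return 0;
}

Built with any C++17 compiler, this prints the value obtained through the statically populated table; a dynamically loaded adapter would instead reach the resolution branch of the loop.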